<a href="https://colab.research.google.com/github/Chow125/ml-projects/blob/main/speech_to_text_translator_using_hugging_face_models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
 pip install transformers librosa torch



In [19]:
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq, AutoTokenizer, AutoModelForSeq2SeqLM
import librosa
import torch

def preprocess_audio(audio_path):
    """Load an audio file and return it as a tensor with its sample rate.

    The file at ``audio_path`` is read through ``librosa.load`` with a
    requested sample rate of 16000 Hz.

    Returns:
        A ``(waveform, rate)`` tuple: the loaded samples wrapped in a
        ``torch.Tensor`` and the sample rate reported by librosa.
    """
    waveform, rate = librosa.load(audio_path, sr=16000)
    waveform_tensor = torch.tensor(waveform)
    return waveform_tensor, rate

def speech_to_text(audio_path):
    """Transcribe the audio file at ``audio_path`` with ``openai/whisper-base``.

    The processor and model are loaded on every call, the audio is loaded
    via ``preprocess_audio``, and generation runs with gradients disabled.

    Returns:
        The first decoded transcription, with special tokens stripped.
    """
    print("Converting speech to text...")
    checkpoint = "openai/whisper-base"
    processor = AutoProcessor.from_pretrained(checkpoint)
    whisper = AutoModelForSpeechSeq2Seq.from_pretrained(checkpoint)

    waveform, rate = preprocess_audio(audio_path)
    features = processor(waveform, sampling_rate=rate, return_tensors="pt")

    # Inference only: no autograd bookkeeping is needed for generation.
    with torch.no_grad():
        token_ids = whisper.generate(features["input_features"])

    decoded = processor.batch_decode(token_ids, skip_special_tokens=True)
    return decoded[0]

def translate_text_hindi(text, target_language="hindi"):
    """Translate ``text`` using the ``Helsinki-NLP/opus-mt-en-hi`` checkpoint.

    ``target_language`` only affects the progress message printed before
    translation; the checkpoint is fixed. The tokenizer and model are
    loaded on every call.

    Returns:
        The first generated sequence, decoded with special tokens removed.
    """
    print("Translating text to", target_language, "...")
    checkpoint = "Helsinki-NLP/opus-mt-en-hi"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    translator = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

    token_ids = tokenizer.encode(text, return_tensors="pt", padding=True)
    generated = translator.generate(token_ids)
    return tokenizer.decode(generated[0], skip_special_tokens=True)

def translate_text_french(text, target_language="french"):
    """Translate ``text`` using the ``Helsinki-NLP/opus-mt-en-fr`` checkpoint.

    ``target_language`` only affects the progress message printed before
    translation; the checkpoint is fixed. The tokenizer and model are
    loaded on every call.

    Returns:
        The first generated sequence, decoded with special tokens removed.
    """
    print("Translating text to", target_language, "...")
    checkpoint = "Helsinki-NLP/opus-mt-en-fr"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    translator = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

    token_ids = tokenizer.encode(text, return_tensors="pt", padding=True)
    generated = translator.generate(token_ids)
    return tokenizer.decode(generated[0], skip_special_tokens=True)

def translate_text_spanich(text, target_language="spanich"):
    """Translate ``text`` using the ``Helsinki-NLP/opus-mt-en-es`` checkpoint.

    ``target_language`` only affects the progress message printed before
    translation; the checkpoint is fixed. The tokenizer and model are
    loaded on every call.

    Returns:
        The first generated sequence, decoded with special tokens removed.
    """
    print("Translating text to", target_language, "...")
    checkpoint = "Helsinki-NLP/opus-mt-en-es"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    translator = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

    token_ids = tokenizer.encode(text, return_tensors="pt", padding=True)
    generated = translator.generate(token_ids)
    return tokenizer.decode(generated[0], skip_special_tokens=True)

def translate_text_japanese(text, target_language="japanese"):
    """Translate ``text`` using the ``Helsinki-NLP/opus-mt-en-jap`` checkpoint.

    ``target_language`` only affects the progress message printed before
    translation; the checkpoint is fixed. The tokenizer and model are
    loaded on every call.

    Returns:
        The first generated sequence, decoded with special tokens removed.
    """
    print("Translating text to", target_language, "...")
    checkpoint = "Helsinki-NLP/opus-mt-en-jap"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    translator = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

    token_ids = tokenizer.encode(text, return_tensors="pt", padding=True)
    generated = translator.generate(token_ids)
    return tokenizer.decode(generated[0], skip_special_tokens=True)



if __name__ == "__main__":
    # Transcribe a fixed sample recording, then translate the transcript into
    # the language the user names at the prompt.
    audio_file_path = "village dialogue.wav"
    text = speech_to_text(audio_file_path)
    lang = input("Enter the language:")

    # Dispatch table from accepted answers to translator functions.
    # "spanich" is the historical spelling this script has always accepted;
    # the correct spelling "spanish" is accepted as well.
    # The "japanese" entry previously called a non-existent
    # translate_text_tamil and crashed with NameError.
    translators = {
        "hindi": translate_text_hindi,
        "spanich": translate_text_spanich,
        "spanish": translate_text_spanich,
        "japanese": translate_text_japanese,
    }

    if lang in translators:
        target_lang = lang
        translate = translators[lang]
    else:
        # Any unrecognised answer falls back to French.
        target_lang = "french"
        translate = translate_text_french

    translated_text = translate(text, target_lang)
    print("\nTranscription:", text)
    print("\nTranslated text:", translated_text)

Converting speech to text...
Enter the language:hindi
Translating text to hindi ...

Transcription:  I've never been out of the village before. I've never been out of the village before. But this time, I'm truly free.

Translated text: मैं पहले गांव से बाहर कभी नहीं किया गया है. मैं गांव से पहले कभी नहीं किया गया है. लेकिन इस बार, मैं वास्तव में स्वतंत्र हूँ.
