<a href="https://colab.research.google.com/github/Nukaraju2003/Python_practice/blob/master/Eng_Audio_to_telu_text_and_audio.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install OpenAI Whisper directly from its GitHub repository. No tag or commit is
# pinned, so each run installs whatever the repository currently resolves to.
!pip install git+https://github.com/openai/whisper.git

Collecting git+https://github.com/openai/whisper.git
  Cloning https://github.com/openai/whisper.git to /tmp/pip-req-build-iz30qnce
  Running command git clone --filter=blob:none --quiet https://github.com/openai/whisper.git /tmp/pip-req-build-iz30qnce
  Resolved https://github.com/openai/whisper.git to commit 90db0de1896c23cbfaf0c58bc2d30665f709f170
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


In [2]:
# Install Hugging Face Transformers (unpinned); the tokenizer and seq2seq model
# classes imported in the next cell come from this package.
!pip install transformers



In [3]:
# Import required libraries
import whisper
import warnings
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, MarianMTModel, MarianTokenizer
warnings.filterwarnings('ignore')

In [4]:
# Load the Whisper checkpoint used for transcription. The "small" checkpoint is
# loaded here; "medium" or "large" can be substituted for better accuracy.
# `model` is a module-level global that is later passed to transcribe_and_translate.
model = whisper.load_model("small")

In [5]:
# Translation setup: load the tokenizer and seq2seq model for the checkpoint id
# below. Both objects are module-level globals that english_to_telugu reads
# directly, so they must exist before that function is called.
nllb_model_name = "facebook/nllb-200-distilled-1.3B"
nllb_tokenizer = AutoTokenizer.from_pretrained(nllb_model_name)
nllb_model = AutoModelForSeq2SeqLM.from_pretrained(nllb_model_name)

In [6]:
# Helper function to split long text into chunks
def split_text_into_chunks(text, max_tokens, tokenizer):
    """Split ``text`` into pieces of at most ``max_tokens`` tokenizer tokens each.

    The text is tokenized once *without* special tokens, the token ids are cut
    into consecutive windows of ``max_tokens`` ids, and every window is decoded
    back into a string.

    Args:
        text (str): Text to split.
        max_tokens (int): Maximum number of content tokens per chunk. Must be
            a positive integer.
        tokenizer: Object providing ``encode(text, add_special_tokens=...)``
            and ``decode(ids, skip_special_tokens=...)``.

    Returns:
        list[str]: Decoded chunks in their original order. Chunks that decode
        to nothing but whitespace are dropped, so empty input yields ``[]``.

    Raises:
        ValueError: If ``max_tokens`` is not positive.
    """
    if max_tokens <= 0:
        raise ValueError(f"max_tokens must be a positive integer, got {max_tokens!r}")

    # Encode without special tokens: otherwise the markers the tokenizer adds
    # around the sequence would take up part of the first and last windows and
    # shift every chunk boundary, and empty text would still yield a chunk.
    token_ids = tokenizer.encode(text, add_special_tokens=False)

    chunks = []
    for start in range(0, len(token_ids), max_tokens):
        window = token_ids[start:start + max_tokens]
        piece = tokenizer.decode(window, skip_special_tokens=True)
        # A blank chunk carries nothing to translate; skip it rather than
        # sending an empty string to the translation function.
        if piece.strip():
            chunks.append(piece)
    return chunks

In [7]:
# Helper function to translate chunks of text
def translate_chunks(chunks, translation_function):
    """Translate each chunk in order and join the results into one string.

    Args:
        chunks: Iterable of text pieces to translate.
        translation_function: Callable that takes one chunk and returns its
            translation as a string.

    Returns:
        str: The translated pieces joined by single spaces.
    """
    results = []
    for piece in chunks:
        results.append(translation_function(piece))
    separator = " "
    return separator.join(results)

In [8]:
# Wrapper function for large text translation
def translate_large_text(text, translation_function, tokenizer, max_tokens=512):
    """Translate text of arbitrary length by working through token-bounded chunks.

    Args:
        text (str): Text to translate.
        translation_function: Callable applied to each chunk; returns the
            translated chunk.
        tokenizer: Tokenizer handed to split_text_into_chunks to measure and
            cut the text.
        max_tokens (int, optional): Chunk size passed to
            split_text_into_chunks. Defaults to 512.

    Returns:
        str: Whatever translate_chunks produces for the translated chunks.
    """
    return translate_chunks(
        split_text_into_chunks(text, max_tokens, tokenizer),
        translation_function,
    )

In [9]:
def english_to_telugu(text):
    """Translate English text to Telugu with the module-level NLLB model.

    Reads the globals ``nllb_tokenizer`` and ``nllb_model`` and sets
    ``nllb_tokenizer.src_lang`` to ``"eng_Latn"`` as a side effect.

    Args:
        text (str): English text to translate.

    Returns:
        str: The decoded first generated sequence with special tokens removed,
        or ``""`` when ``text`` is empty or whitespace-only.
    """
    # Nothing to translate: skip the model call entirely.
    if not text or not text.strip():
        return ""

    src_lang = "eng_Latn"
    tgt_lang = "tel_Telu"
    nllb_tokenizer.src_lang = src_lang
    inputs = nllb_tokenizer(text, return_tensors="pt", padding=True)

    # Give generation an explicit budget that grows with the input. Without it,
    # generate() falls back to the checkpoint's default length cap
    # (NOTE(review): believed to be 200 tokens for this checkpoint - confirm),
    # which cuts off the translation of long chunks such as the 512-token
    # chunks produced by translate_large_text.
    # The 2x factor and the floor of 64 are a generous upper bound, not a
    # measured ratio; generation still stops at the end-of-sequence token.
    source_len = inputs["input_ids"].shape[1]
    max_new_tokens = max(64, 2 * source_len)

    outputs = nllb_model.generate(
        **inputs,
        # Force the decoder to start with the target-language token.
        forced_bos_token_id=nllb_tokenizer.convert_tokens_to_ids(tgt_lang),
        max_new_tokens=max_new_tokens,
    )
    return nllb_tokenizer.decode(outputs[0], skip_special_tokens=True)

In [10]:
# Install gTTS (imported by the text-to-speech cell below) and accelerate.
# NOTE(review): accelerate is not imported directly anywhere in the visible
# notebook - confirm it is still needed.
!pip install accelerate gTTS



In [32]:
# !pip install gtts
import gtts
from gtts import gTTS

def text_to_speech(text, lang, filename_prefix="audio",
                   output_dir="/content/drive/MyDrive/My Projects/Trizen/CBPT1SGA"):
    """Converts text to speech using gTTS and saves it as an MP3 file.

    Args:
        text (str): Text to convert to speech.
        lang (str): Language code (e.g., 'en', 'te'). The three-letter code
            'tel' is accepted and mapped to 'te'.
        filename_prefix (str, optional): Leading part of the file name; the
            file is saved as ``<filename_prefix>_audio.mp3``. Defaults to
            "audio".
        output_dir (str, optional): Existing directory to write the MP3 into.
            Defaults to the project's Google Drive folder.

    Returns:
        str: Path of the generated audio file.

    Raises:
        FileNotFoundError: If ``output_dir`` does not exist (for the default
            location this usually means Google Drive is not mounted yet).
    """
    import os  # local import so this cell does not depend on the imports cell

    # gTTS expects language codes like 'en', 'te', etc.
    # Mapping 'tel' to 'te' for Telugu; any other code is passed through as-is.
    lang_mapping = {
        'tel': 'te'
    }
    lang = lang_mapping.get(lang, lang)

    # Report a missing target directory clearly before doing any synthesis.
    # The directory is deliberately not created here: creating folders under an
    # unmounted Drive mount point would hide the real problem.
    if not os.path.isdir(output_dir):
        raise FileNotFoundError(
            f"Output directory does not exist: {output_dir!r} "
            "(is Google Drive mounted?)"
        )

    tts = gTTS(text=text, lang=lang)
    filename = os.path.join(output_dir, f"{filename_prefix}_audio.mp3")
    tts.save(filename)
    return filename

In [30]:
def transcribe_and_translate(audio_path, model, translate_large_text, tokenizer, target_lang_code='tel_Telu'):
    """Transcribe an audio file, translate the transcript, and synthesize both texts.

    Progress and results are printed along the way.

    Args:
        audio_path (str): Path to the audio file.
        model: Object whose ``transcribe(audio_path)`` returns a mapping with a
            ``"text"`` entry (e.g. a loaded Whisper model).
        translate_large_text (function): Called as
            ``translate_large_text(text, english_to_telugu, tokenizer)`` and
            expected to return the translated text.
        tokenizer: Passed through unchanged to ``translate_large_text``.
        target_lang_code (str, optional): Only its first three characters are
            used, as the language handed to ``text_to_speech`` for the
            translated audio. It does not select the translation itself, which
            always goes through ``english_to_telugu``. Defaults to 'tel_Telu'.

    Returns:
        tuple: ``(translated_text, english_audio_filename, telugu_audio_filename)``.
    """
    # 1. Transcribe the audio file.
    print("Transcribing audio...")
    source_text = model.transcribe(audio_path)["text"]
    print("Transcription Complete!")
    print("Transcribed Text:", source_text)

    # 2. Translate the transcript.
    telugu_text = translate_large_text(source_text, english_to_telugu, tokenizer)
    print("Translated Text:")
    print(telugu_text)

    # 3. Synthesize speech for the original transcript.
    english_mp3 = text_to_speech(source_text, 'en', filename_prefix="english")
    print("English audio saved to:", english_mp3)

    # 4. Synthesize speech for the translation, using the three-letter prefix
    #    of the target language code as the speech language.
    speech_lang = target_lang_code[:3]
    telugu_mp3 = text_to_speech(telugu_text, speech_lang, filename_prefix="telugu")
    print("Telugu audio saved to:", telugu_mp3)

    return telugu_text, english_mp3, telugu_mp3

In [13]:
# Mount Google Drive at /content/drive. The input recording and the MP3 files
# written by text_to_speech both live under /content/drive/MyDrive, so this
# must run before the pipeline cell.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [14]:
# Input recording for the pipeline, located on the mounted Google Drive.
audio_path = "/content/drive/MyDrive/My Projects/Trizen/CBPT1SGA/audio.wav"  # Replace with the path to your own audio file

In [15]:
# Echo the configured input path as a quick sanity check before running the pipeline.
print(audio_path)

/content/drive/MyDrive/My Projects/Trizen/CBPT1SGA/audio.wav


In [33]:
# Run the full pipeline on the configured recording: transcribe it with the
# Whisper model, translate the transcript via translate_large_text using the
# NLLB tokenizer, and synthesize audio for both texts. target_lang_code is left
# at its default ('tel_Telu').
translated_text, english_audio_filename, telugu_audio_filename = transcribe_and_translate(audio_path, model, translate_large_text, nllb_tokenizer)

Transcribing audio...
Transcription Complete!
Transcribed Text:  Today I'm going to teach you one expression, just one, because I want you to memorize it and start using it every time you speak English. This expression is extremely useful and native speakers use it all the time. Every day we come across something that is very easy or obvious to do or understand. You don't even need to think about it. It's obvious that water is wet, then the sun is hot. And it's obvious that a cat will always land on its feet. Try your own risk. So when you don't have to consider something for a long time, it's just because it's a no-brainer. It's a no-brainer that you will improve your English if you follow me here.
Translated Text:
ఈ రోజు నేను మీకు ఒక వ్యక్తీకరణ నేర్పబోతున్నాను, ఒక్కటి మాత్రమే, ఎందుకంటే మీరు ఆంగ్లంలో మాట్లాడే ప్రతిసారీ దాన్ని గుర్తుంచుకోవాలని మరియు ఉపయోగించడం ప్రారంభించాలని నేను కోరుకుంటున్నాను. ఈ వ్యక్తీకరణ చాలా ఉపయోగకరంగా ఉంటుంది మరియు స్థానిక మాట్లాడేవారు దీన్ని అన్ని సమయాలలో ఉపయోగ