In [1]:
pip install transformers

Collecting transformers
  Downloading transformers-4.43.4-py3-none-any.whl.metadata (43 kB)
     ---------------------------------------- 0.0/43.7 kB ? eta -:--:--
     ----------------- -------------------- 20.5/43.7 kB 640.0 kB/s eta 0:00:01
     -------------------------- ----------- 30.7/43.7 kB 435.7 kB/s eta 0:00:01
     -------------------------- ----------- 30.7/43.7 kB 435.7 kB/s eta 0:00:01
     -------------------------- ----------- 30.7/43.7 kB 435.7 kB/s eta 0:00:01
     -------------------------------------- 43.7/43.7 kB 142.8 kB/s eta 0:00:00
Collecting filelock (from transformers)
  Downloading filelock-3.15.4-py3-none-any.whl.metadata (2.9 kB)
Collecting huggingface-hub<1.0,>=0.23.2 (from transformers)
  Downloading huggingface_hub-0.24.5-py3-none-any.whl.metadata (13 kB)
Collecting regex!=2019.12.17 (from transformers)
  Downloading regex-2024.7.24-cp38-cp38-win_amd64.whl.metadata (41 kB)
     ---------------------------------------- 0.0/41.5 kB ? eta -:--:--
     ---


[notice] A new release of pip is available: 24.1.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
pip install gtts

In [3]:
from transformers import MBartForConditionalGeneration, AutoModelForSeq2SeqLM
from transformers import AlbertTokenizer, AutoTokenizer
from gtts import gTTS
import os

# Initialize tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("ai4bharat/IndicBART", do_lower_case=False, use_fast=False, keep_accents=True)
model = AutoModelForSeq2SeqLM.from_pretrained("ai4bharat/IndicBART")

# Some initial mapping
bos_id = tokenizer._convert_token_to_id_with_added_voc("<s>")
eos_id = tokenizer._convert_token_to_id_with_added_voc("</s>")
pad_id = tokenizer._convert_token_to_id_with_added_voc("<pad>")
lang_ids = {'en': tokenizer._convert_token_to_id_with_added_voc("<2en>"),
            'hi': tokenizer._convert_token_to_id_with_added_voc("<2hi>")}

def tts_generate(text, lang, filename):
    tts = gTTS(text=text, lang=lang)
    tts.save(filename)

def process_and_speak(input_text, target_lang):
    # Tokenize input and output
    inp = tokenizer(f"{input_text} </s> <2{target_lang}>", add_special_tokens=False, return_tensors="pt", padding=True).input_ids
    target_lang_id = lang_ids.get(target_lang, tokenizer._convert_token_to_id_with_added_voc("<2en>")) # default to English
    
    # Generate output
    model_output = model.generate(
        inp, use_cache=True, num_beams=4, max_length=20, min_length=1, early_stopping=True, 
        pad_token_id=pad_id, bos_token_id=bos_id, eos_token_id=eos_id, 
        decoder_start_token_id=target_lang_id
    )
    
    # Decode and print output
    decoded_output = tokenizer.decode(model_output[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
    print(decoded_output)
    
    # Generate TTS only for English and Hindi
    if target_lang in ['en', 'hi']:
        lang_code = 'en' if target_lang == 'en' else 'hi'
        filename = f"{target_lang}_output.mp3"
        tts_generate(decoded_output, lang_code, filename)
        print(f"TTS audio saved as {filename}")

# Example usage
process_and_speak("I am a boy", "en")
process_and_speak("मैं  एक लड़का हूँ", "hi")
process_and_speak("मला [MASK] पाहिजे </s> <2mr>", "mr")


tokenizer_config.json:   0%|          | 0.00/498 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config.json:   0%|          | 0.00/832 [00:00<?, ?B/s]

ImportError: 
AlbertTokenizer requires the SentencePiece library but it was not found in your environment. Checkout the instructions on the
installation page of its repo: https://github.com/google/sentencepiece#installation and follow the ones
that match your environment. Please note that you may need to restart your runtime after installation.


In [7]:
from transformers import BartTokenizer, BartForConditionalGeneration
from gtts import gTTS
import torch

# Initialize tokenizer and model
tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
model = BartForConditionalGeneration.from_pretrained("facebook/bart-base")

# Some initial mapping
bos_id = tokenizer.convert_tokens_to_ids("<s>")
eos_id = tokenizer.convert_tokens_to_ids("</s>")
pad_id = tokenizer.convert_tokens_to_ids("<pad>")

def tts_generate(text, lang, filename):
    tts = gTTS(text=text, lang=lang)
    tts.save(filename)

def process_and_speak(input_text, target_lang):
    # Tokenize input
    inp = tokenizer(input_text, return_tensors="pt", padding=True)
    
    # Generate output
    model_output = model.generate(
        inp['input_ids'], 
        use_cache=True, 
        num_beams=4, 
        max_length=20, 
        min_length=1, 
        early_stopping=True, 
        pad_token_id=pad_id, 
        bos_token_id=bos_id, 
        eos_token_id=eos_id
    )
    
    # Decode and print output
    decoded_output = tokenizer.decode(model_output[0], skip_special_tokens=True)
    print(decoded_output)
    
    # Generate TTS only for English and Hindi
    if target_lang in ['en', 'hi']:
        lang_code = 'en' if target_lang == 'en' else 'hi'
        filename = f"{target_lang}_output.mp3"
        tts_generate(decoded_output, lang_code, filename)
        print(f"TTS audio saved as {filename}")

# Example usage
process_and_speak("I am a boy", "en")
process_and_speak("मैं  एक लड़का हूँ", "hi")


vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.72k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

KeyboardInterrupt: 

In [5]:
import sentencepiece
print(sentencepiece.__version__)


0.2.0


In [14]:
from transformers import BartTokenizer, BartForConditionalGeneration
from gtts import gTTS
import torch

# Initialize tokenizer and model
tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
model = BartForConditionalGeneration.from_pretrained("facebook/bart-base")

# Some initial mapping
bos_id = tokenizer.convert_tokens_to_ids("<s>")
eos_id = tokenizer.convert_tokens_to_ids("</s>")
pad_id = tokenizer.convert_tokens_to_ids("<pad>")

def tts_generate(text, lang, filename):
    tts = gTTS(text=text, lang=lang)
    tts.save(filename)

def process_and_speak(input_text, target_lang):
    # Tokenize input
    inp = tokenizer(input_text, return_tensors="pt", padding=True)
    
    # Generate output
    model_output = model.generate(
        inp['input_ids'], 
        use_cache=True, 
        num_beams=4, 
        max_length=20, 
        min_length=1, 
        early_stopping=True, 
        pad_token_id=pad_id, 
        bos_token_id=bos_id, 
        eos_token_id=eos_id
    )
    
    # Decode and print output
    decoded_output = tokenizer.decode(model_output[0], skip_special_tokens=True)
    print("Generated Output:", decoded_output)
    
    # Generate TTS only for English and Hindi
    if target_lang in ['en', 'hi']:
        lang_code = 'en' if target_lang == 'en' else 'hi'
        filename = f"{target_lang}_output.mp3"
        tts_generate(decoded_output, lang_code, filename)
        print(f"TTS audio saved as {filename}")

# User input
input_text = input("Enter the text to process: ")
target_lang = input("Enter the target language (en/hi): ").strip().lower()

# Process and speak
process_and_speak(input_text, target_lang)


Enter the text to process:  मेरा नाम है
Enter the target language (en/hi):  en


Generated Output: मेरा नाम है
TTS audio saved as en_output.mp3


In [2]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from gtts import gTTS
import os

# Initialize tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("ai4bharat/IndicBART", do_lower_case=False, use_fast=False, keep_accents=True)
model = AutoModelForSeq2SeqLM.from_pretrained("ai4bharat/IndicBART")

# Some initial mapping
bos_id = tokenizer._convert_token_to_id_with_added_voc("<s>")
eos_id = tokenizer._convert_token_to_id_with_added_voc("</s>")
pad_id = tokenizer._convert_token_to_id_with_added_voc("<pad>")
lang_ids = {'en': tokenizer._convert_token_to_id_with_added_voc("<2en>"),
            'hi': tokenizer._convert_token_to_id_with_added_voc("<2hi>")}

def tts_generate(text, lang, filename):
    tts = gTTS(text=text, lang=lang)
    tts.save(filename)

def process_and_speak(input_text, target_lang):
    if target_lang not in ['en', 'hi']:
        raise ValueError("Target language must be 'en' or 'hi'")
    
    # Tokenize input and output
    inp = tokenizer(f"{input_text} </s> <2{target_lang}>", add_special_tokens=False, return_tensors="pt", padding=True).input_ids
    target_lang_id = lang_ids.get(target_lang, tokenizer._convert_token_to_id_with_added_voc("<2en>"))  # default to English
    
    # Generate output
    model_output = model.generate(
        inp, use_cache=True, num_beams=4, max_length=20, min_length=1, early_stopping=True, 
        pad_token_id=pad_id, bos_token_id=bos_id, eos_token_id=eos_id, 
        decoder_start_token_id=target_lang_id
    )
    
    # Decode and print output
    decoded_output = tokenizer.decode(model_output[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
    print("Generated Output:", decoded_output)
    
    # Generate TTS only for English and Hindi
    if target_lang in ['en', 'hi']:
        lang_code = 'en' if target_lang == 'en' else 'hi'
        filename = f"{target_lang}_output.mp3"
        tts_generate(decoded_output, lang_code, filename)
        print(f"TTS audio saved as {filename}")

# User input
input_text = input("Enter the text to process (in English): ")
target_lang = input("Enter the target language (en/hi): ").strip().lower()

# Validate language input
if target_lang not in ['en', 'hi']:
    print("Invalid target language. Please choose 'en' for English or 'hi' for Hindi.")
else:
    # Process and speak
    process_and_speak(input_text, target_lang)


Enter the text to process (in English):  how are you
Enter the target language (en/hi):  hi


Generated Output: how are you
TTS audio saved as hi_output.mp3


In [7]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from gtts import gTTS
import os

# Initialize tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("ai4bharat/IndicBART", do_lower_case=False, use_fast=False, keep_accents=True)
model = AutoModelForSeq2SeqLM.from_pretrained("ai4bharat/IndicBART")

# Some initial mapping
bos_id = tokenizer.convert_tokens_to_ids("<s>")
eos_id = tokenizer.convert_tokens_to_ids("</s>")
pad_id = tokenizer.convert_tokens_to_ids("<pad>")
lang_ids = {'hi': tokenizer.convert_tokens_to_ids("<2hi>")}  # Hindi language ID

def tts_generate(text, lang, filename):
    tts = gTTS(text=text, lang=lang)
    tts.save(filename)

def process_and_speak(input_text, target_lang):
    if target_lang != 'hi':
        raise ValueError("Currently, only translation to Hindi is supported.")

    # Tokenize input for Hindi translation
    inp = tokenizer(f"{input_text} </s> <2hi>", add_special_tokens=False, return_tensors="pt", padding=True).input_ids
    target_lang_id = lang_ids['hi']  # Hindi language ID
    
    # Generate output
    model_output = model.generate(
        inp, use_cache=True, num_beams=4, max_length=50, min_length=1, early_stopping=True, 
        pad_token_id=pad_id, bos_token_id=bos_id, eos_token_id=eos_id, 
        decoder_start_token_id=target_lang_id
    )
    
    # Decode and print output
    decoded_output = tokenizer.decode(model_output[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
    print("Generated Output (Hindi):", decoded_output)
    
    # Generate TTS only for Hindi
    if decoded_output:
        lang_code = 'hi'
        filename = "hi_output.mp3"
        tts_generate(decoded_output, lang_code, filename)
        print(f"TTS audio saved as {filename}")
    else:
        print("No translation was generated.")

# User input
input_text = input("Enter the text to translate (in English): ")
target_lang = input("Enter the target language (hi for Hindi): ").strip().lower()

# Validate language input
if target_lang != 'hi':
    print("Currently, only translation to Hindi is supported. Please choose 'hi' for Hindi.")
else:
    # Process and speak
    process_and_speak(input_text, target_lang)


Enter the text to translate (in English):  how are you
Enter the target language (hi for Hindi):  hi


Generated Output (Hindi): how are you
TTS audio saved as hi_output.mp3


In [9]:
from googletrans import Translator

def translate_text(text, target_lang='hi'):
    translator = Translator()
    translation = translator.translate(text, dest=target_lang)
    return translation.text

text = "Hello, how are you?"
translated_text = translate_text(text, target_lang='hi')
print(translated_text)


नमस्ते, आप कैसे हैं?


In [10]:
from googletrans import Translator
from gtts import gTTS

def translate_text(text, target_lang='hi'):
    translator = Translator()
    translation = translator.translate(text, dest=target_lang)
    return translation.text

def tts_generate(text, lang, filename):
    tts = gTTS(text=text, lang=lang)
    tts.save(filename)

input_text = "Hello, how are you?"
translated_text = translate_text(input_text, target_lang='hi')
print("Translated Text:", translated_text)

tts_generate(translated_text, lang='hi', filename='hi_output.mp3')
print("TTS audio saved as hi_output.mp3")


Translated Text: नमस्ते, आप कैसे हैं?
TTS audio saved as hi_output.mp3


In [13]:
from googletrans import Translator
from gtts import gTTS

def translate_text(text, target_lang='hi'):
    translator = Translator()
    translation = translator.translate(text, dest=target_lang)
    return translation.text

def tts_generate(text, lang, filename):
    tts = gTTS(text=text, lang=lang)
    tts.save(filename)

# User input
input_text = input("Enter the text to translate (in English): ")
target_lang = input("Enter the target language (e.g., 'hi' for Hindi): ").strip().lower()

# Validate language input
if target_lang not in ['hi', 'en']:
    print("Unsupported target language. Currently, only 'hi' for Hindi and 'en' for English are supported.")
else:
    # Translate the text
    translated_text = translate_text(input_text, target_lang=target_lang)
    print("Translated Text:", translated_text)

    # Generate TTS
    tts_generate(translated_text, lang=target_lang, filename=f"{target_lang}_output.mp3")
    print(f"TTS audio saved as {target_lang}_output.mp3")


Enter the text to translate (in English):  how are you
Enter the target language (e.g., 'hi' for Hindi):  hi


Translated Text: आप कैसे हैं
TTS audio saved as hi_output.mp3


In [18]:
from googletrans import Translator
from gtts import gTTS

def translate_text(text, target_lang='hi'):
    translator = Translator()
    translation = translator.translate(text, dest=target_lang)
    return translation.text

def tts_generate(text, lang, filename):
    tts = gTTS(text=text, lang=lang)
    tts.save(filename)

# User input
input_text = input("Enter the text to translate (in English): ")
target_lang = input("Enter the target language (e.g., 'hi' for Hindi, 'or' for Odia, 'en' for English): ").strip().lower()

# Validate language input
if target_lang not in ['hi', 'or', 'en']:
    print("Unsupported target language. Currently, only 'hi' for Hindi, 'or' for Odia, and 'en' for English are supported.")
else:
    # Translate the text
    translated_text = translate_text(input_text, target_lang=target_lang)
    print("Translated Text:", translated_text)

    # Generate TTS
    tts_generate(translated_text, lang=target_lang, filename=f"{target_lang}_output.mp3")
    print(f"TTS audio saved as {target_lang}_output.mp3")


Enter the text to translate (in English):  how are you
Enter the target language (e.g., 'hi' for Hindi, 'or' for Odia, 'en' for English):  or


Translated Text: କେମିତି ଅଛନ୍ତି, କେମିତି ଅଛ


ValueError: Language not supported: or

In [21]:
from googletrans import Translator
from gtts import gTTS

def translate_text(text, target_lang='hi'):
    translator = Translator()
    translation = translator.translate(text, dest=target_lang)
    return translation.text

def tts_generate(text, lang, filename):
    try:
        tts = gTTS(text=text, lang=lang)
        tts.save(filename)
        print(f"TTS audio saved as {filename}")
    except ValueError as e:
        print(f"Error in TTS generation: {e}")

# User input
input_text = input("Enter the text to translate (in English): ")
target_lang = input("Enter the target language (e.g., 'hi' for Hindi, 'ta' for Tamil, 'bn' for Bengali, 'en' for English): ").strip().lower()

# Validate language input
supported_langs = {
    'hi': 'Hindi',
    'ta': 'Tamil',
    'bn': 'Bengali',
    'en': 'English'
}

if target_lang not in supported_langs:
    print(f"Unsupported target language. Currently, supported languages are: {', '.join(supported_langs.keys())}.")
else:
    # Translate the text
    translated_text = translate_text(input_text, target_lang=target_lang)
    print("Translated Text:", translated_text)

    # Generate TTS
    tts_generate(translated_text, lang=target_lang, filename=f"{target_lang}_output.mp3")


Enter the text to translate (in English):  how are you
Enter the target language (e.g., 'hi' for Hindi, 'ta' for Tamil, 'bn' for Bengali, 'en' for English):  bn


Translated Text: আপনি কেমন আছেন
TTS audio saved as bn_output.mp3


In [1]:
from googletrans import Translator
from gtts import gTTS

def translate_text(text, target_lang='hi'):
    translator = Translator()
    translation = translator.translate(text, dest=target_lang)
    return translation.text

def tts_generate(text, lang, filename):
    try:
        tts = gTTS(text=text, lang=lang)
        tts.save(filename)
        print(f"TTS audio saved as {filename}")
    except ValueError as e:
        print(f"Error in TTS generation: {e}")

# User input
input_text = input("Enter the text to translate (in English): ")
target_lang = input("Enter the target language (e.g., 'hi' for Hindi, 'ta' for Tamil, 'bn' for Bengali, 'mr' for Marathi, 'or' for Odia, 'gu' for Gujarati, 'te' for Telugu, 'en' for English): ").strip().lower()

# Validate language input
supported_langs = {
    'hi': 'Hindi',
    'ta': 'Tamil',
    'bn': 'Bengali',
    'mr': 'Marathi',
    'or': 'Odia',
    'gu': 'Gujarati',
    'te': 'Telugu',
    'en': 'English'
}

if target_lang not in supported_langs:
    print(f"Unsupported target language. Currently, supported languages are: {', '.join(supported_langs.keys())}.")
else:
    # Translate the text
    translated_text = translate_text(input_text, target_lang=target_lang)
    print("Translated Text:", translated_text)

    # Generate TTS
    tts_generate(translated_text, lang=target_lang, filename=f"{target_lang}_output.wav")


Enter the text to translate (in English):  hello my name is arun
Enter the target language (e.g., 'hi' for Hindi, 'ta' for Tamil, 'bn' for Bengali, 'mr' for Marathi, 'or' for Odia, 'gu' for Gujarati, 'te' for Telugu, 'en' for English):  ta


Translated Text: வணக்கம் என் பெயர் அருண்
TTS audio saved as ta_output.wav
