# Language Translation with Transformer

#### Hemant Thapa

` !pip install transformers gtts SpeechRecognition googletrans==4.0.0-rc1`

In [1]:
#importing libraries
import os
import speech_recognition as sr

In [2]:
from transformers import MarianMTModel, MarianTokenizer
from gtts import gTTS
from googletrans import Translator, LANGUAGES

#### Google Translater 

#### Types of Languages in google translater module 

In [3]:
print("Available languages:")
for code, language in LANGUAGES.items():
    print(f"{code}: {language}")

Available languages:
af: afrikaans
sq: albanian
am: amharic
ar: arabic
hy: armenian
az: azerbaijani
eu: basque
be: belarusian
bn: bengali
bs: bosnian
bg: bulgarian
ca: catalan
ceb: cebuano
ny: chichewa
zh-cn: chinese (simplified)
zh-tw: chinese (traditional)
co: corsican
hr: croatian
cs: czech
da: danish
nl: dutch
en: english
eo: esperanto
et: estonian
tl: filipino
fi: finnish
fr: french
fy: frisian
gl: galician
ka: georgian
de: german
el: greek
gu: gujarati
ht: haitian creole
ha: hausa
haw: hawaiian
iw: hebrew
he: hebrew
hi: hindi
hmn: hmong
hu: hungarian
is: icelandic
ig: igbo
id: indonesian
ga: irish
it: italian
ja: japanese
jw: javanese
kn: kannada
kk: kazakh
km: khmer
ko: korean
ku: kurdish (kurmanji)
ky: kyrgyz
lo: lao
la: latin
lv: latvian
lt: lithuanian
lb: luxembourgish
mk: macedonian
mg: malagasy
ms: malay
ml: malayalam
mt: maltese
mi: maori
mr: marathi
mn: mongolian
my: myanmar (burmese)
ne: nepali
no: norwegian
or: odia
ps: pashto
fa: persian
pl: polish
pt: portuguese
pa: p

#### Language code search according to Langauge

In [4]:
# function to get code for desire language
def get_code_from_language(language_name):
    language_name = language_name.lower()
    for code, name in LANGUAGES.items():
        if name.lower() == language_name:
            return code
    return 'Unknown'

language_name = input("Enter the language name: ")
language_code = get_code_from_language(language_name)
print(f"The language code corresponding to '{language_name}' is: {language_code}")


Enter the language name:  Hindi


The language code corresponding to 'Hindi' is: hi


In [5]:
def translate_text(input_text, source_lang, target_lang):
    try:
        translator = Translator()
        translated_text = translator.translate(input_text, src=source_lang, dest=target_lang)
        return translated_text.text
    except Exception as e:
        print("Translation error:", e)
        return None

def translate(input_text, source_lang, target_lang):
    translated_text = translate_text(input_text, source_lang, target_lang)
    print(translated_text)

In [6]:
input_text = "Hello, how are you?, My name is Harry"
source_lang = 'en'
target_lang = 'iw'

translate(input_text, source_lang, target_lang)

שלום, מה שלומך?, שמי הארי


In [7]:
input_text = "Hello, how are you?"
source_lang = 'en'
target_lang = 'ar'

translate(input_text, source_lang, target_lang)

مرحبا، كيف حالك؟


In [8]:
input_text = "Hello, how are you?"
source_lang = 'en'
target_lang = 'hi'

translate(input_text, source_lang, target_lang)

नमस्ते, आप कैसे हैं?


#### Text into Speech with specific Language 

In [9]:
def text_to_audio(input_text, source_lang, target_lang):
    try:
        translator = Translator()
        translated_text = translator.translate(input_text, src=source_lang, dest=target_lang)
        return translated_text.text
    except Exception as e:
        print("Translation error:", e)
        return None

def translate(input_text, source_lang, target_lang):
    translated_text = translate_text(input_text, source_lang, target_lang)
    print(translated_text)
    tts = gTTS(translated_text, lang=target_lang)
    tts.save("translated_audio.mp3")
    os.system("start translated_audio.mp3")

input_text = "Hello, how are you?"
source_lang = "en"  
target_lang = "ar"  
translate(input_text, source_lang, target_lang)

مرحبا، كيف حالك؟


#### English Audio as input and Hindi Audi as output 

In [10]:
def speech_to_audio_conversion(input_text, source_lang, target_lang):
    try:
        translator = Translator()
        translated_text = translator.translate(input_text, src=source_lang, dest=target_lang)
        return translated_text.text
    except Exception as e:
        print("Translation error:", e)
        return None

def translate(input_text, source_lang, target_lang):
    translated_text = speech_to_audio_conversion(input_text, source_lang, target_lang)
    print("Translated text:", translated_text)
    tts = gTTS(translated_text, lang=target_lang)
    tts.save("translated_audio.mp3")
    os.system("start translated_audio.mp3")

def take_voice_input():
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("Speak the text you want to translate...")
        recognizer.adjust_for_ambient_noise(source)
        audio = recognizer.listen(source)
    try:
        print("Recognizing...")
        input_text = recognizer.recognize_google(audio)
        print("You said:", input_text)
        return input_text
    except sr.UnknownValueError:
        print("Sorry, could not understand audio.")
        return ""
    except sr.RequestError as e:
        print("Error fetching results; {0}".format(e))
        return ""

input_text = take_voice_input()
source_lang = "en"
target_lang = "hi"
translate(input_text, source_lang, target_lang)

Speak the text you want to translate...
Recognizing...
You said: you might have a wonderful book of poetry but if no one knows about it it's simply not going to sell well
Translated text: आपके पास कविता की एक अद्भुत पुस्तक हो सकती है, लेकिन अगर कोई भी इसके बारे में नहीं जानता है तो यह बस अच्छी तरह से बेचने वाला नहीं है


#### Transformer 

In [12]:
import warnings
warnings.filterwarnings("ignore")

In [13]:
model_name = "Helsinki-NLP/opus-mt-en-hi"
model = MarianMTModel.from_pretrained(model_name)
tokenizer = MarianTokenizer.from_pretrained(model_name)

input_text = "Hello, how are you?"
input_ids = tokenizer.encode(input_text, return_tensors="pt")
translated_ids = model.generate(input_ids)
translated_text = tokenizer.decode(translated_ids[0], skip_special_tokens=True)
print("Translated text:", translated_text)

Translated text: हैलो, तुम कैसे हो?


#### Converting Speech to speech translation 

##### English to Hindi 

In [16]:
def english_to_hindi(model_name="Helsinki-NLP/opus-mt-en-hi"):
    model = MarianMTModel.from_pretrained(model_name)
    tokenizer = MarianTokenizer.from_pretrained(model_name)

    recognizer = sr.Recognizer()
    def play_audio(audio_file):
        os.system("start " + audio_file)

    with sr.Microphone() as source:
        print("Speak the text you want to translate...")
        recognizer.adjust_for_ambient_noise(source)
        audio = recognizer.listen(source)
    try:
        print("Recognizing...")
        input_text = recognizer.recognize_google(audio)
        print("You said:", input_text)
    except sr.UnknownValueError:
        print("Sorry, could not understand audio.")
        input_text = ""
    except sr.RequestError as e:
        print("Error fetching results; {0}".format(e))
        input_text = ""

    input_ids = tokenizer.encode(input_text, return_tensors="pt")
    translated_ids = model.generate(input_ids)
    translated_text = tokenizer.decode(translated_ids[0], skip_special_tokens=True)
    print("Translated text in Hindi:", translated_text)
    tts = gTTS(translated_text, lang='hi')
    tts.save("translated_audio.mp3")
    print("Playing translated audio...")
    play_audio("translated_audio.mp3")

english_to_hindi()

Speak the text you want to translate...
Recognizing...
You said: can you read a book and it make my whole body so cold on fire can ever want me I know that is poetry
Translated text in Hindi: आप एक पुस्तक पढ़ सकते हैं और यह मेरे पूरे शरीर को आग पर इतना ठंडा बना सकते हैं...... मैं जानता हूँ कि कविता है कि कभी नहीं चाहता.
Playing translated audio...


##### English to French 

In [17]:
def english_to_french(model_name="Helsinki-NLP/opus-mt-en-fr"):
    model = MarianMTModel.from_pretrained(model_name)
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    recognizer = sr.Recognizer()
    
    def play_audio(audio_file):
        os.system("start " + audio_file)
    with sr.Microphone() as source:
        print("Speak the text you want to translate...")
        recognizer.adjust_for_ambient_noise(source)
        audio = recognizer.listen(source)
    try:
        print("Recognizing...")
        # Recognize the speech and convert it to text
        input_text = recognizer.recognize_google(audio)
        print("You said:", input_text)
    except sr.UnknownValueError:
        print("Sorry, could not understand audio.")
        input_text = ""
    except sr.RequestError as e:
        print("Error fetching results; {0}".format(e))
        input_text = ""
    input_ids = tokenizer.encode(input_text, return_tensors="pt")
    translated_ids = model.generate(input_ids)
    translated_text = tokenizer.decode(translated_ids[0], skip_special_tokens=True)
    print("Translated text in French:", translated_text)
    tts = gTTS(translated_text, lang='fr')  
    tts.save("translated_audio.mp3")
    print("Playing translated audio...")
    play_audio("translated_audio.mp3")
english_to_french()

Speak the text you want to translate...
Recognizing...
You said: how to publish a poetry online
Translated text in French: comment publier une poésie en ligne
Playing translated audio...


##### Hindi to English 

In [19]:
def hindi_to_english(model_name="Helsinki-NLP/opus-mt-hi-en"):
    model = MarianMTModel.from_pretrained(model_name)
    tokenizer = MarianTokenizer.from_pretrained(model_name)

    recognizer = sr.Recognizer()

    def play_audio(audio_file):
        os.system("start " + audio_file)
        
    with sr.Microphone() as source:
        print("Speak the text you want to translate...")
        recognizer.adjust_for_ambient_noise(source)
        audio = recognizer.listen(source)

    try:
        print("Recognizing...")
        input_text = recognizer.recognize_google(audio, language='hi-IN')  
    except sr.UnknownValueError:
        print("Sorry, could not understand audio.")
        input_text = ""
    except sr.RequestError as e:
        print("Error fetching results; {0}".format(e))
        input_text = ""


    input_ids = tokenizer.encode(input_text, return_tensors="pt")
    translated_ids = model.generate(input_ids)
    translated_text = tokenizer.decode(translated_ids[0], skip_special_tokens=True)
    print("Translated text in English:", translated_text)
    tts = gTTS(translated_text, lang='en')  
    tts.save("translated_audio.mp3")
    print("Playing translated audio...")
    play_audio("translated_audio.mp3")

hindi_to_english()

Speak the text you want to translate...
Recognizing...
Translated text in English: You said the weather would be good today and watch even the sun
Playing translated audio...


##### English to Arabic 

In [23]:
def english_to_arabic(model_name="Helsinki-NLP/opus-mt-en-ar"):
    model = MarianMTModel.from_pretrained(model_name)
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    recognizer = sr.Recognizer()

    def play_audio(audio_file):
        os.system("start " + audio_file)

    with sr.Microphone() as source:
        recognizer.adjust_for_ambient_noise(source)
        audio = recognizer.listen(source)

    try:
        input_text = recognizer.recognize_google(audio)
        print("Input text:", input_text)  
    except sr.UnknownValueError:
        input_text = ""
    except sr.RequestError as e:
        input_text = ""

    input_ids = tokenizer.encode(input_text, return_tensors="pt")
    translated_ids = model.generate(input_ids)
    translated_text = tokenizer.decode(translated_ids[0], skip_special_tokens=True)

    print("Translated text:", translated_text)  
    tts = gTTS(translated_text, lang='ar')
    tts.save("translated_audio.mp3")

    play_audio("translated_audio.mp3")

english_to_arabic()

Translated text: ألف - الوظائف الممولة من الميزانية العادية
