In [None]:
import os
import time
import speech_recognition as sr
from transformers import MarianMTModel, MarianTokenizer
from gtts import gTTS
from playsound import playsound

# -------------------------------------------------------------------
#                        CONFIGURATION
# -------------------------------------------------------------------

# Each supported language maps to the code expected by every pipeline stage:
#   "sr"   - IETF language tag for SpeechRecognition (e.g., "en-US")
#   "gtts" - short language code for gTTS (e.g., "en")
#   "mt"   - lowercase ISO code used to build the MarianMT model name (e.g., "en")
# Entry order matters: the selection menu numbers the languages in this order.
LANGUAGES = {
    name: {"sr": speech_tag, "gtts": short_code, "mt": short_code}
    for name, speech_tag, short_code in (
        ("English", "en-US", "en"),
        ("French", "fr-FR", "fr"),
        ("Spanish", "es-ES", "es"),
        ("Arabic", "ar-EG", "ar"),
        ("German", "de-DE", "de"),
        ("Dutch", "nl-NL", "nl"),
    )
}

# The directory where TTS audio files are saved is configured by AUDIO_SAVE_PATH,
# which is defined in the next cell; point it at any writable directory if needed.

# -------------------------------------------------------------------
#              PRELOAD TRANSLATION MODELS (MarianMT)
# -------------------------------------------------------------------
# Every ordered (source, target) pair of distinct languages gets its own
# MarianMT checkpoint, loaded eagerly so later lookups are plain dict accesses.
# Pairs whose checkpoint fails to load are reported and left out of the cache.

translation_models = {}
print("Preloading translation models for all language pairs (this may take a while)...")
for src, tgt in ((a, b) for a in LANGUAGES for b in LANGUAGES if a != b):
    model_name = f"Helsinki-NLP/opus-mt-{LANGUAGES[src]['mt']}-{LANGUAGES[tgt]['mt']}"
    try:
        print(f"  {src} → {tgt}: {model_name}")
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name)
    except Exception as e:
        # Best effort: a failed pair is logged and skipped, not fatal.
        print(f"  Failed to load {model_name} for {src}→{tgt}: {e}")
    else:
        # Only cache the pair once both tokenizer and model loaded successfully.
        translation_models[(src, tgt)] = (tokenizer, model)
print("✅ All available translation models preloaded.\n")


In [9]:
# Directory in which speak_text() writes its temporary "output.mp3".
# With the empty string, os.path.join() yields the bare filename, i.e. a path
# relative to the current working directory.
AUDIO_SAVE_PATH = ""

In [14]:
# -------------------------------------------------------------------
#                         HELPER FUNCTIONS
# -------------------------------------------------------------------

def _ask_for_language(prompt, names, exclude=None):
    """Prompt until the user enters a valid menu number; return that language name.

    Args:
        prompt: Text shown to the user by input().
        names: Ordered list of language names; the menu is 1-based.
        exclude: Optional language name that may not be selected.

    Returns:
        The selected language name from `names`.
    """
    while True:
        raw = input(prompt).strip()
        try:
            number = int(raw)
        except ValueError:
            print(f"'{raw}' is not a number. Enter a value from 1 to {len(names)}.")
            continue
        # Explicit range check: 0 or a negative number would otherwise wrap
        # around through negative indexing and silently pick a wrong language.
        if not 1 <= number <= len(names):
            print(f"{number} is out of range. Enter a value from 1 to {len(names)}.")
            continue
        name = names[number - 1]
        if name == exclude:
            print(f"{name} is already the input language. Choose a different output language.")
            continue
        return name


def choose_languages():
    """Show the language menu and ask for the input and output languages.

    Invalid entries (non-numeric, out of range, or an output language equal to
    the input language) are rejected with a message and the prompt is repeated.

    Returns:
        Tuple (input_lang, output_lang) of keys from LANGUAGES.
    """
    names = list(LANGUAGES)
    print("Available Languages:")
    for i, lang in enumerate(names, start=1):
        print(f"{i}. {lang}")
    input_lang = _ask_for_language("Choose Input Language (number): ", names)
    # Translation models only exist for distinct pairs, so the output language
    # must differ from the input language.
    output_lang = _ask_for_language(
        "Choose Output Language (number): ", names, exclude=input_lang
    )
    return input_lang, output_lang

def record_speech(lang, pause_threshold=2.0, timeout=None, phrase_time_limit=None):
    """
    Record speech from the microphone and transcribe it with the Google Web Speech API.

    Args:
        lang: Key of LANGUAGES; its "sr" tag selects the recognition language.
        pause_threshold: Seconds of silence after which the recording stops
            (default 2.0).
        timeout: Maximum seconds to wait for speech to start; None (default)
            waits indefinitely.
        phrase_time_limit: Maximum seconds to record once speech has started;
            None (default) means no limit.

    Returns:
        The recognized text, or "" when nothing was heard before the timeout,
        the audio could not be understood, or the recognition request failed.
    """
    recognizer = sr.Recognizer()
    recognizer.pause_threshold = pause_threshold
    with sr.Microphone() as source:
        print("\nListening... Speak now.")
        try:
            audio = recognizer.listen(
                source, timeout=timeout, phrase_time_limit=phrase_time_limit
            )
        except sr.WaitTimeoutError:
            # Only reachable when a timeout was requested and no speech started.
            print("No speech detected before the timeout.")
            return ""
    print("Finished recording. Processing...")
    try:
        # Recognize speech using the Google Web Speech API with the chosen language.
        text = recognizer.recognize_google(audio, language=LANGUAGES[lang]["sr"])
        print(f"\nRecognized ({lang}): {text}")
        return text
    except sr.UnknownValueError:
        print("Could not understand the audio.")
    except sr.RequestError as e:
        print(f"Request error from Google Speech Recognition service: {e}")
    return ""

def translate_text(text, input_lang, output_lang):
    """
    Translate text from input_lang to output_lang using the preloaded MarianMT model.

    Args:
        text: Source text to translate.
        input_lang: Source language name (first element of a translation_models key).
        output_lang: Target language name (second element of a translation_models key).

    Returns:
        The translated string, or "" when no model is preloaded for the pair
        or when text is empty or whitespace only.
    """
    pair = (input_lang, output_lang)
    if pair not in translation_models:
        print(f"No preloaded translation model for {input_lang}→{output_lang}.")
        return ""
    # Nothing to translate: skip the model call instead of decoding whatever
    # the model generates for an empty prompt.
    if not text or not text.strip():
        return ""
    tokenizer, model = translation_models[pair]
    # truncation=True caps the input at the tokenizer's maximum length, so an
    # unusually long utterance is shortened rather than sent through unbounded.
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    translated_ids = model.generate(**inputs)
    return tokenizer.decode(translated_ids[0], skip_special_tokens=True)

def speak_text(text, output_lang):
    """
    Speak text aloud: synthesize it with gTTS, play the audio, then delete the file.

    The audio is written to "output.mp3" inside AUDIO_SAVE_PATH. The file is
    removed even when saving or playback raises, so no stale file is left behind.

    Args:
        text: Text to speak; empty or whitespace-only text is skipped.
        output_lang: Key of LANGUAGES; its "gtts" code selects the voice language.
    """
    if not text or not text.strip():
        print("Nothing to speak.")
        return
    tts = gTTS(text, lang=LANGUAGES[output_lang]["gtts"])
    filename = os.path.join(AUDIO_SAVE_PATH, "output.mp3")
    try:
        tts.save(filename)
        print("\nPlaying translated speech...")
        playsound(filename)
    finally:
        # The save itself may have failed before the file existed, so check
        # first to avoid masking the original error with FileNotFoundError.
        if os.path.exists(filename):
            os.remove(filename)

In [None]:
# -------------------------------------------------------------------
#                          MAIN PROGRAM
# -------------------------------------------------------------------

def main():
    """Run one speech-to-speech translation: choose languages, record, translate, speak.

    Stops early with a message when no speech is recognized or when the
    translation comes back empty (for example, no model for the chosen pair).
    """
    input_lang, output_lang = choose_languages()
    print(f"\nTranslating from {input_lang} to {output_lang}.\n")

    # Record speech from the microphone (STT)
    spoken_text = record_speech(input_lang)
    if not spoken_text:
        print("No speech recognized. Exiting.")
        return

    # Translate the recognized text using the preloaded model
    translated_text = translate_text(spoken_text, input_lang, output_lang)
    if not translated_text:
        # An empty translation would otherwise be handed to the TTS step,
        # which has nothing to synthesize.
        print("No translation available. Exiting.")
        return
    print(f"\nTranslated ({output_lang}): {translated_text}")

    # Use gTTS to speak the translated text
    speak_text(translated_text, output_lang)

# Run the interactive pipeline only when this code executes as the main module,
# so importing these definitions elsewhere does not start a recording session.
if __name__ == "__main__":
    main()