In [1]:
import os
import pandas as pd

# Create data folder
os.makedirs('data', exist_ok=True)

# Bilingual dataset, written as (english, spanish) pairs so that every phrase
# sits right next to its translation and the two columns cannot drift apart.
phrase_pairs = [
    ('hello', 'hola'),
    ('good morning', 'buenos días'),
    ('how are you', 'cómo estás'),
    ('thank you', 'gracias'),
    ('goodbye', 'adiós'),
    ("what's your name", 'cómo te llamas'),
    ('my name is', 'me llamo'),
    ('nice to meet you', 'mucho gusto'),
    ('please', 'por favor'),
    ('sorry', 'lo siento'),
    ('yes', 'sí'),
    ('no', 'no'),
    ('where is', 'dónde está'),
    ("I don't know", 'no sé'),
    ('help me', 'ayúdame'),
    ('I understand', 'entiendo'),
    ("I don't understand", 'no entiendo'),
    ('can you help me', 'puedes ayudarme'),
    ('I love you', 'te quiero'),
    ('see you later', 'hasta luego'),
]

# Column-oriented view of the same pairs: one list per language.
data = {
    'english': [english for english, _ in phrase_pairs],
    'spanish': [spanish for _, spanish in phrase_pairs],
}

# Save to CSV (no index column, so the file holds only the two language columns)
corpus_path = 'data/bilingual_corpus.csv'
df = pd.DataFrame(data)
df.to_csv(corpus_path, index=False)
print("✅ Dataset saved to 'data/bilingual_corpus.csv'")

✅ Dataset saved to 'data/bilingual_corpus.csv'


In [2]:
import pandas as pd
import os
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
import joblib

def _fit_and_save(source_texts, target_texts, model_path, vectorizer_path):
    """Fit a CountVectorizer + MultinomialNB pair and persist both with joblib.

    Args:
        source_texts: Phrases to vectorize (the classifier's inputs).
        target_texts: Phrases used as class labels, aligned with source_texts.
        model_path: Destination file for the fitted classifier.
        vectorizer_path: Destination file for the fitted vectorizer.

    Returns:
        A (vectorizer, features, model) tuple, where features is the matrix
        produced by fit_transform on source_texts.
    """
    vectorizer = CountVectorizer()
    features = vectorizer.fit_transform(source_texts)

    model = MultinomialNB()
    model.fit(features, target_texts)

    # Save model and vectorizer
    joblib.dump(model, model_path)
    joblib.dump(vectorizer, vectorizer_path)
    return vectorizer, features, model


# Create model folder
os.makedirs('model', exist_ok=True)

# Load dataset
df = pd.read_csv('data/bilingual_corpus.csv')

# --- English to Spanish Model ---
y_span = df['spanish']
eng_vectorizer, X_eng, eng2span_model = _fit_and_save(
    df['english'],
    y_span,
    'model/eng2span_model.pkl',
    'model/eng_vectorizer.pkl',
)

# --- Spanish to English Model ---
y_eng = df['english']
span_vectorizer, X_span, span2eng_model = _fit_and_save(
    df['spanish'],
    y_eng,
    'model/span2eng_model.pkl',
    'model/span_vectorizer.pkl',
)

print(" ✅Both models and vectorizers saved in 'model/' folder.")


✅ Both models and vectorizers saved in 'model/' folder.


In [3]:
# --- Spanish to English Model ---
# NOTE(review): this cell repeats the Spanish → English training that the
# previous cell already performs and overwrites the same two files in 'model/'.
# It relies on `df`, `CountVectorizer`, `MultinomialNB` and `joblib` still being
# in the kernel namespace from that cell.
y_eng = df['english']

span_vectorizer = CountVectorizer()
X_span = span_vectorizer.fit_transform(df['spanish'])

# fit() returns the estimator itself, so construction and fitting can be chained.
span2eng_model = MultinomialNB().fit(X_span, y_eng)

# Save Spanish to English model and vectorizer
joblib.dump(span2eng_model, 'model/span2eng_model.pkl')
joblib.dump(span_vectorizer, 'model/span_vectorizer.pkl')

# The message says "Both", but only the Spanish → English pair is trained here.
print("✅ Both models trained and saved successfully!")


✅ Both models trained and saved successfully!


In [4]:
def translate_text(text, direction='eng2span'):
    """Translate a phrase using the classifiers saved under 'model/'.

    The phrase is lower-cased, vectorized with the saved vectorizer for the
    requested direction, and the saved classifier's prediction is returned.

    Args:
        text: Phrase to translate.
        direction: 'eng2span' (English → Spanish) or 'span2eng'
            (Spanish → English).

    Returns:
        The predicted translation, "Invalid direction" for an unrecognised
        direction, or "Translation not found" when the phrase is blank or
        shares no token with the training vocabulary. These are the same
        sentinel strings the dictionary-based translate_text in this notebook
        returns.
    """
    if direction == 'eng2span':
        vectorizer_path = 'model/eng_vectorizer.pkl'
        model_path = 'model/eng2span_model.pkl'
    elif direction == 'span2eng':
        vectorizer_path = 'model/span_vectorizer.pkl'
        model_path = 'model/span2eng_model.pkl'
    else:
        # Previously any other value silently selected the Spanish model.
        return "Invalid direction"

    if not text.strip():
        return "Translation not found"

    # NOTE: joblib.load unpickles arbitrary objects; only load files this
    # notebook wrote itself.
    vectorizer = joblib.load(vectorizer_path)
    vector = vectorizer.transform([text.lower()])

    # An all-zero vector means no known token was present. The classifier
    # would still return some class, which would be an arbitrary phrase, so
    # report the miss instead.
    if vector.nnz == 0:
        return "Translation not found"

    model = joblib.load(model_path)
    prediction = model.predict(vector)
    return prediction[0]

# 🔍 Example usage: one phrase per direction
spanish_hello = translate_text("hello", direction='eng2span')
print("EN → ES:", spanish_hello)
english_thanks = translate_text("gracias", direction='span2eng')
print("ES → EN:", english_thanks)


EN → ES: hola
ES → EN: thank you


In [5]:
!pip install SpeechRecognition pyaudio gTTS playsound

Collecting SpeechRecognition
  Obtaining dependency information for SpeechRecognition from https://files.pythonhosted.org/packages/aa/cd/4b5f5d04c8a4e25c376858d0ad28c325f079f17c82bf379185abf45e41bf/speechrecognition-3.14.3-py3-none-any.whl.metadata
  Downloading speechrecognition-3.14.3-py3-none-any.whl.metadata (30 kB)
Collecting pyaudio
  Obtaining dependency information for pyaudio from https://files.pythonhosted.org/packages/82/d8/f043c854aad450a76e476b0cf9cda1956419e1dacf1062eb9df3c0055abe/PyAudio-0.2.14-cp311-cp311-win_amd64.whl.metadata
  Downloading PyAudio-0.2.14-cp311-cp311-win_amd64.whl.metadata (2.7 kB)
Collecting gTTS
  Obtaining dependency information for gTTS from https://files.pythonhosted.org/packages/e3/6c/8b8b1fdcaee7e268536f1bb00183a5894627726b54a9ddc6fc9909888447/gTTS-2.5.4-py3-none-any.whl.metadata
  Downloading gTTS-2.5.4-py3-none-any.whl.metadata (4.1 kB)
Collecting playsound
  Downloading playsound-1.3.0.tar.gz (7.7 kB)
  Preparing metadata (setup.py): started


In [3]:
import speech_recognition as sr
from gtts import gTTS
import playsound
import tempfile

def recognize_speech(language='en-US'):
    """Listen on an `sr.Microphone()` source and transcribe what was said.

    Args:
        language: Language tag forwarded to `recognize_google`
            (for example 'en-US' or 'es-ES').

    Returns:
        The transcribed text, or an empty string when the audio could not be
        understood or the recognition request failed (a message is printed in
        either case).
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as mic:
        print(f"Speak now ({language}):")
        captured = recognizer.listen(mic)

    try:
        transcript = recognizer.recognize_google(captured, language=language)
    except sr.UnknownValueError:
        print("Sorry, could not understand.")
        return ""
    except sr.RequestError:
        print("Speech recognition service error.")
        return ""
    else:
        print("You said:", transcript)
        return transcript

def speak_text(text, lang='en'):
    """Synthesise `text` with gTTS and open the resulting MP3 in the OS default player.

    Args:
        text: Text to speak. Blank text is ignored.
        lang: gTTS language code for the voice (for example 'en' or 'es').

    Returns:
        None.

    Notes:
        The MP3 is written to a closed temporary file rather than to an open
        NamedTemporaryFile: on Windows a named temporary file cannot be opened
        a second time while it is still open, so saving into it fails. The
        player is launched asynchronously, so the file is intentionally left
        in the temp directory instead of being deleted when this function
        returns.
    """
    import os
    import subprocess
    import sys

    if not text or not text.strip():
        # Nothing to say; avoid creating a temp file or launching a player.
        return

    tts = gTTS(text=text, lang=lang)

    # Reserve a unique path, then close our handle so gTTS can write to it.
    fd, path = tempfile.mkstemp(suffix='.mp3')
    os.close(fd)
    try:
        tts.save(path)
    except Exception:
        # Do not leave an empty file behind if synthesis fails.
        os.remove(path)
        raise

    if sys.platform.startswith('win'):
        # os.startfile opens the file with its associated application and
        # avoids the `start "<path>"` pitfall, where the first quoted argument
        # is taken as the window title instead of the file to open.
        os.startfile(path)
    elif sys.platform == 'darwin':
        subprocess.call(['open', path])
    else:
        subprocess.call(['xdg-open', path])
        

def translate_text(text, direction='eng2span'):
    """Translate a phrase by exact lookup in a small bilingual dictionary.

    The phrase is normalised before lookup (Unicode NFC, lower-cased, leading,
    trailing and repeated whitespace collapsed) so that input such as
    "  Good   Morning " or accents typed as combining characters still match.

    Args:
        text: Phrase to translate.
        direction: 'eng2span' (English → Spanish) or 'span2eng'
            (Spanish → English).

    Returns:
        The translated phrase, "Translation not found" when the phrase is not
        in the dictionary, or "Invalid direction" for any other direction.
    """
    import unicodedata

    def _normalize(phrase):
        # NFC makes 'i' + combining accent equal to the precomposed 'í';
        # split()/join trims the ends and collapses inner runs of whitespace.
        return ' '.join(unicodedata.normalize('NFC', phrase).lower().split())

    # simple bilingual dictionary
    eng2span_dict = {
        'hello': 'hola',
        'good morning': 'buenos días',
        'how are you': 'cómo estás',
        'thank you': 'gracias',
        'goodbye': 'adiós'
        # add more as needed
    }

    if direction == 'eng2span':
        lookup = {_normalize(eng): span for eng, span in eng2span_dict.items()}
    elif direction == 'span2eng':
        lookup = {_normalize(span): eng for eng, span in eng2span_dict.items()}
    else:
        return "Invalid direction"

    return lookup.get(_normalize(text), "Translation not found")



In [7]:
# 🔁 Real-time flow, run once per direction:
#   1. English speaker to Spanish listener
#   2. Spanish speaker to English listener
# Each entry is (recognition language, translation direction, printed label, voice language).
for listen_language, direction, label, voice_language in (
    ('en-US', 'eng2span', "📝 Translated (ES):", 'es'),
    ('es-ES', 'span2eng', "📝 Translated (EN):", 'en'),
):
    heard = recognize_speech(language=listen_language)
    if not heard:
        # recognize_speech returns "" when nothing usable was captured.
        continue
    translated = translate_text(heard, direction=direction)
    print(label, translated)
    speak_text(translated, lang=voice_language)


Speak now (en-US):
Sorry, could not understand.
Speak now (es-ES):
Sorry, could not understand.
