In [None]:
!pip install SpeechRecognition googletrans==4.0.0-rc1 gtts pydub



In [None]:
!apt-get install ffmpeg

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.


In [None]:
import numpy as np
import pandas as pd
import os
from google.colab import drive
from sklearn.preprocessing import OneHotEncoder
import librosa
import librosa.display
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from keras.models import load_model
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from IPython.display import Audio, display
import speech_recognition as sr
from googletrans import Translator
from gtts import gTTS
from pydub import AudioSegment

In [None]:
# Silence every Python warning for the rest of the session
warnings.filterwarnings('ignore')

# Expose Google Drive under /content/drive
drive.mount('/content/drive')

# Saved Keras model used for emotion recognition, read from Drive
emotion_model = load_model('/content/drive/MyDrive/emotion_model.h5')

# Class names indexed by the model's arg-max output
# (adjust based on your dataset)
emotion_labels = [
    'angry',
    'disgust',
    'fear',
    'happy',
    'neutral',
    'sad',
    'surprise',
]

# Shared speech recognizer and translator instances
recognizer = sr.Recognizer()
translator = Translator()

# Supported destination languages: language code -> display name
LANGUAGES = dict([
    ('en', 'English'),
    ('zh-cn', 'Mandarin Chinese'),
    ('hi', 'Hindi'),
    ('es', 'Spanish'),
    ('fr', 'French'),
    ('ar', 'Arabic'),
    ('it', 'Italian'),
    ('ru', 'Russian'),
    ('pt', 'Portuguese'),
    ('de', 'German'),
    ('ja', 'Japanese'),
])


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).




In [None]:

# Function to extract MFCC features
def extract_mfcc(filename):
    """Return the time-averaged MFCC vector of an audio file.

    Loads the file with librosa using ``offset=0.5`` and ``duration=3``,
    computes 40 MFCC coefficients, averages the transposed coefficient
    matrix along its first axis, and reshapes the result to ``(1, 40, 1)``.

    Args:
        filename: Path of the audio file to load.

    Returns:
        A NumPy array of shape ``(1, 40, 1)``.
    """
    # `sample_rate` instead of `sr`: avoids shadowing the speech_recognition alias.
    signal, sample_rate = librosa.load(filename, offset=0.5, duration=3)
    coefficients = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=40)
    averaged = coefficients.T.mean(axis=0)
    return averaged.reshape(1, 40, 1)

def predict_and_print_emotion(audio_path):
    """Classify the emotion of an audio file, print it, and return it.

    Features come from ``extract_mfcc``; the label is the entry of
    ``emotion_labels`` at the arg-max of ``emotion_model.predict``'s output.

    Args:
        audio_path: Path of the audio file to classify.

    Returns:
        The predicted label taken from ``emotion_labels``.
    """
    features = extract_mfcc(audio_path)
    scores = emotion_model.predict(features)
    best_index = np.argmax(scores)
    emotion = emotion_labels[best_index]
    print(f"Detected Emotion: {emotion}")
    return emotion

# Function to display audio and exit
def display_audio_and_exit(file_path):
    """Render an inline audio player for ``file_path``, then stop execution.

    Args:
        file_path: Path of the audio file to play.

    Raises:
        SystemExit: Always, after the player and farewell message are shown.
    """
    print("\nHere is the translated audio:")
    player = Audio(file_path)
    display(player)
    print("Goodbye!")
    raise SystemExit()

# Function to translate text and generate audio
def translate_and_generate_audio(text, target_lang):
    """Translate ``text``, synthesize it as speech, and hand off for playback.

    The translation is printed, spoken with gTTS into ``response.mp3`` in the
    working directory, and passed to ``display_audio_and_exit``.

    Args:
        text: Source text to translate.
        target_lang: Destination language code, used for both the
            translation and the speech synthesis.
    """
    translation = translator.translate(text, dest=target_lang)
    translated_text = translation.text
    print(f"Translated Text: {translated_text}")

    audio_path = "response.mp3"
    gTTS(text=translated_text, lang=target_lang).save(audio_path)

    display_audio_and_exit(audio_path)

# Function to translate from MP3 file
def translate_from_mp3(file_path, target_lang):
    """Transcribe an audio file and pass the text on for translation.

    The file is decoded with pydub, down-mixed to mono, and exported to a
    uniquely named temporary WAV file. That file is transcribed with
    ``recognizer.recognize_google`` and always removed afterwards, even if
    recognition fails. The recognized text is printed and handed to
    ``translate_and_generate_audio``.

    Args:
        file_path: Path of the audio file to transcribe.
        target_lang: Destination language code for the translation.

    Raises:
        Whatever pydub or the recognizer raise on decode/recognition failure,
        and whatever ``translate_and_generate_audio`` raises.
    """
    import tempfile  # local import: only this function needs it

    # Explicitly convert to mono before exporting
    mono_audio = AudioSegment.from_mp3(file_path).set_channels(1)

    # A unique temp path avoids clobbering (or leaving behind) a fixed
    # "temp.wav" in the working directory.
    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        exported = mono_audio.export(wav_path, format="wav")
        # pydub returns the output file handle; close it so the data is
        # flushed and the handle is not leaked.
        if hasattr(exported, "close"):
            exported.close()

        with sr.AudioFile(wav_path) as source:
            audio_data = recognizer.record(source)
        # audio_data is held in memory, so the WAV file is no longer needed.
        text = recognizer.recognize_google(audio_data)
    finally:
        if os.path.exists(wav_path):
            os.remove(wav_path)

    print(f"Recognized from MP3: {text}")

    translate_and_generate_audio(text, target_lang)

# Function for real-time speech recognition and translation
def recognize_and_translate(target_lang):
    """Capture speech from the microphone, transcribe it, and translate it.

    Calibrates for ambient noise, listens with ``timeout=5`` and
    ``phrase_time_limit=5``, transcribes with ``recognize_google``, prints
    the text, and hands it to ``translate_and_generate_audio`` while the
    microphone context is still open.

    Args:
        target_lang: Destination language code for the translation.
    """
    print("Listening for real-time translation...")
    microphone = sr.Microphone()
    with microphone as mic_input:
        recognizer.adjust_for_ambient_noise(mic_input)
        captured = recognizer.listen(mic_input, phrase_time_limit=5, timeout=5)
        spoken_text = recognizer.recognize_google(captured)
        print(f"Recognized: {spoken_text}")

        translate_and_generate_audio(spoken_text, target_lang)


In [None]:

# Main menu function
def main_menu():
    """Run the interactive text menu until the user chooses to exit.

    Each iteration asks for a destination language code from ``LANGUAGES``
    (re-prompting on an unknown code) and then for an action:

    1. real-time microphone translation (``recognize_and_translate``)
    2. translation of an audio file (``translate_from_mp3``)
    3. emotion prediction for an audio file (``predict_and_print_emotion``)
    4. exit the loop

    Any other choice prints an error and the menu is shown again.
    """
    print("Welcome to the Real-time and MP3 Translation System")

    while True:
        print("\nPlease choose a destination language:")
        for code, language in LANGUAGES.items():
            print(f"'{code}' for {language}")

        target_lang = input("Enter the language code: ").strip().lower()

        if target_lang not in LANGUAGES:
            print("Invalid language code. Please try again.")
            continue

        print("\nChoose an input method:")
        print("1. Real-time voice translation")
        print("2. Translate from MP3 file")
        print("3. Predict emotion from audio file")
        print("4. Exit")

        # Strip stray whitespace so inputs like "2 " still match an option.
        choice = input("Enter your choice (1, 2, 3, or 4): ").strip()

        if choice == '1':
            print("\nYou selected real-time voice translation.")
            recognize_and_translate(target_lang)
        elif choice == '2':
            file_path = input("\nEnter the MP3 file path: ")
            translate_from_mp3(file_path, target_lang)
        elif choice == '3':
            audio_path = input("\nEnter the audio file path: ")
            # The emotion helper is named predict_and_print_emotion; the
            # previous call to predict_emotion raised NameError.
            predict_and_print_emotion(audio_path)
        elif choice == '4':
            print("Exiting the program. Goodbye!")
            break
        else:
            print("Invalid choice. Please select a valid option.")

# Run the program: starts the interactive menu, which blocks on input().
# Menu options 1 and 2 finish in display_audio_and_exit, which raises
# SystemExit, so the cell stops after one successful translation.
main_menu()


Welcome to the Real-time and MP3 Translation System

Please choose a destination language:
'en' for English
'zh-cn' for Mandarin Chinese
'hi' for Hindi
'es' for Spanish
'fr' for French
'ar' for Arabic
'it' for Italian
'ru' for Russian
'pt' for Portuguese
'de' for German
'ja' for Japanese
Enter the language code: hi

Choose an input method:
1. Real-time voice translation
2. Translate from MP3 file
3. Predict emotion from audio file
4. Exit
Enter your choice (1, 2, 3, or 4): 2

Enter the MP3 file path: /content/03-01-05-01-01-02-02.wav
Recognized from MP3: kids are talking by the door
Translated Text: बच्चे दरवाजे से बात कर रहे हैं

Here is the translated audio:


Goodbye!


SystemExit: 