In [1]:
import os
import wave
import pyaudio
import vosk
import json
from pydub import AudioSegment
import tkinter as tk
from tkinter import ttk, messagebox
from googletrans import Translator
from gtts import gTTS
import pygame

pygame 2.6.0 (SDL 2.28.4, Python 3.9.19)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
# Directory of the Vosk model used for speech recognition.
# Set the VOSK_MODEL_PATH environment variable to point at a model on another
# machine; when it is unset the original hard-coded location is used.
vosk_model_path = os.environ.get(
    "VOSK_MODEL_PATH",
    "C:\\Users\\swarn\\Desktop\\Projects\\Real Time Interpreter\\vosk-model-small-en-us-0.15",
)

In [3]:
def record_audio(filename, duration=5):
    """Record mono 16-bit audio at 16 kHz through PyAudio and save it as a WAV file.

    The call blocks until the requested number of chunks has been read.

    Args:
        filename: Path of the WAV file to write.
        duration: Approximate recording length in seconds (default 5). The
            number of 1024-frame chunks read is truncated to a whole number.

    Raises:
        Any exception raised by PyAudio or the ``wave`` module is propagated;
        the audio stream and the PyAudio session are released first.
    """
    chunk = 1024  # frames requested per read
    sample_format = pyaudio.paInt16  # 16-bit format
    channels = 1  # Mono
    fs = 16000  # 16kHz sample rate

    p = pyaudio.PyAudio()
    try:
        # Query the sample width while the PyAudio session is still open.
        sample_width = p.get_sample_size(sample_format)

        stream = p.open(format=sample_format,
                        channels=channels,
                        rate=fs,
                        input=True,
                        frames_per_buffer=chunk)
        try:
            frames = [stream.read(chunk)
                      for _ in range(int(fs / chunk * duration))]
        finally:
            # Always release the stream, even if a read failed.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    # The context manager closes the file even if writing fails.
    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(fs)
        wf.writeframes(b''.join(frames))

In [4]:
def convert_audio_to_16kHz(input_audio):
    """Convert a WAV file to 16 kHz mono 16-bit PCM and save it next to the input.

    The output keeps the input's directory and prefixes only the file name
    with ``converted_`` (e.g. ``dir/a.wav`` -> ``dir/converted_a.wav``).

    Args:
        input_audio: Path of the source WAV file.

    Returns:
        The path of the converted WAV file.
    """
    sound = AudioSegment.from_wav(input_audio)
    # Mono, 16 kHz and 2-byte samples: the exact format that
    # recognize_speech_vosk checks for before transcribing.
    sound = sound.set_frame_rate(16000).set_channels(1).set_sample_width(2)

    # Prefix the file name only; prefixing the whole path would corrupt any
    # directory component.
    directory, name = os.path.split(input_audio)
    output_audio = os.path.join(directory, "converted_" + name)
    sound.export(output_audio, format="wav")
    return output_audio

In [5]:
def recognize_speech_vosk(audio_file, model_path):
    """Transcribe a 16 kHz mono 16-bit WAV file with a Vosk model.

    Args:
        audio_file: Path of the WAV file to transcribe.
        model_path: Path passed to ``vosk.Model`` to load the model.

    Returns:
        The recognized text of the whole file, with the text of every
        completed utterance and the final partial result joined by single
        spaces (may be an empty string). If anything fails, including an
        unsupported audio format, no exception is raised; instead a string
        starting with ``"Error in speech recognition:"`` is returned.
    """
    try:
        # The context manager closes the file on every exit path.
        with wave.open(audio_file, "rb") as wf:
            if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getframerate() != 16000:
                raise ValueError("Audio file must be WAV format mono PCM with a 16kHz sample rate")

            model = vosk.Model(model_path)
            rec = vosk.KaldiRecognizer(model, wf.getframerate())

            segments = []
            while True:
                data = wf.readframes(4000)
                if len(data) == 0:
                    break
                # A True result marks the end of one utterance. Collect it and
                # keep reading so later utterances are not dropped.
                if rec.AcceptWaveform(data):
                    segments.append(json.loads(rec.Result()).get('text', ''))

            # Whatever is still buffered after the last chunk.
            segments.append(json.loads(rec.FinalResult()).get('text', ''))

        return " ".join(segment for segment in segments if segment)
    except Exception as e:
        return f"Error in speech recognition: {e}"

In [6]:
def translate_text(text, src_lang, dest_lang):
    """Translate ``text`` from ``src_lang`` to ``dest_lang`` with googletrans.

    Args:
        text: The text to translate.
        src_lang: Language code of ``text``.
        dest_lang: Language code to translate into.

    Returns:
        The ``text`` attribute of the translation result.
    """
    # A fresh Translator is created for every call.
    result = Translator().translate(text, src=src_lang, dest=dest_lang)
    return result.text

In [7]:
def text_to_speech(text, lang, filename):
    """Synthesize ``text`` with gTTS and save the audio to ``filename``.

    Args:
        text: The text to speak.
        lang: Language code handed to gTTS.
        filename: Destination path; an existing file there is removed first.
    """
    stale_file_present = os.path.exists(filename)
    if stale_file_present:
        # Start from a clean slate so old audio never survives a new save.
        os.remove(filename)

    gTTS(text=text, lang=lang).save(filename)

In [8]:
def play_audio(filename):
    """Play an audio file with pygame's music mixer and block until it ends.

    Args:
        filename: Path of the audio file to play.

    The file is unloaded from the mixer afterwards, even if playback raises,
    so it can be deleted or overwritten by the caller's next run.
    """
    pygame.mixer.init()
    pygame.mixer.music.load(filename)
    try:
        pygame.mixer.music.play()
        clock = pygame.time.Clock()  # one clock reused for the whole wait loop
        while pygame.mixer.music.get_busy():
            clock.tick(10)  # cap polling at about ten checks per second
    finally:
        # Release the file handle held by the mixer (pygame >= 2.0).
        pygame.mixer.music.unload()

In [9]:
def on_submit():
    """Handle the Submit button: record, recognize, translate, speak, report.

    Reads the selected language codes, records audio to ``input.wav``,
    converts and transcribes it, translates the transcript, saves the spoken
    translation to ``output.mp3`` and plays it, then shows both texts in a
    message box. The whole pipeline runs inside this callback, so the call
    blocks until playback has finished.

    Failures are reported in an error dialog. The Submit button is re-enabled
    and the status label cleared on every exit path.
    """
    input_lang = input_lang_var.get().strip()
    output_lang = output_lang_var.get().strip()

    # Refuse to start without both language codes.
    if not input_lang or not output_lang:
        messagebox.showerror("Error", "Please select both an input and an output language.")
        return

    # Disable the submit button while a request is in progress
    submit_button.config(state=tk.DISABLED)
    try:
        status_label.config(text="Listening... Please speak.")
        root.update_idletasks()  # Update UI to reflect the change immediately

        record_audio("input.wav")
        converted_audio = convert_audio_to_16kHz("input.wav")

        status_label.config(text="Processing... Please wait.")
        root.update_idletasks()

        recognized_text = recognize_speech_vosk(converted_audio, vosk_model_path)
        # The recognizer reports failures as a string starting with "Error".
        if recognized_text.startswith("Error"):
            messagebox.showerror("Error", recognized_text)
            return
        if not recognized_text.strip():
            # Nothing was heard, so there is nothing to translate or speak.
            messagebox.showerror("Error", "No speech was recognized. Please try again.")
            return

        translated_text = translate_text(recognized_text, input_lang, output_lang)
        text_to_speech(translated_text, output_lang, "output.mp3")
        play_audio("output.mp3")
    except Exception as exc:
        # Recording, translation, synthesis and playback can all raise.
        # Report the failure instead of leaving the UI stuck.
        messagebox.showerror("Error", f"An unexpected error occurred: {exc}")
        return
    finally:
        # Enable the submit button and clear the status message
        submit_button.config(state=tk.NORMAL)
        status_label.config(text="")

    # Display the recognized text and translated text in a message box
    messagebox.showinfo("Recognized Text", f"Recognized Text: {recognized_text}\nTranslated Text: {translated_text}")

In [10]:
# Build the main window: two language selectors, a Submit button and a status
# line, then hand control to Tk's event loop.
root = tk.Tk()
root.title("Language Translator")

languages = ["en", "es", "fr", "de", "zh-cn", "ja", "hi", "ar"]

# Pre-select the first two codes so Submit never runs with an empty language.
input_lang_var = tk.StringVar(value=languages[0])
output_lang_var = tk.StringVar(value=languages[1])

tk.Label(root, text="Select Input Language").pack(pady=10)
# Read-only: only the listed codes can be chosen, no free text.
input_lang_menu = ttk.Combobox(root, textvariable=input_lang_var,
                               values=languages, state="readonly")
input_lang_menu.pack(pady=10)

tk.Label(root, text="Select Output Language").pack(pady=10)
output_lang_menu = ttk.Combobox(root, textvariable=output_lang_var,
                                values=languages, state="readonly")
output_lang_menu.pack(pady=10)

submit_button = tk.Button(root, text="Submit", command=on_submit)
submit_button.pack(pady=20)

# Status label for showing "Listening..." or "Processing..." messages
status_label = tk.Label(root, text="", fg="blue")
status_label.pack(pady=10)

# Blocks this cell until the window is closed.
root.mainloop()