In [20]:
!pip install gradio librosa gTTS transformers sentencepiece

Collecting sentencepiece
  Downloading sentencepiece-0.2.0-cp310-cp310-win_amd64.whl.metadata (8.3 kB)
Downloading sentencepiece-0.2.0-cp310-cp310-win_amd64.whl (991 kB)
   ---------------------------------------- 0.0/991.5 kB ? eta -:--:--
   ------------------------------- -------- 786.4/991.5 kB 8.3 MB/s eta 0:00:01
   ---------------------------------------- 991.5/991.5 kB 7.8 MB/s eta 0:00:00
Installing collected packages: sentencepiece
Successfully installed sentencepiece-0.2.0


In [7]:
import gradio as gr
import numpy as np
import librosa
import os
import uuid
from gtts import gTTS
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM

In [9]:
# Speech-to-text model shared by both translation directions.
# Whisper processes audio in 30-second windows; chunk_length_s makes the
# pipeline split longer recordings into windows and merge the text, instead of
# truncating or rejecting long uploads (behaviour depends on the installed
# transformers version).
asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-small",
    chunk_length_s=30,
)

# Tokenizer and seq2seq model used for English -> Sinhala translation.
print("Loading English-Sinhala model...")
sinhala_tokenizer = AutoTokenizer.from_pretrained("thilina/mt5-sinhalese-english")
sinhala_model = AutoModelForSeq2SeqLM.from_pretrained("thilina/mt5-sinhalese-english")

# Text-to-text pipeline used for Singlish -> English translation.
print("Loading Singlish-English model...")
singlish_pipe = pipeline("text2text-generation", model="raqdo09/singlish-to-english-synthetic")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading English-Sinhala model...
Loading Singlish-English model...


model.safetensors:  24%|##4       | 283M/1.17G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/20.7k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

In [10]:
def translate_english_to_sinhala(english_text):
    """Translate English text to Sinhala using the module-level mT5 model.

    Args:
        english_text: English text to translate.

    Returns:
        The decoded Sinhala text. Empty or whitespace-only input returns an
        empty string without invoking the model. This function never raises:
        on any failure it returns a string starting with "Translation error: ".
    """
    try:
        # Nothing to translate; skip the model instead of translating the bare prefix.
        if not english_text or not english_text.strip():
            return ""

        # Task prefix prepended to the source text.
        # NOTE(review): the exact prefix wording/casing this checkpoint was
        # fine-tuned with is not visible here - confirm against the model card.
        input_text = f"translate English to Sinhala: {english_text}"
        inputs = sinhala_tokenizer(
            input_text,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512,
        )

        # Plain beam search: deterministic output for the same input.
        # generate() is called directly; the deprecated as_target_tokenizer()
        # context is meant for tokenizing labels and is not needed to generate.
        outputs = sinhala_model.generate(
            **inputs,
            max_length=256,
            num_beams=4,
            early_stopping=True,
        )

        sinhala_text = sinhala_tokenizer.decode(outputs[0], skip_special_tokens=True)
        return sinhala_text
    except Exception as e:
        return f"Translation error: {str(e)}"

In [11]:
def translate_singlish_to_english(singlish_text):
    """Translate Singlish text to English using the module-level pipeline.

    Args:
        singlish_text: Singlish text to translate.

    Returns:
        The generated English text. Empty or whitespace-only input returns an
        empty string without invoking the model. This function never raises:
        on any failure it returns a string starting with "Translation error: ".
    """
    try:
        # Nothing to translate; avoid running the model on empty input.
        if not singlish_text or not singlish_text.strip():
            return ""

        # Sampling is disabled so the same input always yields the same translation.
        result = singlish_pipe(singlish_text, max_length=256, do_sample=False)
        return result[0]['generated_text']
    except Exception as e:
        return f"Translation error: {str(e)}"

In [12]:
def process_english_to_sinhala_audio(file_path):
    """Transcribe English audio, translate it to Sinhala and synthesize speech.

    Args:
        file_path: Path to the uploaded audio file, or None if nothing was uploaded.

    Returns:
        A 4-tuple ``(english_text, sinhala_text, audio_path, status)``.
        Elements that could not be produced are None. ``audio_path`` is the
        mp3 written to the current working directory, or None when speech
        synthesis failed. This function never raises; unexpected failures are
        reported as ``(None, None, None, "Error: ...")``.
    """
    try:
        if file_path is None:
            return None, None, None, "Please upload an audio file"

        # Load the file resampled to 16 kHz (the sample rate is passed explicitly).
        audio_data, _ = librosa.load(file_path, sr=16000)

        # Speech recognition (audio -> English text)
        result = asr_pipeline(audio_data.astype(np.float32))
        english_text = result["text"]

        if not english_text.strip():
            return english_text, None, None, "No speech detected in audio"

        # Translation (English -> Sinhala)
        sinhala_text = translate_english_to_sinhala(english_text)

        # The translator reports failures as text rather than raising; surface
        # that as the status instead of reading the error message aloud.
        if sinhala_text.startswith("Translation error:"):
            return english_text, None, None, sinhala_text

        # Text-to-speech is best-effort: keep the text results even if it fails.
        output_filename = f"sinhala_audio_{uuid.uuid4().hex}.mp3"
        try:
            tts = gTTS(text=sinhala_text, lang='si')  # 'si' for Sinhala
            tts.save(output_filename)
            status = "Translation complete! (Note: Sinhala TTS may have limited support)"
        except Exception as tts_error:
            # Remove any partially written file so it is not left behind.
            try:
                if os.path.exists(output_filename):
                    os.remove(output_filename)
            except OSError:
                pass
            output_filename = None
            status = f"Translation complete, but Sinhala audio generation failed: {tts_error}"

        return english_text, sinhala_text, output_filename, status

    except Exception as e:
        return None, None, None, f"Error: {str(e)}"

In [13]:
def process_singlish_to_english_audio(file_path):
    """Transcribe Singlish audio, translate it to English and synthesize speech.

    Args:
        file_path: Path to the uploaded audio file, or None if nothing was uploaded.

    Returns:
        A 4-tuple ``(singlish_text, english_text, audio_path, status)``.
        Elements that could not be produced are None. ``audio_path`` is the
        mp3 written to the current working directory, or None when speech
        synthesis failed. This function never raises; unexpected failures are
        reported as ``(None, None, None, "Error: ...")``.
    """
    try:
        if file_path is None:
            return None, None, None, "Please upload an audio file"

        # Load the file resampled to 16 kHz (the sample rate is passed explicitly).
        audio_data, _ = librosa.load(file_path, sr=16000)

        # Speech recognition (audio -> Singlish text)
        result = asr_pipeline(audio_data.astype(np.float32))
        singlish_text = result["text"]

        if not singlish_text.strip():
            return singlish_text, None, None, "No speech detected in audio"

        # Translation (Singlish -> English)
        english_text = translate_singlish_to_english(singlish_text)

        # The translator reports failures as text rather than raising; surface
        # that as the status instead of reading the error message aloud.
        if english_text.startswith("Translation error:"):
            return singlish_text, None, None, english_text

        # Text-to-speech is best-effort: a TTS failure must not discard the
        # transcription and translation that were already produced.
        output_filename = f"english_audio_{uuid.uuid4().hex}.mp3"
        try:
            tts = gTTS(text=english_text, lang='en')
            tts.save(output_filename)
            status = "Translation complete!"
        except Exception as tts_error:
            # Remove any partially written file so it is not left behind.
            try:
                if os.path.exists(output_filename):
                    os.remove(output_filename)
            except OSError:
                pass
            output_filename = None
            status = f"Translation complete, but English audio generation failed: {tts_error}"

        return singlish_text, english_text, output_filename, status

    except Exception as e:
        return None, None, None, f"Error: {str(e)}"

In [20]:
# Gradio UI: one Blocks app with two tabs, one per translation direction.
# Each tab has an audio upload + button on the left and, on the right, the
# transcription, the translation, the synthesized audio and a status line.
# Components are laid out in the order they are created inside each context.
with gr.Blocks(title="Multi-Language Speech Translator") as app:
    gr.Markdown("# Multi-Language Speech Translation App")
    gr.Markdown("Choose between English-to-Sinhala or Singlish-to-English translation")
    
    with gr.Tabs():
        # Tab 1: English to Sinhala
        with gr.TabItem("English → Sinhala"):
            gr.Markdown("### Upload English audio to get Sinhala translation")
            
            with gr.Row():
                # Left column: input audio and the trigger button.
                with gr.Column():
                    en_file_input = gr.Audio(type="filepath", label="Upload English Audio File")
                    en_translate_button = gr.Button("Translate to Sinhala", variant="primary")
                
                # Right column: read-only results, in the same order as the
                # handler's 4-tuple return value.
                with gr.Column():
                    en_original_text = gr.Textbox(label="English Transcription", interactive=False)
                    si_translated_text = gr.Textbox(label="Sinhala Translation", interactive=False)
                    si_audio_output = gr.Audio(label="Sinhala Audio Output", type="filepath")
                    en_status_output = gr.Textbox(label="Status", interactive=False)
            
            # The handler's four return values map positionally onto `outputs`.
            en_translate_button.click(
                fn=process_english_to_sinhala_audio,
                inputs=en_file_input,
                outputs=[en_original_text, si_translated_text, si_audio_output, en_status_output]
            )
        
        # Tab 2: Singlish to English
        with gr.TabItem("Singlish → English"):
            gr.Markdown("### Upload Singlish audio to get proper English translation")
            
            with gr.Row():
                # Left column: input audio and the trigger button.
                with gr.Column():
                    sg_file_input = gr.Audio(type="filepath", label="Upload Singlish Audio File")
                    sg_translate_button = gr.Button("Translate to English", variant="primary")
                
                # Right column: read-only results, in the same order as the
                # handler's 4-tuple return value.
                with gr.Column():
                    sg_original_text = gr.Textbox(label="Singlish Transcription", interactive=False)
                    en_translated_text = gr.Textbox(label="English Translation", interactive=False)
                    en_audio_output = gr.Audio(label="English Audio Output", type="filepath")
                    sg_status_output = gr.Textbox(label="Status", interactive=False)
            
            # The handler's four return values map positionally onto `outputs`.
            sg_translate_button.click(
                fn=process_singlish_to_english_audio,
                inputs=sg_file_input,
                outputs=[sg_original_text, en_translated_text, en_audio_output, sg_status_output]
            )

# Launch the app.
# NOTE(review): share=True requests a public *.gradio.live URL in addition to
# the local one (see the cell output) - confirm that exposing the app publicly
# is intended before sharing this notebook.
if __name__ == "__main__":
    app.launch(share=True)

* Running on local URL:  http://127.0.0.1:7864
* Running on public URL: https://347727ef64ff5fb604.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


