In [3]:
# ✅ Install dependencies
!pip install transformers sentencepiece gradio langdetect indic-transliteration gtts

import os
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
from langdetect import detect, DetectorFactory
from indic_transliteration import sanscript
import gradio as gr
from gtts import gTTS
import io
from datetime import datetime

# Pin langdetect's seed so detection results are reproducible
DetectorFactory.seed = 0

# Remove HUGGINGFACE_TOKEN from the environment if it is set
os.environ.pop("HUGGINGFACE_TOKEN", None)

# ✅ Load tokenizer and model
model_name = "facebook/m2m100_418M"
tokenizer = M2M100Tokenizer.from_pretrained(model_name)
model = M2M100ForConditionalGeneration.from_pretrained(model_name)

print("✅ Model and tokenizer loaded!")

# language code -> (display name shown in the UI, language code passed to gTTS)
language_map = dict(
    en=("English", "en"),
    hi=("Hindi", "hi"),
    fr=("French", "fr"),
    de=("German", "de"),
    es=("Spanish", "es"),
    zh=("Chinese", "zh"),
    ja=("Japanese", "ja"),
    ko=("Korean", "ko"),
    mr=("Marathi", "mr"),
    gu=("Gujarati", "gu"),
    ta=("Tamil", "ta"),
    ml=("Malayalam", "ml"),
)

# Codes and display names, both in language_map insertion order
src_langs = list(language_map)
full_src_langs = [display_name for display_name, _ in language_map.values()]
src_langs_with_auto = ["Auto Detect", *full_src_langs]

# Shared, module-level log of formatted translation entries
history = []

def _language_code_for(full_name):
    """Return the ``language_map`` code whose display name equals *full_name*, or None."""
    for code, (name, _tts) in language_map.items():
        if name == full_name:
            return code
    return None


def _record_translation(src_label, tgt_label, original, result):
    """Append one timestamped entry to ``history`` (capped at 10) and return the 5 newest."""
    timestamp = datetime.now().strftime("%H:%M:%S")
    history.append(f"[{timestamp}] {src_label} → {tgt_label}: {original} → {result}")
    # Keep only the 10 most recent entries (no-op while the list is short).
    del history[:-10]
    return history[-5:]


def translate_with_full_names(text, src_lang_full, tgt_lang_full, hinglish_translit=False, progress=gr.Progress()):
    """Translate *text* between two languages chosen by their display names.

    Args:
        text: Text to translate. Empty, blank, or ``None`` input yields an
            empty translation.
        src_lang_full: Display name from ``language_map`` or ``"Auto Detect"``.
        tgt_lang_full: Display name from ``language_map``.
        hinglish_translit: When true, the source selection is ignored; the text
            is transliterated from IAST to Devanagari and treated as Hindi.
        progress: Gradio progress callback, called with a fraction and ``desc``.

    Returns:
        A ``(translation, confidence, recent_history)`` tuple: the translated
        text (or an ``"Error..."`` message), the confidence as a string, and a
        list of up to the five most recent history entries.
    """
    if not text or not text.strip():
        return "", "0.0", history[-5:]

    src_lang = None
    processed_text = text
    confidence = 0.0

    progress(0, desc="Starting translation...")

    if hinglish_translit:
        progress(0.1, desc="Transliterating Hinglish to Devanagari...")
        try:
            processed_text = sanscript.transliterate(text, sanscript.IAST, sanscript.DEVANAGARI)
            src_lang = "hi"
            confidence = 0.9
            progress(0.2, desc="Hinglish transliteration complete.")
        except Exception:
            # Best effort: fall back to translating the raw text as English.
            processed_text = text
            src_lang = "en"
            confidence = 0.5
            progress(0.2, desc="Hinglish transliteration failed.")
    elif src_lang_full == "Auto Detect":
        progress(0.1, desc="Detecting language...")
        try:
            # langdetect reports region-tagged codes such as "zh-cn"/"zh-tw";
            # language_map and the M2M100 tokenizer use the bare code ("zh").
            detected = detect(text).split("-")[0]
            confidence = 0.95  # Mock confidence
            src_lang = detected
            if detected != "hi":
                progress(0.3, desc=f"Detected language: {language_map.get(detected, (detected, ''))[0]}")
            elif any(c.isalpha() for c in text if c.isascii()):
                # Hindi containing ASCII letters is treated as romanized Hindi.
                progress(0.2, desc="Transliterating Roman Hindi...")
                try:
                    processed_text = sanscript.transliterate(text, sanscript.IAST, sanscript.DEVANAGARI)
                    progress(0.3, desc="Transliteration complete.")
                except Exception:
                    processed_text = text
                    progress(0.3, desc="Transliteration failed, continuing with original text.")
            else:
                progress(0.3, desc="Detected Devanagari Hindi.")
        except Exception:
            # Detection is best effort; assume English when it fails.
            processed_text = text
            src_lang = "en"
            confidence = 0.5
            progress(0.3, desc="Language detection failed, defaulting to English.")
    else:
        src_lang = _language_code_for(src_lang_full)
        if src_lang is None:
            progress(1.0, desc="Error")
            return "Error: Invalid source language selected.", "0.0", history[-5:]
        confidence = 1.0
        progress(0.3, desc=f"Source language set to: {src_lang_full}")

    tgt_lang = _language_code_for(tgt_lang_full)
    if tgt_lang is None:
        progress(1.0, desc="Error")
        return "Error: Invalid target language selected.", "0.0", history[-5:]
    progress(0.4, desc=f"Target language set to: {tgt_lang_full}")

    if src_lang == tgt_lang:
        # Same language: nothing to translate, return the (possibly
        # transliterated) text as-is.
        recent = _record_translation(src_lang_full, tgt_lang_full, text, processed_text)
        return processed_text, f"{confidence:.2f}", recent

    try:
        progress(0.5, desc="Encoding text...")
        tokenizer.src_lang = src_lang
        encoded = tokenizer(processed_text, return_tensors="pt", max_length=512, truncation=True)
        progress(0.6, desc="Generating translation tokens...")
        # Only max_length is passed: combining it with max_new_tokens makes
        # the latter win and silently caps the output at 100 tokens.
        generated_tokens = model.generate(
            **encoded,
            forced_bos_token_id=tokenizer.get_lang_id(tgt_lang),
            num_beams=4,
            early_stopping=True,
            max_length=512,
            do_sample=False,
        )
        progress(0.9, desc="Decoding generated tokens...")
        translated = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
        progress(1.0, desc="Translation complete!")
    except Exception as exc:
        # Surface the cause in the notebook/server log instead of dropping it.
        print(f"Translation failed: {exc!r}")
        progress(1.0, desc="Error")
        return "Error during translation. Please try again.", "0.0", history[-5:]

    recent = _record_translation(src_lang_full, tgt_lang_full, text, translated)
    return translated, f"{confidence:.2f}", recent  # Last 5 for display

def generate_audio(text, tgt_lang_full):
    """Synthesize speech for *text* with gTTS and return the audio bytes.

    Args:
        text: Text to speak. Empty, blank, or ``None`` text produces no audio.
        tgt_lang_full: Display name of the language, as listed in
            ``language_map``.

    Returns:
        The bytes gTTS wrote to the in-memory buffer, or ``None`` when there
        is nothing to speak, the language name is unknown, or synthesis fails.
    """
    if not text or not text.strip():
        return None

    # Resolve the display name straight to its gTTS code; an unknown name
    # yields None instead of raising IndexError.
    tts_lang = next(
        (tts for name, tts in language_map.values() if name == tgt_lang_full),
        None,
    )
    if tts_lang is None:
        return None

    try:
        buffer = io.BytesIO()
        gTTS(text=text, lang=tts_lang).write_to_fp(buffer)
    except Exception as exc:
        # Audio is best effort; report the cause and carry on without it.
        print(f"Audio generation failed: {exc!r}")
        return None
    return buffer.getvalue()

def swap_languages(src, tgt):
    """Return the ``(source, target)`` pair with the two selections exchanged.

    "Auto Detect" is never moved to the other side: whichever side would
    receive it gets "English" instead.
    """
    placeholder, fallback = "Auto Detect", "English"
    if placeholder not in (src, tgt):
        return tgt, src
    if src == placeholder:
        return tgt, fallback
    return fallback, src

def on_input_change(text, src, tgt, realtime, hinglish):
    """Handle edits to the input box, translating live when *realtime* is on.

    Args:
        text: Current contents of the input textbox.
        src: Selected source language name (or "Auto Detect").
        tgt: Selected target language name.
        realtime: Whether to translate on every change.
        hinglish: Whether Hinglish transliteration is enabled.

    Returns:
        A ``(translation, confidence_label, history_markdown)`` triple for the
        output textbox, the confidence line, and the history panel.
    """
    if not (realtime and text and text.strip()):
        # Nothing to translate: clear the stale result, but leave the history
        # panel untouched rather than overwriting it with "No translations yet."
        return "", "Confidence: 0.0", gr.update()

    translated, conf, hist = translate_with_full_names(text, src, tgt, hinglish)
    # Markdown collapses single newlines, so render one bullet per entry.
    entries = "\n".join(f"- {entry}" for entry in hist) if hist else "No translations yet."
    return translated, f"Confidence: {conf}", "### Recent Translations\n" + entries

# Build the Gradio UI: language pickers, input/output boxes, audio player,
# history panel, and the event wiring between them.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # NOTE(review): the stylesheet is assigned after construction; the
    # documented route is gr.Blocks(css=...) — confirm this assignment takes
    # effect on the installed Gradio version.
    demo.css = """
    body {
        background: linear-gradient(to bottom right, #e0f7fa, #b2ebf2);
        font-family: 'Roboto', sans-serif;
    }
    .gradio-container {
        background-color: #ffffff;
        border-radius: 12px;
        padding: 25px;
        box-shadow: 0 6px 12px rgba(0, 0, 0, 0.15);
        max-width: 1000px;
        margin: 20px auto;
    }
    h1 {
        color: #0288d1;
        text-align: center;
        margin-bottom: 30px;
        font-size: 2.5em;
        text-shadow: 1px 1px 2px rgba(0, 0, 0, 0.05);
    }
    .gr-textbox textarea {
        border-radius: 8px;
        border: 1px solid #b0bec5;
        padding: 15px;
        font-size: 1.2em;
        resize: vertical;
    }
    .gr-button {
        border-radius: 8px;
        padding: 12px 24px;
        font-size: 1.1em;
        margin: 5px;
        transition: all 0.3s ease;
    }
    .gr-button-primary {
        background-color: #4285f4;
        color: white;
        border: none;
    }
    .gr-button-secondary {
        background-color: #34a853;
        color: white;
        border: none;
    }
    #output-area {
        background-color: #f1f3f4;
        border: 1px solid #dadce0;
        border-radius: 8px;
        padding: 15px;
        margin-top: 10px;
    }
    #confidence {
        color: #5f6368;
        font-size: 0.9em;
        margin-top: 5px;
    }
    #history {
        background-color: #f8f9fa;
        border: 1px solid #e0e0e0;
        border-radius: 8px;
        padding: 10px;
        max-height: 200px;
        overflow-y: auto;
        font-size: 0.9em;
    }
    """

    gr.Markdown("# 🌍 Multi Language Translator")
    gr.Markdown("*Unique Features: Translate language , Translation History, Pronunciation Audio, Confidence Score*")

    # This checkbox gates translate-on-change (see on_input_change), so the
    # label describes that rather than the swap button.
    realtime_checkbox = gr.Checkbox(label="🔄 Enable real-time translation (translate as you type)", value=False)
    hinglish_checkbox = gr.Checkbox(label="🗣️ Enable Hinglish Transliteration ", value=False)

    # Column scales are integers; 2:1:2 keeps the original 1:0.5:1 ratio.
    with gr.Row():
        with gr.Column(scale=2):
            src = gr.Dropdown(choices=src_langs_with_auto, value="Auto Detect", label="Source Language")
        with gr.Column(scale=1, min_width=60):
            swap_btn = gr.Button("🔄", size="sm")
        with gr.Column(scale=2):
            tgt = gr.Dropdown(choices=full_src_langs, value="Hindi", label="Target Language")

    with gr.Row():
        input_text = gr.Textbox(label="Enter text to translate", lines=6, placeholder="Type or paste text here... (e.g., aur bhaii kaise ho)", elem_id="input-area")

    with gr.Row():
        translate_btn = gr.Button("Translate", variant="primary", size="lg")

    # 10:3 keeps the original 1:0.3 ratio with integer scales.
    with gr.Row():
        with gr.Column(scale=10):
            output = gr.Textbox(label="Translation", lines=6, interactive=False, elem_id="output-area")
            confidence = gr.Markdown("Confidence: 0.0", elem_id="confidence")
        with gr.Column(scale=3):
            audio = gr.Audio(label="Pronunciation", interactive=False)
            copy_btn = gr.Button("Copy Translation", variant="secondary")

    with gr.Row():
        history_display = gr.Markdown("### Recent Translations\nNo translations yet.", elem_id="history")

    # Events
    def translate_fn(text, src, tgt, hinglish):
        """Translate *text*, synthesize its pronunciation, and refresh the history panel."""
        translated, conf, hist = translate_with_full_names(text, src, tgt, hinglish)
        audio_data = generate_audio(translated, tgt)
        # Markdown collapses single newlines, so render one bullet per entry.
        entries = "\n".join(f"- {entry}" for entry in hist) if hist else "No translations yet."
        history_md = "### Recent Translations\n" + entries
        return translated, f"Confidence: {conf}", audio_data, history_md

    translate_btn.click(translate_fn, inputs=[input_text, src, tgt, hinglish_checkbox], outputs=[output, confidence, audio, history_display])

    input_text.change(on_input_change, inputs=[input_text, src, tgt, realtime_checkbox, hinglish_checkbox], outputs=[output, confidence, history_display])

    input_text.submit(translate_fn, inputs=[input_text, src, tgt, hinglish_checkbox], outputs=[output, confidence, audio, history_display])

    # Copy happens entirely in the browser: the js callback receives the
    # current value of `output`; no Python function runs.
    copy_btn.click(
        fn=None,
        inputs=[output],
        outputs=None,
        js="(text) => { navigator.clipboard.writeText(text); }",
    )

    swap_btn.click(swap_languages, inputs=[src, tgt], outputs=[src, tgt])

demo.launch(share=True)


✅ Model and tokenizer loaded!




Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d5666f334e1d31d5f7.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


