# In [None]:
import gradio as gr
from transformers import T5ForConditionalGeneration, T5Tokenizer, MarianMTModel, MarianTokenizer
import torch
from gtts import gTTS
from PIL import Image
import os

try:
    from docx import Document
    DOCX_SUPPORT = True
except ImportError:
    DOCX_SUPPORT = False

# Path to the logo image displayed in the UI header.
logo_path = "logo.webp"

# Translation targets offered in the language dropdown, in display order.
# Each entry describes one local checkpoint:
#   path      - directory handed to `from_pretrained`
#   prefix    - text prepended to the input before tokenization
#   type      - "t5" selects the T5 classes; any other value uses MarianMT
#   lang_code - language code passed to gTTS for the spoken output
LANGUAGE_CODES = {
    "Romanian (T5)": {
        "path": "./t5-small-finetuned-en-ro",
        "prefix": "translate English to Romanian: ",
        "type": "t5",
        "lang_code": "ro",
    },
    "Romanian (MarianMT)": {
        "path": "./finetuned-en-ro",
        "prefix": "",
        "type": "marian",
        "lang_code": "ro",
    },
    "German (MarianMT)": {
        "path": "./simple-finetuned-en-de",
        "prefix": "",
        "type": "marian",
        "lang_code": "de",
    },
    "German (T5)": {
        "path": "./t5-small-finetuned-en-de",
        "prefix": "translate English to German: ",
        "type": "t5",
        "lang_code": "de",
    },
    # NOTE(review): typed "marian" yet carries a T5-style task prefix, unlike
    # the other MarianMT entries -- confirm the checkpoint was trained with it.
    "Spanish": {
        "path": "./Chris en-es",
        "prefix": "translate English to Spanish: ",
        "type": "marian",
        "lang_code": "es",
    },
}

# Loaded (model, tokenizer, device) triples keyed by (type, path), so each
# checkpoint is read from disk and moved to the device only once per process.
MODEL_CACHE = {}


# Load model/tokenizer
def load_model(config):
    """Return the model, tokenizer and device for one language configuration.

    Args:
        config: Mapping with a "type" key ("t5" selects the T5 classes, any
            other value selects MarianMT) and a "path" key naming the
            checkpoint directory passed to ``from_pretrained``.

    Returns:
        A ``(model, tokenizer, device)`` tuple. The model has already been
        moved to ``device``, which is CUDA when available and CPU otherwise.
        Repeated calls for the same type/path return the same cached objects
        instead of reloading the checkpoint.
    """
    cache_key = (config["type"], config["path"])
    cached = MODEL_CACHE.get(cache_key)
    if cached is not None:
        return cached

    if config["type"] == "t5":
        tokenizer = T5Tokenizer.from_pretrained(config["path"])
        model = T5ForConditionalGeneration.from_pretrained(config["path"])
    else:
        tokenizer = MarianTokenizer.from_pretrained(config["path"])
        model = MarianMTModel.from_pretrained(config["path"])
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Store only after a fully successful load so a failed attempt is retried.
    MODEL_CACHE[cache_key] = (model, tokenizer, device)
    return MODEL_CACHE[cache_key]

# Translate text using selected model
def translate_text(text, selected_key):
    """Translate English text with the model configured under ``selected_key``.

    The input is split into non-blank lines and each line is translated on its
    own, so a multi-paragraph document is not collapsed into a single
    truncated model input. Each line is still subject to the tokenizer's
    truncation and to the 128-token generation limit.

    Args:
        text: Text to translate. ``None`` or whitespace-only text yields "".
        selected_key: Key into ``LANGUAGE_CODES`` choosing the model.

    Returns:
        The translated lines joined with newlines, or "" when there is
        nothing to translate.

    Raises:
        KeyError: If ``selected_key`` is not a configured language and there
            is text to translate.
    """
    segments = [line.strip() for line in (text or "").splitlines() if line.strip()]
    if not segments:
        # Nothing to translate, so avoid loading a model at all.
        return ""

    config = LANGUAGE_CODES[selected_key]
    model, tokenizer, device = load_model(config)

    translated_segments = []
    with torch.no_grad():
        for segment in segments:
            inputs = tokenizer(
                config["prefix"] + segment,
                return_tensors="pt",
                padding=True,
                truncation=True,
            ).to(device)
            outputs = model.generate(**inputs, max_length=128)
            translated_segments.append(
                tokenizer.decode(outputs[0], skip_special_tokens=True)
            )
    return "\n".join(translated_segments)

# Generate audio using gTTS
def generate_audio(text, target_key):
    """Synthesize ``text`` to an MP3 file with gTTS and return the file path.

    Each call writes to its own temporary file rather than a single shared
    ``output_audio.mp3``, so overlapping requests cannot overwrite each
    other's audio. The file is not deleted here; the caller owns it.

    Args:
        text: Text to speak.
        target_key: Key into ``LANGUAGE_CODES``; its "lang_code" selects the
            gTTS language.

    Returns:
        Path of the newly written MP3 file.
    """
    import tempfile

    lang_code = LANGUAGE_CODES[target_key]["lang_code"]
    tts = gTTS(text=text, lang=lang_code)
    # Reserve a unique name, then close the handle before gTTS opens the path
    # itself (an open handle would block the write on Windows).
    with tempfile.NamedTemporaryFile(
        prefix="output_audio_", suffix=".mp3", delete=False
    ) as handle:
        audio_path = handle.name
    tts.save(audio_path)
    return audio_path


# In [None]:

# Gradio UI

def create_interface():
    """Build and return the Gradio Blocks app for the translator.

    The layout has an intro header with the logo, a text box and a file
    upload side by side, a target-language dropdown, and an initially hidden
    output row (translated text plus audio) that the "Translate" button
    reveals.

    Returns:
        The ``gr.Blocks`` instance; the caller is responsible for launching it.
    """
    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        with gr.Row():
            with gr.Column():
                gr.Markdown("# Hi, I’m Tabby! ")
                gr.Markdown("""
                **Powered by my advanced capabilities, this app offers seamless translation of large text
                chunks and entire documents with remarkable accuracy. Whether it's complex phrases or technical jargon, I
                ensure every translation retains the original meaning. With an easy-to-use interface and cutting-edge AI,
                it’s perfect for both personal and professional use.**
                """)
                gr.Markdown("""
                **Currently, we proudly support translations in Spanish, German, and Romanian — but hey,
                with a little bit of funding (hint, hint), we’ll be able to expand our language offerings
                and cover even more regions of the world. Stay tuned!**
                """)
            logo = Image.open(logo_path)
            gr.Image(value=logo, show_label=False, container=False)

        with gr.Row(equal_height=True):
            with gr.Column():
                text_input = gr.Textbox(label="Enter text to translate", lines=8)
            with gr.Column():
                file_input = gr.File(label="Upload a document", type="filepath")

        with gr.Row():
            lang_dropdown = gr.Dropdown(list(LANGUAGE_CODES.keys()), label="Select Target Language")

        with gr.Row(visible=False) as output_row:
            translated_text = gr.Text(label="Translated Text", show_copy_button=True)
            audio_output = gr.Audio(label="Hear the translation", type="filepath")

        def show_result(message, audio=None):
            """Reveal the output row with ``message`` and optional audio."""
            return {
                output_row: gr.update(visible=True),
                translated_text: message,
                audio_output: audio,
            }

        def on_submit(text, file, target_language):
            """Translate the uploaded document if present, else the typed text."""
            # Without a selection the dropdown yields None, which would
            # otherwise surface as a KeyError in the language lookup.
            if not target_language:
                return show_result("❌ Please select a target language.")

            if file is not None:
                # The upload may arrive as a plain path string or as an
                # object exposing the path via `.name`; accept both.
                file_path = getattr(file, "name", file)
                extension_source = file_path.lower()
                if extension_source.endswith(".txt"):
                    try:
                        with open(file_path, "r", encoding="utf-8") as f:
                            text = f.read()
                    except UnicodeDecodeError:
                        # latin-1 maps every byte, so this fallback cannot fail to decode.
                        with open(file_path, "r", encoding="latin-1") as f:
                            text = f.read()
                elif extension_source.endswith(".docx"):
                    if not DOCX_SUPPORT:
                        # Report the missing dependency instead of blaming the file.
                        return show_result(
                            "❌ .docx support is unavailable: the python-docx package is not installed."
                        )
                    try:
                        doc = Document(file_path)
                        text = "\n".join([para.text for para in doc.paragraphs])
                    except Exception:
                        return show_result("❌ Error reading the .docx file.")
                else:
                    return show_result("❌ Only .txt and .docx files are supported.")

            # Blank input would reach the speech step with nothing to say.
            if not text or not text.strip():
                return show_result("❌ Please enter some text or upload a non-empty document.")

            translated = translate_text(text, target_language)
            # Skip speech synthesis when the model produced no text.
            audio_path = generate_audio(translated, target_language) if translated.strip() else None
            return show_result(translated, audio_path)

        submit_btn = gr.Button("Translate")
        submit_btn.click(on_submit, inputs=[text_input, file_input, lang_dropdown],
                         outputs=[output_row, translated_text, audio_output])

    return demo

# Build the interface and launch the Gradio app when this file is run directly.
if __name__ == "__main__":
    demo = create_interface()
    demo.launch(debug=True)
