<a href="https://colab.research.google.com/github/KEERTHANAPUJAR/Text-Translation/blob/main/M2M100_FINAL_DEMO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install gradio
!pip install evaluate
!pip install rouge_score
!pip install sacrebleu
!pip install requests
!pip install transformers

Collecting gradio
  Downloading gradio-4.44.1-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0 (from gradio)
  Downloading fastapi-0.115.0-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.3.0 (from gradio)
  Downloading gradio_client-1.3.0-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.9 (from g

In [None]:
import os

import evaluate
import gradio as gr
import requests
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer

# Metric scorers from the `evaluate` library, used to score generated translations.
bleu = evaluate.load("sacrebleu")
rouge = evaluate.load("rouge")

# The model and its tokenizer are both loaded from the same M2M100 checkpoint.
_CHECKPOINT = "facebook/m2m100_418M"
model = M2M100ForConditionalGeneration.from_pretrained(_CHECKPOINT)
tokenizer = M2M100Tokenizer.from_pretrained(_CHECKPOINT)

# Language codes mapped to their display names; insertion order is preserved.
language_options = {
    "en": "English",
    "fr": "French",
    "hi": "Hindi",
    "es": "Spanish",
    "de": "German",
    "zh": "Chinese",
}

# Placeholder endpoint for the Gemini pre-processing service; replace with the real URL.
GEMINI_API_URL = "https://gemini-api-url.com/process"
# The API key is read from the environment so the credential is never stored in the
# notebook itself. Set GEMINI_API_KEY before running; it defaults to an empty string.
# NOTE: any key that was ever committed with this notebook should be treated as
# leaked and rotated.
API_KEY = os.environ.get("GEMINI_API_KEY", "")

def process_with_gemini(source_text, timeout=10):
    """Send text to the Gemini endpoint for pre-processing, falling back to the input.

    Posts ``{"text": source_text}`` as JSON to ``GEMINI_API_URL`` with ``API_KEY``
    as a bearer token and returns the ``processed_text`` field of the JSON reply.

    Args:
        source_text: Text to pre-process.
        timeout: Seconds to wait for the service before giving up. Without a
            timeout a stalled connection would block the caller indefinitely.

    Returns:
        The processed text, or ``source_text`` unchanged when the request fails,
        the reply is not valid JSON, or the reply carries no string
        ``processed_text`` value.
    """
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }

    try:
        response = requests.post(
            GEMINI_API_URL,
            json={"text": source_text},
            headers=headers,
            timeout=timeout,
        )
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError) as e:
        # Best effort: network, HTTP-status, and JSON-decoding failures must not
        # stop the translation, so report the problem and use the raw input.
        print(f"Error processing with Gemini: {e}")
        return source_text

    # Guard against replies that are not JSON objects or that hold a null or
    # non-string value, which would otherwise be handed on to the tokenizer.
    processed_text = payload.get("processed_text") if isinstance(payload, dict) else None
    if not isinstance(processed_text, str):
        return source_text
    return processed_text

def translate_text(source_text, source_lang, target_lang, num_beams=5):
    """Translate text from one language to another with the M2M100 model.

    Note that this assigns ``source_lang`` to ``src_lang`` on the shared
    module-level ``tokenizer``.

    Args:
        source_text: Text to translate.
        source_lang: Language code of ``source_text``.
        target_lang: Language code to translate into; its language id is forced
            as the first generated token.
        num_beams: Number of beams for beam search. Coerced to ``int``; a value
            of 1 means no beam search is performed.

    Returns:
        The decoded translation with special tokens removed.
    """
    # UI sliders may deliver the count as a float; generation expects an int.
    num_beams = int(num_beams)

    tokenizer.src_lang = source_lang
    encoded_text = tokenizer(source_text, return_tensors="pt")

    generation_kwargs = {
        "forced_bos_token_id": tokenizer.get_lang_id(target_lang),
        "num_beams": num_beams,
    }
    if num_beams > 1:
        # early_stopping is a beam-search-only flag: generation stops as soon as
        # num_beams complete candidates exist. Passing it with a single beam
        # only produces a generation-config warning, so it is omitted there.
        generation_kwargs["early_stopping"] = True

    generated_tokens = model.generate(**encoded_text, **generation_kwargs)
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]

def evaluate_translation(reference, hypothesis):
    """Score a single hypothesis against a single reference with BLEU and ROUGE.

    Args:
        reference: The reference translation.
        hypothesis: The generated translation to score.

    Returns:
        A ``(bleu_score, rouge_score)`` tuple holding the results of
        ``bleu.compute`` and ``rouge.compute`` respectively.
    """
    # Both scorers receive one prediction paired with a one-element reference list.
    scorer_inputs = {"predictions": [hypothesis], "references": [[reference]]}
    return bleu.compute(**scorer_inputs), rouge.compute(**scorer_inputs)

def translation_and_evaluation(
    source_text,
    source_lang,
    target_lang,
    num_beams=5,
    reference_translation="Bonjour, comment es-tu ?",
):
    """Pre-process, translate, and score a piece of text.

    The text is passed through ``process_with_gemini``, translated with
    ``translate_text``, and the result is scored against
    ``reference_translation`` with ``evaluate_translation``.

    Args:
        source_text: Text to translate.
        source_lang: Language code of ``source_text``.
        target_lang: Language code to translate into.
        num_beams: Number of beams passed on to ``translate_text``.
        reference_translation: Expected translation to score against. The
            default is a fixed French sentence, so the scores are only
            meaningful when that sentence is the expected output; pass the
            real reference for any other input or target language.

    Returns:
        A ``(translated_text, bleu, rouge1)`` tuple, where ``bleu`` is the
        ``score`` entry of the BLEU result and ``rouge1`` is the ``rouge1``
        entry of the ROUGE result.
    """
    processed_text = process_with_gemini(source_text)
    translated_text = translate_text(processed_text, source_lang, target_lang, num_beams)

    print("Source Text:", source_text)
    print("Generated Translation:", translated_text)
    print("Reference Translation:", reference_translation)

    bleu_score, rouge_score = evaluate_translation(reference_translation, translated_text)

    return translated_text, bleu_score['score'], rouge_score['rouge1']

# Gradio interface: collects the text, the language pair, and the beam count, then
# shows the translation together with its BLEU and ROUGE-1 scores.
with gr.Blocks() as demo:
    gr.Markdown("# Multi-Language Translation with Beam Search, Gemini API, and Evaluation")

    # (label, value) pairs: each dropdown shows the language name but passes the
    # language code to the handler.
    language_choices = [(name, code) for code, name in language_options.items()]

    with gr.Row():
        source_text_input = gr.Textbox(label="Enter text to translate:")
        # Explicit defaults so that clicking the button without touching the
        # dropdowns never passes None as a language code.
        source_lang_input = gr.Dropdown(choices=language_choices, value="en", label="Source Language")
        target_lang_input = gr.Dropdown(choices=language_choices, value="fr", label="Target Language")
        num_beams_input = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Number of Beams for Beam Search")

    translate_button = gr.Button("Translate and Evaluate")
    translated_output = gr.Textbox(label="Translated Text", interactive=False)
    bleu_output = gr.Textbox(label="BLEU Score", interactive=False)
    rouge_output = gr.Textbox(label="ROUGE Score", interactive=False)

    # The handler's three return values map onto the three output boxes in order.
    translate_button.click(
        fn=translation_and_evaluation,
        inputs=[source_text_input, source_lang_input, target_lang_input, num_beams_input],
        outputs=[translated_output, bleu_output, rouge_output]
    )

# Launch the interface
demo.launch()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading builder script:   0%|          | 0.00/8.15k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/908 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.94G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/233 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/298 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/3.71M [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/1.14k [00:00<?, ?B/s]



Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://ec19e966889af71d50.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


