In [None]:
import streamlit as st
import pandas as pd
from transformers import MarianMTModel, MarianTokenizer

# Load dataset
csv_file = "C://Users//LIKIANU//Downloads//large_multilingual_translation_dataset.csv"
df = pd.read_csv(csv_file)

# Available translation models
language_pairs = {
    "English to French": "Helsinki-NLP/opus-mt-en-fr",
    "English to Spanish": "Helsinki-NLP/opus-mt-en-es",
    "English to German": "Helsinki-NLP/opus-mt-en-de",
    "French to English": "Helsinki-NLP/opus-mt-fr-en",
    "Spanish to English": "Helsinki-NLP/opus-mt-es-en",
    "German to English": "Helsinki-NLP/opus-mt-de-en"
}

# Streamlit App Interface
st.title("Real-Time Language Translation (NMT)")
st.write("Bridging Language Barriers with Neural Machine Translation")

# User Input
selected_pair = st.selectbox("Select Language Pair", list(language_pairs.keys()))
user_text = st.text_area("Enter text to translate:")

if st.button("Translate") and user_text:
    source_lang, target_lang = selected_pair.split(" to ")
    
    # Check dataset for translation
    if source_lang in df.columns and target_lang in df.columns:
        match = df[df[source_lang].str.lower() == user_text.lower()]
        if not match.empty:
            translated_text = match[target_lang].values[0]
            st.subheader("Translated Text (From Dataset):")
            st.success(translated_text)
        else:
            st.write("Translation not found in dataset. Using NMT model...")
            use_model = True
    else:
        use_model = True

    # Use model if dataset lookup fails
    if 'use_model' in locals() and use_model:
        model_name = language_pairs[selected_pair]
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name)
        
        # Tokenize and Translate
        inputs = tokenizer([user_text], return_tensors="pt", padding=True, truncation=True)
        translated = model.generate(**inputs)
        translated_text = tokenizer.batch_decode(translated, skip_special_tokens=True)[0]
        
        # Display Output
        st.subheader("Translated Text (Using Model):")
        st.success(translated_text)

# Footer
st.markdown("---")
st.write("Developed with ❤️ using Hugging Face Transformers, Streamlit & Dataset")

In [None]:


import torch
from transformers import MarianMTModel, MarianTokenizer
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.meteor_score import meteor_score
from sacrebleu.metrics import TER
import numpy as np

# Define languages and respective models
languages = {
    "French": "Helsinki-NLP/opus-mt-en-fr",
    "Spanish": "Helsinki-NLP/opus-mt-en-es",
    "German": "Helsinki-NLP/opus-mt-en-de",
    "Hindi": "Helsinki-NLP/opus-mt-en-hi",
    "Chinese": "Helsinki-NLP/opus-mt-en-zh"
}

# Input text to translate
text = ["Hello, how are you?"]

# Store results
translations = {}

# Translate to multiple languages
for lang, model_name in languages.items():
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)

    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    translated = model.generate(**inputs)
    translated_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
    translations[lang] = translated_text[0]

# Output translations
for lang, translated_text in translations.items():
    print(f"{lang}: {translated_text}")

# Example reference and candidate for evaluation
reference = [['this', 'is', 'a', 'test']]
candidate = ['this', 'is', 'test']

# BLEU Score
bleu_score = sentence_bleu(reference, candidate)
print("BLEU Score:", bleu_score)

# METEOR Score
meteor_score_value = meteor_score(reference, candidate)
print("METEOR Score:", meteor_score_value)

# TER Score Calculation
ter = TER()
reference_text = ["this is a test"]
candidate_text = "this is test"
ter_score = ter.corpus_score([candidate_text], [[ref] for ref in reference_text])
print("TER Score:", ter_score.score)

In [1]:
# Translation and Evaluation using MarianMT (Helsinki-NLP)
This notebook translates English text into multiple languages using the Helsinki-NLP MarianMT model and evaluates translations using BLEU, METEOR, and TER scores.
"""

# Import necessary libraries
import torch
from transformers import MarianMTModel, MarianTokenizer
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.meteor_score import meteor_score
from sacrebleu.metrics import TER
import numpy as np

# Define languages and respective models
languages = {
    "French": "Helsinki-NLP/opus-mt-en-fr",
    "Spanish": "Helsinki-NLP/opus-mt-en-es",
    "German": "Helsinki-NLP/opus-mt-en-de",
    "Hindi": "Helsinki-NLP/opus-mt-en-hi",
    "Chinese": "Helsinki-NLP/opus-mt-en-zh"
}

# Input text to translate
text = ["Hello, how are you?"]

# Store results
translations = {}

# Translate to multiple languages
for lang, model_name in languages.items():
    print(f"Loading model for {lang}...")
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    translated = model.generate(**inputs)
    translated_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
    translations[lang] = translated_text[0]
    print(f"{lang}: {translated_text[0]}")

# Display translations
import pandas as pd

df = pd.DataFrame(list(translations.items()), columns=["Language", "Translation"])
display(df)

"""
# Translation Evaluation
Now, we evaluate the translations using BLEU, METEOR, and TER scores.
"""

# Example reference and candidate for evaluation
reference = [['this', 'is', 'a', 'test']]
candidate = ['this', 'is', 'test']

# BLEU Score
bleu_score = sentence_bleu(reference, candidate)
print("BLEU Score:", bleu_score)

# METEOR Score
meteor_score_value = meteor_score(reference, candidate)
print("METEOR Score:", meteor_score_value)

# TER Score Calculation
ter = TER()
reference_text = ["this is a test"]
candidate_text = "this is test"
ter_score = ter.corpus_score([candidate_text], [[ref] for ref in reference_text])
print("TER Score:", ter_score.score)

SyntaxError: unterminated triple-quoted string literal (detected at line 68) (2980368134.py, line 49)

SyntaxError: invalid syntax (17978542.py, line 1)