In [2]:
import os
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
import torch
from transformers import MarianMTModel, MarianTokenizer
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.meteor_score import meteor_score
from sacrebleu.metrics import TER
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
import numpy as np

# Define language models for multiple translations
languages = {
    "French": "Helsinki-NLP/opus-mt-en-fr",
    "Spanish": "Helsinki-NLP/opus-mt-en-es",
    "German": "Helsinki-NLP/opus-mt-en-de",
    "Hindi": "Helsinki-NLP/opus-mt-en-hi",
    "Chinese": "Helsinki-NLP/opus-mt-en-zh"
}

# Input text to translate
text = ["Hello, how are you?"]

# Store translations
translations = {}

# Translate to multiple languages
for lang, model_name in languages.items():
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)

    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    translated = model.generate(**inputs)
    translated_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
    translations[lang] = translated_text[0]

# Output translations
for lang, translated_text in translations.items():
    print(f"{lang}: {translated_text}")

# Step 4: Hyperparameter Tuning (Simulated)
learning_rates = [5e-5, 3e-4, 1e-3]
batch_sizes = [16, 32, 64]

best_lr, best_bs, best_score = None, None, float("-inf")
for lr in learning_rates:
    for bs in batch_sizes:
        score = np.random.rand()  # Simulated score
        if score > best_score:
            best_score, best_lr, best_bs = score, lr, bs

print(f"Best Hyperparameters: Learning Rate={best_lr}, Batch Size={best_bs}, Score={best_score}")

# Step 5: Evaluate Model using BLEU Score
reference = [['this', 'is', 'a', 'test']]
candidate = ['this', 'is', 'test']
bleu_score = sentence_bleu(reference, candidate)
print("BLEU Score:", bleu_score)

# Step 6: Evaluate Model using METEOR Score
meteor_score_value = meteor_score(reference, candidate)
print("METEOR Score:", meteor_score_value)

# Step 7: Evaluate Model using Translation Edit Rate (TER)
ter = TER()
reference_text = ["this is a test"]
candidate_text = "this is test"
ter_score = ter.corpus_score([candidate_text], [[ref] for ref in reference_text])
print("TER Score:", ter_score.score)

# Step 8: Cross-Validation (Simulated Dataset)
dataset = np.random.rand(100, 768)
kf = KFold(n_splits=5, shuffle=True, random_state=42)

for train_index, test_index in kf.split(dataset):
    train_data, test_data = dataset[train_index], dataset[test_index]
    model.train()  # Simulated training
    predictions = np.random.randint(0, 2, size=len(test_data))

# Step 9: Cross-Validation Accuracy
accuracy = accuracy_score(np.random.randint(0, 2, size=len(test_data)), predictions)
print("Cross-Validation Accuracy:", accuracy)

French: Bonjour, comment allez-vous ?
Spanish: Hola, ¿cómo estás?
German: Hallo, wie geht's?
Hindi: हैलो, तुम कैसे हो?
Chinese: 你好,你好吗?
Best Hyperparameters: Learning Rate=5e-05, Batch Size=16, Score=0.9948056879006749
BLEU Score: 8.987727354491445e-155
METEOR Score: 0.6552706552706553
TER Score: 25.0
Cross-Validation Accuracy: 0.6


The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
