In [1]:
# Step 1: Install the necessary libraries
# %pip (rather than !pip) installs into the environment of the running kernel,
# so the packages are guaranteed to be importable from this notebook.
# -q makes the installation output less verbose.
# sentencepiece is required for many Helsinki-NLP models.
# TODO(review): pin versions (pkg==x.y.z) once a known-good set is confirmed.
%pip install -q transformers sacrebleu sentencepiece

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/51.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.8/51.8 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/104.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.1/104.1 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
# Step 2: Bring in the libraries and build the translation pipeline
import sacrebleu
from transformers import pipeline

# English -> Hindi translator backed by the pre-trained
# 'Helsinki-NLP/opus-mt-en-hi' checkpoint.
translator = pipeline(
    "translation_en_to_hi",
    model="Helsinki-NLP/opus-mt-en-hi",
)

# Step 3: English inputs and their Hindi reference translations
# Sentences that will be fed to the translator.
source_sentences = [
    "The cat is sitting on the mat.",
    "I love to learn about machine learning.",
    "The weather today is beautiful and sunny.",
]

# Human-written Hindi references, one per source sentence and in the same order.
# The outer list holds reference sets (sacrebleu accepts several); only one
# set is supplied here, hence the single inner list.
reference_translations = [
    [
        "बिल्ली चटाई पर बैठी है।",
        "मुझे मशीन लर्निंग के बारे में सीखना पसंद है।",
        "आज का मौसम खूबसूरत और धूप वाला है।",
    ],
]

# Step 4: Run the model and keep only the translated text of each result
machine_translations_output = translator(source_sentences)
machine_translations = [
    result["translation_text"] for result in machine_translations_output
]

# Step 5: Score the machine output against the references with corpus-level BLEU
bleu = sacrebleu.corpus_bleu(
    machine_translations,
    reference_translations,
)

# Step 6: Show each sentence next to its reference and machine translation,
# followed by the overall score
print("--- Translation Results (English to Hindi) ---")
for i, source_text in enumerate(source_sentences):
    print(f"Source:      {source_text}")
    print(f"Reference:   {reference_translations[0][i]}")
    print(f"Machine:     {machine_translations[i]}\n")

print("--- Assessment ---")
print(f"BLEU Score: {bleu.score:.2f}")

config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/306M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/306M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/812k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/1.07M [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0


--- Translation Results (English to Hindi) ---
Source:      The cat is sitting on the mat.
Reference:   बिल्ली चटाई पर बैठी है।
Machine:     बिल्ली बिस्तर पर बैठी है.

Source:      I love to learn about machine learning.
Reference:   मुझे मशीन लर्निंग के बारे में सीखना पसंद है।
Machine:     मुझे मशीन सीखने के बारे में सीखना अच्छा लगता है.

Source:      The weather today is beautiful and sunny.
Reference:   आज का मौसम खूबसूरत और धूप वाला है।
Machine:     आज मौसम सुंदर और धूप है.

--- Assessment ---
BLEU Score: 18.40


In [4]:
# Step 2: Bring in the required components
import sacrebleu
from transformers import pipeline

print("✅ Libraries installed and imported.")

# --- Model and Data Setup ---
# The model, the source text and the reference translations are defined here.

# Step 3: Load Meta AI's NLLB (No Language Left Behind) translation model
# This checkpoint is larger than the Helsinki-NLP one; the first run
# downloads it (approx. 2.4 GB).
print("⬇️ Loading the NLLB translation model...")
translator = pipeline(
    "translation",
    model="facebook/nllb-200-distilled-600M",
    src_lang="eng_Latn",  # translate from English (Latin script)
    tgt_lang="hin_Deva",  # translate into Hindi (Devanagari script)
)
print("✅ Model loaded successfully.")

# Step 4: English inputs and their Hindi reference translations
source_sentences = [
    "The cat is sitting on the mat.",
    "I love to learn about machine learning.",
    "The weather today is beautiful and sunny.",
]

# Human-written references, one per source sentence and in the same order,
# wrapped in an outer list because a single reference set is supplied.
reference_translations = [
    [
        "बिल्ली चटाई पर बैठी है।",
        "मुझे मशीन लर्निंग के बारे में सीखना पसंद है।",
        "आज का मौसम खूबसूरत और धूप वाला है।",
    ],
]

# --- Translation and Evaluation ---
# Generate the translations, then measure them against the references.

# Step 5: Translate with beam search
# num_beams=5 lets the model explore more candidate translations, which
# often yields better quality.
print("\n⏳ Translating sentences with beam search...")
machine_translations_output = translator(source_sentences, num_beams=5)

# Keep only the translated text of each result
machine_translations = [
    result["translation_text"] for result in machine_translations_output
]
print("✅ Translation complete.")

# Step 6: Score the machine output against the human references with
# corpus-level BLEU
bleu = sacrebleu.corpus_bleu(
    machine_translations,
    reference_translations,
)

# Step 7: Show each sentence next to its reference and machine translation
print("\n--- 📊 Translation Results and Accuracy ---")
for i, source_text in enumerate(source_sentences):
    print(f"\nSource:      {source_text}")
    print(f"Reference:   {reference_translations[0][i]}")
    print(f"Machine:     {machine_translations[i]}")

print("\n--------------------------------------------")
# bleu.score holds the final BLEU value.
print(f"🏆 Final BLEU Score: {bleu.score:.2f}")
print("--------------------------------------------")

✅ Libraries installed and imported.
⬇️ Loading the NLLB translation model...


config.json:   0%|          | 0.00/846 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.46G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.46G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/564 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/4.85M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.3M [00:00<?, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0


✅ Model loaded successfully.

⏳ Translating sentences with beam search...
✅ Translation complete.

--- 📊 Translation Results and Accuracy ---

Source:      The cat is sitting on the mat.
Reference:   बिल्ली चटाई पर बैठी है।
Machine:     बिल्ली गद्दे पर बैठी है।

Source:      I love to learn about machine learning.
Reference:   मुझे मशीन लर्निंग के बारे में सीखना पसंद है।
Machine:     मुझे मशीन लर्निंग के बारे में सीखना बहुत पसंद है।

Source:      The weather today is beautiful and sunny.
Reference:   आज का मौसम खूबसूरत और धूप वाला है।
Machine:     आज का मौसम सुंदर और धूप वाला है।

--------------------------------------------
🏆 Final BLEU Score: 58.25
--------------------------------------------
