In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import pandas as pd
from datasets import Dataset
import evaluate
from tqdm import tqdm
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
model_path = "./english-tamil-final-model"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
tokenizer.target_language = "ta"



In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.eval()

MarianMTModel(
  (model): MarianModel(
    (shared): Embedding(64110, 512, padding_idx=64109)
    (encoder): MarianEncoder(
      (embed_tokens): Embedding(64110, 512, padding_idx=64109)
      (embed_positions): MarianSinusoidalPositionalEmbedding(512, 512)
      (layers): ModuleList(
        (0-5): 6 x MarianEncoderLayer(
          (self_attn): MarianAttention(
            (k_proj): Linear(in_features=512, out_features=512, bias=True)
            (v_proj): Linear(in_features=512, out_features=512, bias=True)
            (q_proj): Linear(in_features=512, out_features=512, bias=True)
            (out_proj): Linear(in_features=512, out_features=512, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
          (activation_fn): SiLU()
          (fc1): Linear(in_features=512, out_features=2048, bias=True)
          (fc2): Linear(in_features=2048, out_features=512, bias=True)
          (final_layer_norm): LayerNorm((512,), eps=1e-05

In [None]:
df = pd.read_csv('test_dt.csv')

In [27]:
test_df = df.sample(n=1000, random_state=42)
print(f"Test set size: {len(test_df)} rows")


test_dataset = Dataset.from_pandas(test_df)

Test set size: 1000 rows


In [28]:
test_dataset

Dataset({
    features: ['english', 'tamil', '__index_level_0__'],
    num_rows: 1000
})

In [10]:
bleu_metric = evaluate.load("sacrebleu")
chrf_metric = evaluate.load("chrf")

In [30]:
all_predictions = []
all_references = []

In [31]:
batch_size = 16

In [32]:

for i in tqdm(range(0, len(test_dataset), batch_size)):
    batch = test_dataset[i:i + batch_size]
    
    sources = batch['english']
    references = batch['tamil']
    

    inputs = tokenizer(
        sources, 
        return_tensors="pt", 
        max_length=128, 
        truncation=True, 
        padding=True
    )
    inputs = {k: v.to(device) for k, v in inputs.items()}
    
    
    with torch.no_grad():
        outputs = model.generate(
            inputs["input_ids"],
            max_length=128,
            num_beams=4,
            length_penalty=0.6,
            early_stopping=True
        )
    

    predictions = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    
    all_predictions.extend(predictions)
    all_references.extend([[ref] for ref in references])

100%|██████████| 63/63 [03:04<00:00,  2.93s/it]


In [33]:
bleu_score = bleu_metric.compute(predictions=all_predictions, references=all_references)
chrf_score = chrf_metric.compute(predictions=all_predictions, references=all_references)

In [34]:
exact_matches = sum(1 for pred, ref in zip(all_predictions, all_references) 
                   if pred.strip() == ref[0].strip())
exact_match_score = exact_matches / len(all_predictions) * 100

In [41]:



sample_results = pd.DataFrame({
    'Source': test_dataset['english'][:],  
    'Reference': [ref[0] for ref in all_references[:]],
    'Prediction': all_predictions[:]
})

In [42]:
print("\nEvaluation Results:")
print("-" * 50)
print(f"BLEU Score: {bleu_score['score']:.2f}")
print(f"chrF Score: {chrf_score['score']:.2f}")
print(f"Exact Match Percentage: {exact_match_score:.2f}%")
print(f"Number of test samples: {len(test_dataset)}")





Evaluation Results:
--------------------------------------------------
BLEU Score: 39.17
chrF Score: 66.97
Exact Match Percentage: 6.40%
Number of test samples: 1000


In [43]:
results_df = pd.DataFrame({
    'Source': test_dataset['english'],
    'Reference': [ref[0] for ref in all_references],
    'Prediction': all_predictions,
})


In [44]:
results_df.to_csv('translation_results_1000.csv', index=False)