# MT Evaluation Pipeline (Modular Version)

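This notebook evaluates machine translation (MT) output with BLEU, METEOR, COMET, and COMET-QE and saves the scores to a CSV file. The implementation lives in helper modules (`utils`, `models`, `data_loader`, `evaluator`, `config`) for better organization and reusability; the notebook only orchestrates them.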

## 1. Install Required Packages

In [1]:
# One-time dependency install: uncomment the line below and run it once in a
# fresh environment, then comment it out again so a full re-run does not reinstall.
# Only setuptools and torchmetrics are version-pinned; the other packages are not.
# %pip install setuptools==69.0.0 sacrebleu nltk pandas torch torchmetrics==0.10.3 unbabel-comet

## 2. Setup

In [2]:
from utils import setup_environment
from models import initialize_all_models
from data_loader import load_datasets
from evaluator import MTEvaluator
from config import LANGUAGE_FILES, RESULTS_DIR

# Prepare the runtime before any model is loaded. In the recorded run this
# printed the selected device (cuda) and the GPU name; anything else it
# configures is defined in utils.setup_environment, not here.
setup_environment()

Using device: cuda
GPU: NVIDIA GeForce RTX 3060


In [3]:
# Load everything the evaluator needs in one call. The helper hands back four
# values, unpacked in order: the COMET model, the COMET-QE model, and the path
# associated with each (the paths are passed to cleanup_all in the last cell).
# In the recorded run this also fetched NLTK data and the COMET checkpoints.
(
    comet_model,
    comet_qe_model,
    comet_model_path,
    comet_qe_model_path,
) = initialize_all_models()

NLTK data downloaded successfully!
Downloading COMET models (this may take a few minutes)...


[nltk_data] Downloading package wordnet to /home/nnik345/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /home/nnik345/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/nnik345/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Encoder model frozen.


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Encoder model frozen.


COMET models loaded successfully!


In [4]:
# Load the datasets described by LANGUAGE_FILES (from config) and report their
# keys; in the recorded run the keys are language names.
datasets = load_datasets(LANGUAGE_FILES)
loaded_names = list(datasets.keys())
print(f"Loaded datasets: {loaded_names}")

Loaded datasets: ['Hindi', 'Marathi', 'Odia', 'Tamil']


## 3. Run Evaluation

In [5]:
# Build the evaluator around the two COMET models, then score every loaded dataset.
# In the recorded run each MT system found in a dataset is reported with BLEU,
# METEOR, COMET and COMET-QE scores.
# NOTE(review): results_df is not referenced by the later visible cells, which go
# through `evaluator` instead — presumably it is kept for ad-hoc inspection.
evaluator = MTEvaluator(comet_model, comet_qe_model)
results_df = evaluator.evaluate_multiple_datasets(datasets)

Starting evaluation...

Processing Hindi...
  Found 1 MT system(s): ChatGPT 5.2
  Evaluating ChatGPT 5.2...
    ✓ BLEU: 9.09, METEOR: 0.3588, COMET: 0.7666, COMET-QE: -0.0760
  Completed Hindi

Processing Marathi...
  Found 2 MT system(s): BhashaVerse, ChatGPT 5.2
  Evaluating BhashaVerse...
    ✓ BLEU: 1.23, METEOR: 0.1033, COMET: 0.6157, COMET-QE: -0.1980
  Evaluating ChatGPT 5.2...


SystemExit: 1


KeyboardInterrupt



## 4. Display and Save Results

In [None]:
# Print the scores gathered by the evaluation step.
# NOTE(review): presumably reads results stored on `evaluator` by
# evaluate_multiple_datasets, so that cell must have completed first — confirm.
evaluator.print_results()

In [8]:
# Persist the evaluation results as Literary.csv inside RESULTS_DIR (from config).
# output_path stays a notebook-level name so it can be inspected afterwards.
output_path = RESULTS_DIR.joinpath('Literary.csv')
evaluator.save_results(output_path)


Results saved to: Results/Literary.csv
Total evaluations: 6


## 5. Clean Up

In [None]:
# Hand both COMET models and their associated paths to cleanup_all; presumably
# this releases the models (and possibly their files) — confirm in its definition.
# NOTE(review): cleanup_all is not imported in any visible cell, so on a fresh
# kernel this call raises NameError — import it from its defining module first.
# NOTE(review): the notebook-level names (and `evaluator`, which was constructed
# with both models) still reference the models after this call; confirm whether
# cleanup_all alone is enough to free GPU memory.
cleanup_all(
    comet_model=comet_model,
    comet_qe_model=comet_qe_model,
    comet_model_path=comet_model_path,
    comet_qe_model_path=comet_qe_model_path
)

# Printed unconditionally once cleanup_all returns.
print("\n✓ All resources cleaned up!")