In [3]:
import pandas as pd

# Load the CSV file
df = pd.read_csv('/content/drive/MyDrive/Code/separated_sentences.csv')

# Display the first few rows to check the data
print(df.head())

# Check for missing values
print(df.isnull().sum())

# Drop rows with missing values if any
df.dropna(inplace=True)

# Ensure columns are properly named for clarity
df.columns = ['English', 'Bangla']

# Optional: Check for any duplicates and remove them
df.drop_duplicates(inplace=True)

# Reset index after dropping rows
df.reset_index(drop=True, inplace=True)

print(df.head())


                      English                  Bangla
0       I want to drink water         আমি জল খেতে চাই
1        Sudip is a good boy.   সুদীপ একজন ভালো ছেলে।
2    Think I'll go to school.  ভাবছি আজ স্কুল এ যাবো।
3             My hair is torn    আমার চুল ছেঁড়া গেলো
4  Tumi football khelte paro?  তুমি ফুটবল খেলতে পারো?
English    20
Bangla     20
dtype: int64
                      English                  Bangla
0       I want to drink water         আমি জল খেতে চাই
1        Sudip is a good boy.   সুদীপ একজন ভালো ছেলে।
2    Think I'll go to school.  ভাবছি আজ স্কুল এ যাবো।
3             My hair is torn    আমার চুল ছেঁড়া গেলো
4  Tumi football khelte paro?  তুমি ফুটবল খেলতে পারো?


In [4]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [16]:
from transformers import pipeline

# Load a pre-trained translation pipeline
translation_pipeline = pipeline("translation_bn_to_en", model="Helsinki-NLP/opus-mt-bn-en")

# Example: Translate a single sentence
translation = translation_pipeline("আপনি কেমন আছেন?")[0]['translation_text']
print("Translated sentence:", translation)

translation = translation_pipeline("আজ আকাশে মেঘ করেছে।")[0]['translation_text']
print("Translated sentence:", translation)

translation = translation_pipeline("আমি প্রতিদিন সকালে ঘুম থেকে উঠি।")[0]['translation_text']
print("Translated sentence:", translation)

translation = translation_pipeline("তোমার নাম কি?")[0]['translation_text']
print("Translated sentence:", translation)


Translated sentence: How are you?
Translated sentence: It's clouds in the sky today.
Translated sentence: I wake up every morning.
Translated sentence: What's your name?


In [1]:
!pip install sacrebleu
!pip install datasets
!pip install bert_score
!pip install transformers
!pip install rouge_score
!pip install sentence-separator
!pip install huggingface_hub

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=ee25f7f17c97545cf418a86a9280270976b767c7e86b59fc45ad26bc9f96b146
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2
[31mERROR: Could not find a version that satisfies the requirement sentence-separator (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for sentence-separator[0m[31m


In [None]:
!huggingface-cli login



    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) n
Traceback (most recent call last):

In [7]:
import pandas as pd
import sacrebleu
from datasets import load_metric
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import bert_score
import torch
from tqdm import tqdm
import warnings

# Suppress warnings
warnings.filterwarnings("ignore")

# Check if a GPU is available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load your dataset
df = pd.read_csv(r"/content/drive/MyDrive/Code/separated_sentences.csv")

# Ensure 'English' and 'Bangla' columns are strings
df['English'] = df['English'].astype(str)
df['Bangla'] = df['Bangla'].astype(str)

# Initialize evaluation metrics
rouge = load_metric("rouge")
meteor = load_metric("meteor")
bert_scorer = bert_score.BERTScorer(
    lang="bn", rescale_with_baseline=False, device=device
)

# List of translation models to evaluate
models_to_evaluate = [
    {"name": "facebook/mbart-large-50-many-to-many-mmt", "src_lang": "en_XX", "tgt_lang": "bn_IN"},
    {"name": "Helsinki-NLP/opus-mt-en-bn", "src_lang": None, "tgt_lang": None},
    {"name": "facebook/nllb-200-distilled-600M", "src_lang": "eng_Latn", "tgt_lang": "ben_Beng"},
    {"name": "google/mt5-small", "src_lang": None, "tgt_lang": None},
    {"name": "ai4bharat/indictrans-v2-en-bn", "src_lang": None, "tgt_lang": None},
    {"name": "ai4bharat/indic-bert", "src_lang": None, "tgt_lang": None}

    # Add more models as needed
]

# Initialize a list to store the results
results = []

# Function for batch processing with real-time scoring
def batch_translate_and_score(translator, texts, references, batch_size=16):
    translations = []
    total_bleu = 0
    total_rouge_l = 0
    total_meteor = 0
    total_bert_score_f1 = 0

    for i in tqdm(range(0, len(texts), batch_size)):
        batch_texts = texts[i:i + batch_size]
        batch_refs = references[i:i + batch_size]
        batch_texts = [str(text) for text in batch_texts]
        translated_batch = translator(batch_texts)
        batch_translations = [item['translation_text'] for item in translated_batch]
        translations.extend(batch_translations)

        # Calculate scores for this batch
        bleu = sacrebleu.corpus_bleu(batch_translations, [batch_refs]).score
        rouge_score = rouge.compute(predictions=batch_translations, references=batch_refs)
        meteor_score = meteor.compute(predictions=batch_translations, references=batch_refs)
        bert_score_res = bert_scorer.score(batch_translations, batch_refs)

        total_bleu += bleu
        total_rouge_l += rouge_score['rougeL'].mid.fmeasure
        total_meteor += meteor_score['meteor']
        total_bert_score_f1 += bert_score_res[2].mean().item()

        # Print scores for this batch
        print(f"\nBatch {i//batch_size + 1} Scores:")
        print(f"BLEU: {bleu:.2f}")
        print(f"ROUGE-L: {rouge_score['rougeL'].mid.fmeasure:.2f}")
        print(f"METEOR: {meteor_score['meteor']:.2f}")
        print(f"BERTScore F1: {bert_score_res[2].mean().item():.2f}")

    # Calculate average scores
    num_batches = len(texts) // batch_size + (1 if len(texts) % batch_size != 0 else 0)
    avg_bleu = total_bleu / num_batches
    avg_rouge_l = total_rouge_l / num_batches
    avg_meteor = total_meteor / num_batches
    avg_bert_score_f1 = total_bert_score_f1 / num_batches

    return translations, avg_bleu, avg_rouge_l, avg_meteor, avg_bert_score_f1

# Iterate over the models
for model_info in models_to_evaluate:
    model_name = model_info['name']
    src_lang = model_info['src_lang']
    tgt_lang = model_info['tgt_lang']

    print(f"\nProcessing model: {model_name}")

    try:
        # Initialize the translation pipeline with GPU support
        if src_lang and tgt_lang:
            translator = pipeline("translation", model=model_name,
                                  src_lang=src_lang, tgt_lang=tgt_lang,
                                  device=0 if torch.cuda.is_available() else -1)
        else:
            translator = pipeline("translation", model=model_name,
                                  device=0 if torch.cuda.is_available() else -1)

        # Translate English sentences to Bengali in batches and get scores
        translations, avg_bleu, avg_rouge_l, avg_meteor, avg_bert_score_f1 = batch_translate_and_score(
            translator, df['English'].tolist(), df['Bangla'].tolist()
        )

        # Store the results
        results.append({
            "Model": model_name,
            "BLEU": avg_bleu,
            "ROUGE_L": avg_rouge_l,
            "METEOR": avg_meteor,
            "BERTScore_F1": avg_bert_score_f1
        })

        print(f"\nFinal Scores for {model_name}:")
        print(f"Average BLEU: {avg_bleu:.2f}")
        print(f"Average ROUGE-L: {avg_rouge_l:.2f}")
        print(f"Average METEOR: {avg_meteor:.2f}")
        print(f"Average BERTScore F1: {avg_bert_score_f1:.2f}")

    except Exception as e:
        print(f"Error processing model {model_name}: {str(e)}")
        results.append({
            "Model": model_name,
            "Error": str(e)
        })

# Convert the results to a DataFrame
results_df = pd.DataFrame(results)

# Save the results to a CSV file
results_df.to_csv("translation_evaluation_results.csv", index=False)

print("\nEvaluation complete! Results saved to translation_evaluation_results.csv")

The repository for rouge contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/rouge.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N] y


Downloading builder script:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

The repository for meteor contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/meteor.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N] y


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/714M [00:00<?, ?B/s]


Processing model: facebook/mbart-large-50-many-to-many-mmt


config.json:   0%|          | 0.00/1.43k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.44G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/261 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/529 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/649 [00:00<?, ?B/s]

  6%|▌         | 1/18 [02:06<35:57, 126.94s/it]


Batch 1 Scores:
BLEU: 6.46
ROUGE-L: 0.00
METEOR: 0.13
BERTScore F1: 0.78


 11%|█         | 2/18 [04:00<31:42, 118.90s/it]


Batch 2 Scores:
BLEU: 6.22
ROUGE-L: 0.00
METEOR: 0.44
BERTScore F1: 0.83


 17%|█▋        | 3/18 [05:37<27:18, 109.22s/it]


Batch 3 Scores:
BLEU: 6.26
ROUGE-L: 0.00
METEOR: 0.35
BERTScore F1: 0.81


 22%|██▏       | 4/18 [06:58<22:49, 97.81s/it] 


Batch 4 Scores:
BLEU: 10.34
ROUGE-L: 0.00
METEOR: 0.26
BERTScore F1: 0.83


 28%|██▊       | 5/18 [08:31<20:50, 96.22s/it]


Batch 5 Scores:
BLEU: 5.27
ROUGE-L: 0.00
METEOR: 0.30
BERTScore F1: 0.82


 33%|███▎      | 6/18 [09:46<17:47, 88.97s/it]


Batch 6 Scores:
BLEU: 5.40
ROUGE-L: 0.00
METEOR: 0.23
BERTScore F1: 0.82


 39%|███▉      | 7/18 [15:13<30:33, 166.65s/it]


Batch 7 Scores:
BLEU: 0.94
ROUGE-L: 0.00
METEOR: 0.12
BERTScore F1: 0.72


 44%|████▍     | 8/18 [17:54<27:30, 165.08s/it]


Batch 8 Scores:
BLEU: 0.76
ROUGE-L: 0.00
METEOR: 0.08
BERTScore F1: 0.73


 50%|█████     | 9/18 [20:07<23:15, 155.07s/it]


Batch 9 Scores:
BLEU: 2.35
ROUGE-L: 0.00
METEOR: 0.16
BERTScore F1: 0.77


 56%|█████▌    | 10/18 [22:35<20:22, 152.75s/it]


Batch 10 Scores:
BLEU: 0.80
ROUGE-L: 0.00
METEOR: 0.08
BERTScore F1: 0.73


 61%|██████    | 11/18 [25:36<18:48, 161.27s/it]


Batch 11 Scores:
BLEU: 0.82
ROUGE-L: 0.00
METEOR: 0.07
BERTScore F1: 0.74


 67%|██████▋   | 12/18 [29:09<17:43, 177.23s/it]


Batch 12 Scores:
BLEU: 1.94
ROUGE-L: 0.00
METEOR: 0.19
BERTScore F1: 0.74


 72%|███████▏  | 13/18 [32:28<15:19, 183.85s/it]


Batch 13 Scores:
BLEU: 3.18
ROUGE-L: 0.00
METEOR: 0.20
BERTScore F1: 0.73


 78%|███████▊  | 14/18 [36:00<12:49, 192.29s/it]


Batch 14 Scores:
BLEU: 3.86
ROUGE-L: 0.00
METEOR: 0.25
BERTScore F1: 0.73


 83%|████████▎ | 15/18 [38:03<08:33, 171.29s/it]


Batch 15 Scores:
BLEU: 2.62
ROUGE-L: 0.00
METEOR: 0.24
BERTScore F1: 0.79


 89%|████████▉ | 16/18 [39:49<05:03, 151.71s/it]


Batch 16 Scores:
BLEU: 9.13
ROUGE-L: 0.00
METEOR: 0.38
BERTScore F1: 0.84


 94%|█████████▍| 17/18 [41:38<02:18, 138.95s/it]


Batch 17 Scores:
BLEU: 2.61
ROUGE-L: 0.00
METEOR: 0.21
BERTScore F1: 0.78


100%|██████████| 18/18 [42:11<00:00, 140.66s/it]


Batch 18 Scores:
BLEU: 10.10
ROUGE-L: 0.00
METEOR: 0.42
BERTScore F1: 0.89

Final Scores for facebook/mbart-large-50-many-to-many-mmt:
Average BLEU: 4.39
Average ROUGE-L: 0.00
Average METEOR: 0.23
Average BERTScore F1: 0.78

Processing model: Helsinki-NLP/opus-mt-en-bn
Error processing model Helsinki-NLP/opus-mt-en-bn: Helsinki-NLP/opus-mt-en-bn is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`

Processing model: facebook/nllb-200-distilled-600M





config.json:   0%|          | 0.00/846 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.46G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/564 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/4.85M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.3M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/3.55k [00:00<?, ?B/s]

  6%|▌         | 1/18 [00:50<14:17, 50.42s/it]


Batch 1 Scores:
BLEU: 39.49
ROUGE-L: 0.00
METEOR: 0.33
BERTScore F1: 0.87


 11%|█         | 2/18 [01:21<10:27, 39.22s/it]


Batch 2 Scores:
BLEU: 50.98
ROUGE-L: 0.00
METEOR: 0.68
BERTScore F1: 0.93


 17%|█▋        | 3/18 [01:53<08:55, 35.73s/it]


Batch 3 Scores:
BLEU: 40.10
ROUGE-L: 0.00
METEOR: 0.69
BERTScore F1: 0.95


 22%|██▏       | 4/18 [02:22<07:43, 33.12s/it]


Batch 4 Scores:
BLEU: 34.43
ROUGE-L: 0.00
METEOR: 0.58
BERTScore F1: 0.92


 28%|██▊       | 5/18 [02:49<06:44, 31.08s/it]


Batch 5 Scores:
BLEU: 45.56
ROUGE-L: 0.00
METEOR: 0.60
BERTScore F1: 0.93


 33%|███▎      | 6/18 [03:17<06:00, 30.02s/it]


Batch 6 Scores:
BLEU: 36.98
ROUGE-L: 0.00
METEOR: 0.41
BERTScore F1: 0.90


 39%|███▉      | 7/18 [04:45<08:58, 48.98s/it]


Batch 7 Scores:
BLEU: 19.84
ROUGE-L: 0.00
METEOR: 0.42
BERTScore F1: 0.87


 44%|████▍     | 8/18 [05:25<07:39, 45.92s/it]


Batch 8 Scores:
BLEU: 22.44
ROUGE-L: 0.00
METEOR: 0.35
BERTScore F1: 0.86


 50%|█████     | 9/18 [07:01<09:15, 61.73s/it]


Batch 9 Scores:
BLEU: 7.76
ROUGE-L: 0.00
METEOR: 0.47
BERTScore F1: 0.87


 56%|█████▌    | 10/18 [09:01<10:37, 79.73s/it]


Batch 10 Scores:
BLEU: 5.37
ROUGE-L: 0.00
METEOR: 0.30
BERTScore F1: 0.84


 61%|██████    | 11/18 [09:51<08:14, 70.64s/it]


Batch 11 Scores:
BLEU: 4.99
ROUGE-L: 0.00
METEOR: 0.25
BERTScore F1: 0.82


 67%|██████▋   | 12/18 [10:41<06:26, 64.40s/it]


Batch 12 Scores:
BLEU: 22.02
ROUGE-L: 0.00
METEOR: 0.44
BERTScore F1: 0.88


 72%|███████▏  | 13/18 [11:29<04:56, 59.29s/it]


Batch 13 Scores:
BLEU: 29.45
ROUGE-L: 0.00
METEOR: 0.46
BERTScore F1: 0.88


 78%|███████▊  | 14/18 [12:18<03:45, 56.27s/it]


Batch 14 Scores:
BLEU: 23.51
ROUGE-L: 0.00
METEOR: 0.45
BERTScore F1: 0.87


 83%|████████▎ | 15/18 [12:53<02:29, 49.82s/it]


Batch 15 Scores:
BLEU: 22.79
ROUGE-L: 0.00
METEOR: 0.34
BERTScore F1: 0.87


 89%|████████▉ | 16/18 [13:24<01:28, 44.06s/it]


Batch 16 Scores:
BLEU: 20.87
ROUGE-L: 0.00
METEOR: 0.49
BERTScore F1: 0.89


 94%|█████████▍| 17/18 [13:55<00:40, 40.29s/it]


Batch 17 Scores:
BLEU: 28.87
ROUGE-L: 0.00
METEOR: 0.51
BERTScore F1: 0.92


100%|██████████| 18/18 [14:08<00:00, 47.15s/it]


Batch 18 Scores:
BLEU: 41.18
ROUGE-L: 0.00
METEOR: 0.65
BERTScore F1: 0.96

Final Scores for facebook/nllb-200-distilled-600M:
Average BLEU: 27.59
Average ROUGE-L: 0.00
Average METEOR: 0.47
Average BERTScore F1: 0.89

Processing model: google/mt5-small





config.json:   0%|          | 0.00/553 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/82.0 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/4.31M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
  0%|          | 0/18 [00:00<?, ?it/s]Your input_length: 57 is bigger than 0.9 * max_length: 20. You might consider increasing your max_length manually, e.g. translator('...', max_length=400)
  6%|▌         | 1/18 [00:08<02:16,  8.02s/it]


Batch 1 Scores:
BLEU: 0.00
ROUGE-L: 0.00
METEOR: 0.00
BERTScore F1: 0.55


 11%|█         | 2/18 [00:12<01:31,  5.74s/it]


Batch 2 Scores:
BLEU: 0.00
ROUGE-L: 0.00
METEOR: 0.00
BERTScore F1: 0.54


 17%|█▋        | 3/18 [00:17<01:20,  5.36s/it]


Batch 3 Scores:
BLEU: 0.46
ROUGE-L: 0.00
METEOR: 0.01
BERTScore F1: 0.55


 22%|██▏       | 4/18 [00:21<01:11,  5.13s/it]


Batch 4 Scores:
BLEU: 0.00
ROUGE-L: 0.00
METEOR: 0.00
BERTScore F1: 0.57


 28%|██▊       | 5/18 [00:26<01:02,  4.79s/it]


Batch 5 Scores:
BLEU: 0.00
ROUGE-L: 0.00
METEOR: 0.00
BERTScore F1: 0.57


 33%|███▎      | 6/18 [00:29<00:54,  4.51s/it]


Batch 6 Scores:
BLEU: 0.00
ROUGE-L: 0.00
METEOR: 0.00
BERTScore F1: 0.57


Your input_length: 30 is bigger than 0.9 * max_length: 20. You might consider increasing your max_length manually, e.g. translator('...', max_length=400)
Your input_length: 26 is bigger than 0.9 * max_length: 20. You might consider increasing your max_length manually, e.g. translator('...', max_length=400)
Your input_length: 28 is bigger than 0.9 * max_length: 20. You might consider increasing your max_length manually, e.g. translator('...', max_length=400)
Your input_length: 31 is bigger than 0.9 * max_length: 20. You might consider increasing your max_length manually, e.g. translator('...', max_length=400)
Your input_length: 24 is bigger than 0.9 * max_length: 20. You might consider increasing your max_length manually, e.g. translator('...', max_length=400)
Your input_length: 31 is bigger than 0.9 * max_length: 20. You might consider increasing your max_length manually, e.g. translator('...', max_length=400)
 39%|███▉      | 7/18 [00:37<01:01,  5.58s/it]


Batch 7 Scores:
BLEU: 0.26
ROUGE-L: 0.00
METEOR: 0.01
BERTScore F1: 0.55


 44%|████▍     | 8/18 [00:42<00:51,  5.15s/it]


Batch 8 Scores:
BLEU: 0.00
ROUGE-L: 0.00
METEOR: 0.00
BERTScore F1: 0.57


 50%|█████     | 9/18 [00:46<00:43,  4.79s/it]


Batch 9 Scores:
BLEU: 0.00
ROUGE-L: 0.00
METEOR: 0.00
BERTScore F1: 0.56


 56%|█████▌    | 10/18 [00:51<00:41,  5.13s/it]


Batch 10 Scores:
BLEU: 0.00
ROUGE-L: 0.00
METEOR: 0.00
BERTScore F1: 0.56


Your input_length: 20 is bigger than 0.9 * max_length: 20. You might consider increasing your max_length manually, e.g. translator('...', max_length=400)
 61%|██████    | 11/18 [00:56<00:34,  4.95s/it]


Batch 11 Scores:
BLEU: 0.00
ROUGE-L: 0.00
METEOR: 0.00
BERTScore F1: 0.55


Your input_length: 19 is bigger than 0.9 * max_length: 20. You might consider increasing your max_length manually, e.g. translator('...', max_length=400)
Your input_length: 19 is bigger than 0.9 * max_length: 20. You might consider increasing your max_length manually, e.g. translator('...', max_length=400)
Your input_length: 20 is bigger than 0.9 * max_length: 20. You might consider increasing your max_length manually, e.g. translator('...', max_length=400)
Your input_length: 19 is bigger than 0.9 * max_length: 20. You might consider increasing your max_length manually, e.g. translator('...', max_length=400)
 67%|██████▋   | 12/18 [01:01<00:30,  5.02s/it]


Batch 12 Scores:
BLEU: 0.00
ROUGE-L: 0.00
METEOR: 0.00
BERTScore F1: 0.55


Your input_length: 19 is bigger than 0.9 * max_length: 20. You might consider increasing your max_length manually, e.g. translator('...', max_length=400)
Your input_length: 19 is bigger than 0.9 * max_length: 20. You might consider increasing your max_length manually, e.g. translator('...', max_length=400)
 72%|███████▏  | 13/18 [01:08<00:27,  5.50s/it]


Batch 13 Scores:
BLEU: 0.00
ROUGE-L: 0.00
METEOR: 0.00
BERTScore F1: 0.55


 78%|███████▊  | 14/18 [01:13<00:22,  5.58s/it]


Batch 14 Scores:
BLEU: 0.00
ROUGE-L: 0.00
METEOR: 0.00
BERTScore F1: 0.54


Your input_length: 19 is bigger than 0.9 * max_length: 20. You might consider increasing your max_length manually, e.g. translator('...', max_length=400)
 83%|████████▎ | 15/18 [01:18<00:15,  5.22s/it]


Batch 15 Scores:
BLEU: 0.00
ROUGE-L: 0.00
METEOR: 0.00
BERTScore F1: 0.55


 89%|████████▉ | 16/18 [01:23<00:10,  5.28s/it]


Batch 16 Scores:
BLEU: 0.51
ROUGE-L: 0.00
METEOR: 0.02
BERTScore F1: 0.56


 94%|█████████▍| 17/18 [01:28<00:05,  5.02s/it]


Batch 17 Scores:
BLEU: 0.00
ROUGE-L: 0.00
METEOR: 0.00
BERTScore F1: 0.56


100%|██████████| 18/18 [01:30<00:00,  5.01s/it]


Batch 18 Scores:
BLEU: 0.91
ROUGE-L: 0.00
METEOR: 0.01
BERTScore F1: 0.56

Final Scores for google/mt5-small:
Average BLEU: 0.12
Average ROUGE-L: 0.00
Average METEOR: 0.00
Average BERTScore F1: 0.56

Processing model: ai4bharat/indictrans-v2-en-bn
Error processing model ai4bharat/indictrans-v2-en-bn: ai4bharat/indictrans-v2-en-bn is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`

Processing model: ai4bharat/indic-bert





config.json:   0%|          | 0.00/507 [00:00<?, ?B/s]

Error processing model ai4bharat/indic-bert: Could not load model ai4bharat/indic-bert with any of the following classes: (<class 'transformers.models.auto.modeling_auto.AutoModelForSeq2SeqLM'>, <class 'transformers.models.auto.modeling_tf_auto.TFAutoModelForSeq2SeqLM'>). See the original errors:

while loading with AutoModelForSeq2SeqLM, an error is thrown:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/transformers/pipelines/base.py", line 283, in infer_framework_load_model
    model = model_class.from_pretrained(model, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py", line 567, in from_pretrained
    raise ValueError(
ValueError: Unrecognized configuration class <class 'transformers.models.albert.configuration_albert.AlbertConfig'> for this kind of AutoModel: AutoModelForSeq2SeqLM.
Model type should be one of BartConfig, BigBirdPegasusConfig, BlenderbotConfig, BlenderbotSmallConfig, EncoderDecoder

In [13]:
import pandas as pd
import csv
from datetime import datetime

def save_detailed_results(results, batch_scores, output_file="detailed_translation_results.csv"):
    # Prepare the data for the CSV
    csv_data = []

    # Add a header row
    header = ["Model", "Batch", "BLEU", "ROUGE-L", "METEOR", "BERTScore F1"]
    csv_data.append(header)

    # Add batch scores for each model
    for model, scores in batch_scores.items():
        for batch, score in enumerate(scores, start=1):
            row = [
                model,
                f"Batch {batch}",
                score['BLEU'],
                score['ROUGE-L'],
                score['METEOR'],
                score['BERTScore F1']
            ]
            csv_data.append(row)

        # Add average scores
        avg_scores = next(item for item in results if item["Model"] == model)
        avg_row = [
            model,
            "Average",
            avg_scores['BLEU'],
            avg_scores['ROUGE-L'],
            avg_scores['METEOR'],
            avg_scores['BERTScore_F1']
        ]
        csv_data.append(avg_row)

        # Add an empty row for better readability
        csv_data.append([])

    # Get current timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Add timestamp to the filename
    output_file = f"{timestamp}_{output_file}"

    # Write to CSV
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(csv_data)

    print(f"Detailed results saved to {output_file}")

    # Create a summary DataFrame
    summary_df = pd.DataFrame(results)
    summary_file = f"{timestamp}_summary_results.csv"
    summary_df.to_csv(summary_file, index=False)

    print(f"Summary results saved to {summary_file}")

# Example usage (place this at the end of your main script):
# save_detailed_results(results, batch_scores)

In [11]:
!pip install graphviz
!apt-get install graphviz -y


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
graphviz is already the newest version (2.42.2-6ubuntu0.1).
0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.


In [12]:
from graphviz import Digraph

dot = Digraph(comment='Translation Model Workflow')

# Main workflow
dot.node('A', 'Load Dataset')
dot.node('B', 'Preprocess Data')
dot.node('C', 'Initialize Metrics')
dot.node('D', 'Load Translation Models')
dot.node('E', 'Iterate Through Models')
dot.node('F', 'Batch Processing')
dot.node('G', 'Translate Texts')
dot.node('H', 'Calculate Scores')
dot.node('I', 'Store Results')
dot.node('J', 'More Models?')
dot.node('K', 'Aggregate Results')
dot.node('L', 'Save Detailed CSV')
dot.node('M', 'Generate Report')

# Subgraphs for Metrics and Models
with dot.subgraph(name='cluster_0') as c:
    c.attr(label='Metrics')
    c.node('N', 'BLEU')
    c.node('O', 'ROUGE-L')
    c.node('P', 'METEOR')
    c.node('Q', 'BERTScore')

with dot.subgraph(name='cluster_1') as c:
    c.attr(label='Models')
    c.node('R', 'mBART-large')
    c.node('S', 'OPUS-MT')
    c.node('T', 'NLLB-200')
    c.node('U', 'mT5-small')
    c.node('V', 'IndicTrans-v2')

# Workflow connections
dot.edges(['AB', 'BC', 'CD', 'DE', 'EF', 'FG', 'GH', 'HI', 'IJ'])
dot.edge('J', 'E', label='Yes')
dot.edge('J', 'K', label='No')
dot.edge('K', 'L')
dot.edge('L', 'M')

# Connections to subgraphs
dot.edge('C', 'N')
dot.edge('C', 'O')
dot.edge('C', 'P')
dot.edge('C', 'Q')
dot.edge('D', 'R')
dot.edge('D', 'S')
dot.edge('D', 'T')
dot.edge('D', 'U')
dot.edge('D', 'V')

# Render the graph
dot.render('/content/translation_model_workflow', format='png', cleanup=False)
dot.view('/content/translation_model_workflow')



'/content/translation_model_workflow.pdf'