In [None]:
from huggingface_hub import login

login('hugging_face_token')

In [2]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

# Load Model and Processor

In [3]:
from transformers import T5ForConditionalGeneration, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("rahular/varta-t5")
model = T5ForConditionalGeneration.from_pretrained("rahular/varta-t5")
model.to(device)

2025-02-05 18:06:15.593932: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1738753575.619119  282656 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1738753575.626609  282656 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-05 18:06:15.650540: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


T5ForConditionalGeneration(
  (shared): Embedding(128128, 768)
  (encoder): T5Stack(
    (embed_tokens): Embedding(128128, 768)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(32, 12)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseGatedActDense(
              (wi_0): Linear(in_features=768, out_features=2048, bias=False)
              (wi_1): Linear(in_features=768, out_features=2048, bias=False)
              (wo

# Make Dataset

In [4]:
import pandas as pd
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset
from PIL import Image

df = pd.read_csv('training_df.csv')
df = df.drop('Descriptive Meaning by ChatGPT', axis=1)
df['img_path'] = ''
for i in range(len(df)):
    df.loc[i,'img_path'] = 'hindi_img/'+str(i)+'.png'
df.head()

Unnamed: 0,Actual Proverb,Final_Human_Annotation,img_path
0,अधजल गगरी छलकत जाय।,"The Hindi proverb ""अधजल गगरी छलकत जाय"" transla...",hindi_img/0.png
1,अपने मुँह मियाँ मिट्ठू बनाना।,"The Hindi proverb ""अपने मुँह मियाँ मिट्ठू बना...",hindi_img/1.png
2,अपने हाथ में अपना भाग्य होना।,To create a clear and highly specific visualiz...,hindi_img/2.png
3,अपना उल्लू सीधा करना।,"The Hindi proverb ""अपना उल्लू सीधा करना"" trans...",hindi_img/3.png
4,अँगारे बरसना,Certainly! Here is a revised version with a cl...,hindi_img/4.png


In [5]:
temp_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
train_df, val_df = train_test_split(temp_df, test_size=0.2, random_state=42)

train_df = train_df.reset_index(drop=True)
val_df = val_df.reset_index(drop=True)
test_df = test_df.reset_index(drop=True)

train_df.shape, val_df.shape, test_df.shape

((816, 3), (205, 3), (256, 3))

## Try Tokenizer & Model

In [6]:
inputs = tokenizer("What is meaning of proverb named " + train_df.loc[0,'Actual Proverb'] + " ?", return_tensors="pt")
labels = tokenizer(train_df.loc[0, "Final_Human_Annotation"], return_tensors="pt").input_ids

print("Input:", "What is meaning of proverb named " + train_df.loc[0,'Actual Proverb'] + " ?\n")
print("Label:", train_df.loc[0, "Final_Human_Annotation"])

Input: What is meaning of proverb named रेल-पेल होना ?

Label: Certainly! Here's a more specific description for the visual representation:

Imagine a bustling train station platform during peak hours. The scene is packed with a dense crowd of people, each person tightly packed against the others. Individuals are jostling to get closer to the edge of the platform, where an incoming train is arriving. A variety of people are shown: a woman clutching her bag tightly, a man looking at his watch anxiously, a group of school children trying to navigate through the throng, and a vendor holding a tray of snacks high above his shoulder to avoid the crush. The expressions on their faces range from impatience to frustration, capturing the chaos and urgency of the moment. In the background, the blurred motion of the approaching train adds to the sense of hustle and frenzy.


In [7]:
tokenizer.decode(inputs['input_ids'][0])

'What is meaning of proverb named रेल-पेल होना ?</s>'

In [8]:
tokenizer.decode(labels[0])

"Certainly! Here's a more specific description for the visual representation: Imagine a bustling train station platform during peak hours. The scene is packed with a dense crowd of people, each person tightly packed against the others. Individuals are jostling to get closer to the edge of the platform, where an incoming train is arriving. A variety of people are shown: a woman clutching her bag tightly, a man looking at his watch anxiously, a group of school children trying to navigate through the throng, and a vendor holding a tray of snacks high above his shoulder to avoid the crush. The expressions on their faces range from impatience to frustration, capturing the chaos and urgency of the moment. In the background, the blurred motion of the approaching train adds to the sense of hustle and frenzy.</s>"

## custom dataset

In [9]:
from functools import partial

class HindiDataset(Dataset):
    def __init__(self, df):
        super().__init__()
        self.df = df
    def __len__(self):
        return len(self.df)
    def __getitem__(self, idx):
        row = {
            "texts_input": self.df.loc[idx, 'Actual Proverb'],
            'texts_label': self.df.loc[idx, 'Final_Human_Annotation']
        }
        return row

def collate_fn(batch, tokenizer, device):
    input_lis = [r['texts_input'] for r in batch]
    label_lis = [r['texts_label'] for r in batch]
    inputs = tokenizer(
        text=input_lis, return_tensors="pt", padding="max_length", max_length=512, truncation=True
    )
    inputs.pop('token_type_ids', None)
    inputs = {key: valu.to(device) for key, valu in inputs.items()}
    labels_ids = tokenizer(
        text=label_lis, return_tensors="pt", padding="max_length", max_length=512, truncation=True
    ).input_ids
    labels_ids = labels_ids.to(device)
    return inputs, labels_ids
    
train_dataset = HindiDataset(train_df)
val_dataset = HindiDataset(val_df)
test_dataset = HindiDataset(test_df)

train_loader = DataLoader(
        train_dataset,
        batch_size=8,
        collate_fn=partial(collate_fn, tokenizer=tokenizer, device=device)
    )
val_loader = DataLoader(
        val_dataset,
        batch_size=4,
        collate_fn=partial(collate_fn, tokenizer=tokenizer, device=device)
    )
test_loader = DataLoader(
        test_dataset,
        batch_size=4,
        collate_fn=partial(collate_fn, tokenizer=tokenizer, device=device)
    )

# Training Phase

In [10]:
import torch
from transformers import get_cosine_schedule_with_warmup
from tqdm.auto import tqdm

optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
epochs = 15
train_steps = len(train_loader) * epochs
scheduler = get_cosine_schedule_with_warmup(
    optimizer, num_warmup_steps=int(0.1 * train_steps), num_training_steps=train_steps
)
loss_fct = torch.nn.CrossEntropyLoss(ignore_index=tokenizer.pad_token_id)

In [11]:
best_val_loss = float("inf")

for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    train_progress = tqdm(train_loader, desc=f"Training Epoch {epoch + 1}", leave=True, position=0)
    for input_ids, labels in train_progress:
        optimizer.zero_grad()
        outputs = model(
            input_ids=input_ids['input_ids'],
            attention_mask=input_ids['attention_mask'],
            labels=labels
        )
        logits = outputs.logits  
        shift_logits = logits[:, :-1, :].contiguous().view(-1, logits.size(-1))
        shift_labels = labels[:, 1:].contiguous().view(-1)
        loss = loss_fct(shift_logits, shift_labels)
        loss.backward()
        optimizer.step()
        scheduler.step()
        running_loss += loss.item()
        train_progress.set_postfix(loss=loss.item())
    avg_train_loss = running_loss / len(train_loader)
    tqdm.write(f"\n🎯 Epoch {epoch + 1} - Training Loss: {avg_train_loss:.4f}")

    model.eval()
    val_loss = 0.0
    val_progress = tqdm(val_loader, desc="🔍 Validation", leave=True, position=0)
    with torch.no_grad():
        for input_ids, labels in val_progress:
            outputs = model(
                input_ids=input_ids['input_ids'],
                attention_mask=input_ids['attention_mask'],
                labels=labels
            )
            logits = outputs.logits  
            shift_logits = logits[:, :-1, :].contiguous().view(-1, logits.size(-1))
            shift_labels = labels[:, 1:].contiguous().view(-1)
            loss = loss_fct(shift_logits, shift_labels)
            val_loss += loss.item()
            val_progress.set_postfix(loss=loss.item())

    avg_val_loss = val_loss / len(val_loader)
    tqdm.write(f"✅ Epoch {epoch + 1} - Validation Loss: {avg_val_loss:.4f}")

    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        os.makedirs("art_t5/", exist_ok=True)
        model.save_pretrained("art_t5/best_model", from_pt=True)
        tqdm.write(f"💾 Best model saved at Epoch {epoch + 1}")
    tqdm.write("=" * 50)

Training Epoch 1:   0%|          | 0/102 [00:00<?, ?it/s]

Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.



🎯 Epoch 1 - Training Loss: 8.9240


🔍 Validation:   0%|          | 0/52 [00:00<?, ?it/s]

✅ Epoch 1 - Validation Loss: 6.8760
💾 Best model saved at Epoch 1


Training Epoch 2:   0%|          | 0/102 [00:00<?, ?it/s]


🎯 Epoch 2 - Training Loss: 6.8700


🔍 Validation:   0%|          | 0/52 [00:00<?, ?it/s]

✅ Epoch 2 - Validation Loss: 5.9959
💾 Best model saved at Epoch 2


Training Epoch 3:   0%|          | 0/102 [00:00<?, ?it/s]


🎯 Epoch 3 - Training Loss: 6.2348


🔍 Validation:   0%|          | 0/52 [00:00<?, ?it/s]

✅ Epoch 3 - Validation Loss: 5.5270
💾 Best model saved at Epoch 3


Training Epoch 4:   0%|          | 0/102 [00:00<?, ?it/s]


🎯 Epoch 4 - Training Loss: 5.8324


🔍 Validation:   0%|          | 0/52 [00:00<?, ?it/s]

✅ Epoch 4 - Validation Loss: 5.2449
💾 Best model saved at Epoch 4


Training Epoch 5:   0%|          | 0/102 [00:00<?, ?it/s]


🎯 Epoch 5 - Training Loss: 5.5762


🔍 Validation:   0%|          | 0/52 [00:00<?, ?it/s]

✅ Epoch 5 - Validation Loss: 5.0645
💾 Best model saved at Epoch 5


Training Epoch 6:   0%|          | 0/102 [00:00<?, ?it/s]


🎯 Epoch 6 - Training Loss: 5.3949


🔍 Validation:   0%|          | 0/52 [00:00<?, ?it/s]

✅ Epoch 6 - Validation Loss: 4.9346
💾 Best model saved at Epoch 6


Training Epoch 7:   0%|          | 0/102 [00:00<?, ?it/s]


🎯 Epoch 7 - Training Loss: 5.2579


🔍 Validation:   0%|          | 0/52 [00:00<?, ?it/s]

✅ Epoch 7 - Validation Loss: 4.8349
💾 Best model saved at Epoch 7


Training Epoch 8:   0%|          | 0/102 [00:00<?, ?it/s]


🎯 Epoch 8 - Training Loss: 5.1578


🔍 Validation:   0%|          | 0/52 [00:00<?, ?it/s]

✅ Epoch 8 - Validation Loss: 4.7626
💾 Best model saved at Epoch 8


Training Epoch 9:   0%|          | 0/102 [00:00<?, ?it/s]


🎯 Epoch 9 - Training Loss: 5.0878


🔍 Validation:   0%|          | 0/52 [00:00<?, ?it/s]

✅ Epoch 9 - Validation Loss: 4.7081
💾 Best model saved at Epoch 9


Training Epoch 10:   0%|          | 0/102 [00:00<?, ?it/s]


🎯 Epoch 10 - Training Loss: 5.0263


🔍 Validation:   0%|          | 0/52 [00:00<?, ?it/s]

✅ Epoch 10 - Validation Loss: 4.6665
💾 Best model saved at Epoch 10


Training Epoch 11:   0%|          | 0/102 [00:00<?, ?it/s]


🎯 Epoch 11 - Training Loss: 4.9886


🔍 Validation:   0%|          | 0/52 [00:00<?, ?it/s]

✅ Epoch 11 - Validation Loss: 4.6389
💾 Best model saved at Epoch 11


Training Epoch 12:   0%|          | 0/102 [00:00<?, ?it/s]


🎯 Epoch 12 - Training Loss: 4.9562


🔍 Validation:   0%|          | 0/52 [00:00<?, ?it/s]

✅ Epoch 12 - Validation Loss: 4.6219
💾 Best model saved at Epoch 12


Training Epoch 13:   0%|          | 0/102 [00:00<?, ?it/s]


🎯 Epoch 13 - Training Loss: 4.9401


🔍 Validation:   0%|          | 0/52 [00:00<?, ?it/s]

✅ Epoch 13 - Validation Loss: 4.6131
💾 Best model saved at Epoch 13


Training Epoch 14:   0%|          | 0/102 [00:00<?, ?it/s]


🎯 Epoch 14 - Training Loss: 4.9294


🔍 Validation:   0%|          | 0/52 [00:00<?, ?it/s]

✅ Epoch 14 - Validation Loss: 4.6099
💾 Best model saved at Epoch 14


Training Epoch 15:   0%|          | 0/102 [00:00<?, ?it/s]


🎯 Epoch 15 - Training Loss: 4.9302


🔍 Validation:   0%|          | 0/52 [00:00<?, ?it/s]

✅ Epoch 15 - Validation Loss: 4.6094
💾 Best model saved at Epoch 15


# load best model

In [12]:
# from transformers import T5ForConditionalGeneration, AutoTokenizer

# best_model = T5ForConditionalGeneration.from_pretrained("./art_t5/best_model/")
# best_model.to(device)

# Inference

In [13]:
op_lis, lb_lis = [], []

with torch.no_grad():
    for i, l in tqdm(test_loader, desc="test"):
        generated_ids = model.generate(i["input_ids"])
        output_text = tokenizer.batch_decode(
            generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
        )
        label_text = tokenizer.batch_decode(
            l, skip_special_tokens=True, clean_up_tokenization_spaces=False
        )
        op_lis.extend(output_text)
        lb_lis.extend(label_text)

test:   0%|          | 0/64 [00:00<?, ?it/s]

In [17]:
import pandas as pd

test_df = pd.DataFrame({'output': op_lis, 'label': lb_lis})
test_df

Unnamed: 0,output,label
0,Hindirbठना translate tototo a orto a orto a or...,The Hindi proverb 'ठंडी आहें भरना' translates ...
1,Hindirb पत्थर लकीर (stonestick)shor)shorsshorss,The Hindi proverb 'पत्थर की लकीर' translates t...
2,Hindirb मक्खन लगाना translate totoing oring so...,The Hindi proverb 'मक्खन लगाना' translates to ...
3,"Hindirbचमना translate tototo a of a or a, a or...","To visually represent this proverb, consider a..."
4,Hindirbएकनाँना translate tototo a orto in.toto...,The Hindi proverb 'अकेला हँसता भला न रोता भला।...
...,...,...
251,Hindirbसाप दूधना translate tototo milk in. des...,The proverb 'साँप को दूध पिलाना' translates to...
252,Hindirbसजबाग दिखा'sssssssssssss,Certainly! I'll refine the visualization to ma...
253,Hindirbताव आना translate totototototototototot...,"The Hindi proverb 'ताव आना' translates to ""to ..."
254,Hindirbधना translate tototo a orto in. describ...,The Hindi proverb 'दृष्टि फिरना' translates to...


In [15]:
test_df['output'][0]

'Hindirbठना translate tototo a orto a orto a orto a orto'

In [16]:
test_df['label'][0]

"The Hindi proverb 'ठंडी आहें भरना' translates to 'sighing deeply' in English. It signifies a state of deep sorrow, regret, or longing. The imagery associated with this proverb can be visualized with an individual sitting on a wooden bench in a dimly lit park, under the soft glow of a solitary streetlamp. The person, wearing a dark, weathered coat, is exhaling slowly with a melancholic expression. One hand rests gently on their chest while the other supports their head, symbolizing the weight of their emotions. The background features blurred outlines of barren trees and fallen leaves, set against a muted, overcast sky, enhancing the atmosphere of sadness and contemplation."

In [18]:
test_df.to_csv('Submission_BERT_T5/T5_Test.csv',index=False)

In [27]:
# from tqdm.auto import tqdm

# op_lis_2, lb_lis_2 = [], []

# with torch.no_grad():
#     for i, l in tqdm(train_loader, desc="train"):
#         generated_ids = best_model.generate(i["input_ids"])
#         output_text = tokenizer.batch_decode(
#             generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
#         )
#         label_text = tokenizer.batch_decode(
#             l, skip_special_tokens=True, clean_up_tokenization_spaces=False
#         )
#         op_lis_2.extend(output_text)
#         lb_lis_2.extend(label_text)
#     for i, l in tqdm(val_loader, desc="val"):
#         generated_ids = best_model.generate(i["input_ids"])
#         output_text = tokenizer.batch_decode(
#             generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
#         )
#         label_text = tokenizer.batch_decode(
#             l, skip_special_tokens=True, clean_up_tokenization_spaces=False
#         )
#         op_lis_2.extend(output_text)
#         lb_lis_2.extend(label_text)

import evaluate
import numpy as np
from nltk.translate.meteor_score import single_meteor_score

bleu = evaluate.load("bleu")
rouge = evaluate.load("rouge")
bertscore = evaluate.load("bertscore")

def compute_bleu(predictions, references):
    result = bleu.compute(predictions=predictions, references=references)
    return result["bleu"]

def compute_rouge(predictions, references):
    result = rouge.compute(predictions=predictions, references=references)
    return result

def compute_bertscore(predictions, references):
    result = bertscore.compute(predictions=predictions, references=references, model_type="bert-base-uncased")
    return np.mean(result["f1"])
    
def compute_meteor(predictions, references):
    tokenized_preds = [pred.split() for pred in predictions]
    tokenized_refs = [ref[0].split() for ref in references]
    meteor_scores = [single_meteor_score(ref, pred) for ref, pred in zip(tokenized_refs, tokenized_preds)]
    return np.mean(meteor_scores)

references_ = [[r] for r in test_df['label']]
bleu_score = compute_bleu(test_df['output'], references_)
rouge_scores = compute_rouge(test_df['output'], references_)
bert_score = compute_bertscore(test_df['output'], references_)
meteor_score = compute_meteor(test_df['output'], references_)

print(f"BLEU Score: {bleu_score:.4f}")
print(f"ROUGE1 Scores: {rouge_scores['rouge1']}")
print(f"ROUGE2 Scores: {rouge_scores['rouge2']}")
print(f"ROUGEL Scores: {rouge_scores['rougeL']}")
print(f"ROUGELsum Scores: {rouge_scores['rougeLsum']}")
print(f"BERTScore (F1): {bert_score:.4f}")
print(f"METEOR Score: {meteor_score:.4f}")

BLEU Score: 0.0000
ROUGE1 Scores: 0.05345388768111633
ROUGE2 Scores: 0.00124628373268266
ROUGEL Scores: 0.04654722664428242
ROUGELsum Scores: 0.04659231635566289
BERTScore (F1): 0.4001
METEOR Score: 0.0175


In [28]:
with open("Submission_BERT_T5/T5_Test.txt", "w") as f:
    f.write(f"BLEU Score: {bleu_score:.4f}\n")
    f.write(f"ROUGE1 Scores: {rouge_scores['rouge1']}\n")
    f.write(f"ROUGE2 Scores: {rouge_scores['rouge2']}\n")
    f.write(f"ROUGEL Scores: {rouge_scores['rougeL']}\n")
    f.write(f"ROUGELsum Scores: {rouge_scores['rougeLsum']}\n")
    f.write(f"BERTScore (F1): {bert_score:.4f}\n")
    f.write(f"METEOR Score: {meteor_score:.4f}\n")

In [20]:
# op_lis_3, lb_lis_3 = [], []
# op_lis_3.extend(op_lis_2)
# op_lis_3.extend(op_lis)
# lb_lis_3.extend(lb_lis_2)
# lb_lis_3.extend(lb_lis)

df_dataset = HindiDataset(df)

df_loader = DataLoader(
        df_dataset,
        batch_size=8,
        collate_fn=partial(collate_fn, tokenizer=tokenizer, device=device)
    )

In [21]:
# import pandas as pd

# all_df = pd.DataFrame({'output': op_lis_3, 'label': lb_lis_3})
# all_df.head(), len(all_df)

op_lis, lb_lis = [], []

with torch.no_grad():
    for i, l in tqdm(df_loader, desc="test"):
        generated_ids = model.generate(i["input_ids"])
        output_text = tokenizer.batch_decode(
            generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
        )
        label_text = tokenizer.batch_decode(
            l, skip_special_tokens=True, clean_up_tokenization_spaces=False
        )
        op_lis.extend(output_text)
        lb_lis.extend(label_text)

test:   0%|          | 0/160 [00:00<?, ?it/s]

In [23]:
import pandas as pd

all_df = pd.DataFrame({'output': op_lis, 'label': lb_lis})
all_df

Unnamed: 0,output,label
0,Hindirbअधगगगगगगगगगगगगगगगग,"The Hindi proverb ""अधजल गगरी छलकत जाय"" transla..."
1,Hindirbमुँममँममँममँममँममँम,"The Hindi proverb ""अपने मुँह मियाँ मिट्ठू बनान..."
2,Hindirbहाना translate totoing the of's's's's's,To create a clear and highly specific visualiz...
3,Hindirbउ उ उ उ उ उ उ उ उ उ उ उ उ उ उ उ उ उ,"The Hindi proverb ""अपना उल्लू सीधा करना"" trans..."
4,Hindirbअंगना बरसना translate totoing ins of or...,Certainly! Here is a revised version with a cl...
...,...,...
1272,Hindirbहाटी होना translate tototo a orto in. d...,"The Hindi proverb 'हेटी होना' translates to ""t..."
1273,Hindirbहना (hna)ssssssssssss,The Hindi proverb 'होश की दवा करना' translates...
1274,Hindirbहना translate tototo a orto in. describ...,The Hindi proverb 'होश ठिकाने आना' translates ...
1275,Hindirbत्रिंक होना translate totototototototot...,The Hindi proverb 'त्रिशुंक होना' (Trishanku H...


In [24]:
all_df.to_csv('Submission_BERT_T5/T5_allData.csv',index=False)

In [26]:
with open("Submission_BERT_T5/T5_allData.txt", "w") as f:
    f.write(f"BLEU Score: {bleu_score:.4f}\n")
    f.write(f"ROUGE1 Scores: {rouge_scores['rouge1']}\n")
    f.write(f"ROUGE2 Scores: {rouge_scores['rouge2']}\n")
    f.write(f"ROUGEL Scores: {rouge_scores['rougeL']}\n")
    f.write(f"ROUGELsum Scores: {rouge_scores['rougeLsum']}\n")
    f.write(f"BERTScore (F1): {bert_score:.4f}\n")
    f.write(f"METEOR Score: {meteor_score:.4f}\n")