In [1]:
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
labse_tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/LaBSE")
labse_model = AutoModel.from_pretrained("sentence-transformers/LaBSE")

muril_tokenizer = AutoTokenizer.from_pretrained("google/muril-base-cased")
muril_model = AutoModel.from_pretrained("google/muril-base-cased")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
labse_model.to(device)
muril_model.to(device)

2025-11-25 19:28:05.658998: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-25 19:28:05.718978: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-11-25 19:28:07.419321: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(197285, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-11): 12 x BertLayer(
        (attention): BertAttention(
          (self): BertSdpaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=Fals

In [3]:
def compute_ref_based_scores(mt_texts, ref_texts, method="max"):
    results = []
    
    for i, mt in enumerate(mt_texts):
        ref = ref_texts[i]
        
        labse_ref_scores = []
        muril_ref_scores = []
        
        # --- LaBSE ---
        inputs = labse_tokenizer([ref, mt], padding=True, truncation=True, return_tensors="pt").to(device)
        with torch.no_grad():
            embeddings = labse_model(**inputs).pooler_output
        labse_ref_mt = F.cosine_similarity(embeddings[0].unsqueeze(0), embeddings[1].unsqueeze(0)).item()

        # --- MuRIL ---
        inputs = muril_tokenizer([ref, mt], padding=True, truncation=True, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = muril_model(**inputs)
            embeddings = outputs.last_hidden_state.mean(dim=1)
        muril_ref_mt = F.cosine_similarity(embeddings[0].unsqueeze(0), embeddings[1].unsqueeze(0)).item()

        results.append({
            "mt": mt,
            "labse_ref_mt": labse_ref_mt,
            "muril_ref_mt": muril_ref_mt
        })
    
    df_scores = pd.DataFrame(results)
    
    # System-level averages
    system_scores = {
        "labse_ref_mt": df_scores["labse_ref_mt"].mean(),
        "muril_ref_mt": df_scores["muril_ref_mt"].mean()
    }
    
    return system_scores

In [4]:
labse = {}
muril = {}

## Original

In [5]:
file_name = "original_outputs.csv"
mode = "original"

In [6]:
df = pd.read_csv(file_name)

predictions = df['prediction_mar'].tolist()
references = df['gt_mar'].tolist()

scores = compute_ref_based_scores(predictions, references)
labse[mode] = scores['labse_ref_mt']
muril[mode] = scores['muril_ref_mt']

print(f"LaBSE and MuRIL Scores for {mode}:")
print(scores)

LaBSE and MuRIL Scores for original:
{'labse_ref_mt': 0.9226261585980744, 'muril_ref_mt': 0.9981174723076127}


## Only Punctuations

In [7]:
file_name = "with_outputs.csv"
mode = "with"

In [8]:
df = pd.read_csv(file_name)

predictions = df['prediction_mar'].tolist()
references = df['gt_mar'].tolist()

scores = compute_ref_based_scores(predictions, references)
labse[mode] = scores['labse_ref_mt']
muril[mode] = scores['muril_ref_mt']

print(f"LaBSE and MuRIL Scores for {mode}:")
print(scores)

LaBSE and MuRIL Scores for with:
{'labse_ref_mt': 0.9197827294812637, 'muril_ref_mt': 0.9981659788903645}


## Without Punctuations

In [9]:
file_name = "without_outputs.csv"
mode = "without"

In [10]:
df = pd.read_csv(file_name)

predictions = df['prediction_mar'].tolist()
references = df['gt_mar'].tolist()

scores = compute_ref_based_scores(predictions, references)
labse[mode] = scores['labse_ref_mt']
muril[mode] = scores['muril_ref_mt']

print(f"LaBSE and MuRIL Scores for {mode}:")
print(scores)

LaBSE and MuRIL Scores for without:
{'labse_ref_mt': 0.9201303059861079, 'muril_ref_mt': 0.9981740449472081}


## Combined - LR & Epochs Changed

In [11]:
file_name = "combined_2x_outputs.csv"
mode = "combined_2x"

In [12]:
df = pd.read_csv(file_name)

predictions = df['prediction_mar'].tolist()
references = df['gt_mar'].tolist()

scores = compute_ref_based_scores(predictions, references)
labse[mode] = scores['labse_ref_mt']
muril[mode] = scores['muril_ref_mt']

print(f"LaBSE and MuRIL Scores for {mode}:")
print(scores)

LaBSE and MuRIL Scores for combined_2x:
{'labse_ref_mt': 0.9204223514679323, 'muril_ref_mt': 0.9981520064735604}


## Combined - LR & Epochs & Dataset Changed

In [13]:
file_name = "combined_x_outputs.csv"
mode = "combined_x"

In [14]:
df = pd.read_csv(file_name)

predictions = df['prediction_mar'].tolist()
references = df['gt_mar'].tolist()

scores = compute_ref_based_scores(predictions, references)
labse[mode] = scores['labse_ref_mt']
muril[mode] = scores['muril_ref_mt']

print(f"LaBSE and MuRIL Scores for {mode}:")
print(scores)

LaBSE and MuRIL Scores for combined_x:
{'labse_ref_mt': 0.9217669360018302, 'muril_ref_mt': 0.9981642475458181}


## T5

In [15]:
file_name = "t5_outputs.csv"
mode = "t5"

In [16]:
df = pd.read_csv(file_name)

predictions = df['prediction_mar'].tolist()
references = df['gt_mar'].tolist()

scores = compute_ref_based_scores(predictions, references)
labse[mode] = scores['labse_ref_mt']
muril[mode] = scores['muril_ref_mt']

print(f"LaBSE and MuRIL Scores for {mode}:")
print(scores)

LaBSE and MuRIL Scores for t5:
{'labse_ref_mt': 0.9212691990159818, 'muril_ref_mt': 0.9981615691031951}


In [17]:
for key, val in labse.items():
    print(f"{key} : {val:.4f}")

original : 0.9226
with : 0.9198
without : 0.9201
combined_2x : 0.9204
combined_x : 0.9218
t5 : 0.9213


In [21]:
for key, val in muril.items():
    print(f"{key} : {val:.6f}")

original : 0.998117
with : 0.998166
without : 0.998174
combined_2x : 0.998152
combined_x : 0.998164
t5 : 0.998162
