In [1]:
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
labse_tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/LaBSE")
labse_model = AutoModel.from_pretrained("sentence-transformers/LaBSE")

muril_tokenizer = AutoTokenizer.from_pretrained("google/muril-base-cased")
muril_model = AutoModel.from_pretrained("google/muril-base-cased")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
labse_model.to(device)
muril_model.to(device)

2025-11-25 14:23:57.148489: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-25 14:23:57.222232: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-11-25 14:23:59.402547: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(197285, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-11): 12 x BertLayer(
        (attention): BertAttention(
          (self): BertSdpaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=Fals

In [3]:
def compute_ref_based_scores(mt_texts, ref_texts, method="max"):
    results = []
    
    for i, mt in enumerate(mt_texts):
        ref = ref_texts[i]
        
        labse_ref_scores = []
        muril_ref_scores = []
        
        # --- LaBSE ---
        inputs = labse_tokenizer([ref, mt], padding=True, truncation=True, return_tensors="pt").to(device)
        with torch.no_grad():
            embeddings = labse_model(**inputs).pooler_output
        labse_ref_mt = F.cosine_similarity(embeddings[0].unsqueeze(0), embeddings[1].unsqueeze(0)).item()

        # --- MuRIL ---
        inputs = muril_tokenizer([ref, mt], padding=True, truncation=True, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = muril_model(**inputs)
            embeddings = outputs.last_hidden_state.mean(dim=1)
        muril_ref_mt = F.cosine_similarity(embeddings[0].unsqueeze(0), embeddings[1].unsqueeze(0)).item()

        results.append({
            "mt": mt,
            "labse_ref_mt": labse_ref_mt,
            "muril_ref_mt": muril_ref_mt
        })
    
    df_scores = pd.DataFrame(results)
    
    # System-level averages
    system_scores = {
        "labse_ref_mt": df_scores["labse_ref_mt"].mean(),
        "muril_ref_mt": df_scores["muril_ref_mt"].mean()
    }
    
    return system_scores

In [4]:
labse = {}
muril = {}

## Original

In [5]:
file_name = "shalaka_original_outputs.csv"
mode = "original"

In [6]:
df = pd.read_csv(file_name)

predictions = df['prediction'].tolist()
references = df['gemini'].tolist()

scores = compute_ref_based_scores(predictions, references)
labse[mode] = scores['labse_ref_mt']
muril[mode] = scores['muril_ref_mt']

print(f"LaBSE and MuRIL Scores for {mode}:")
print(scores)

LaBSE and MuRIL Scores for original:
{'labse_ref_mt': 0.9124792781141069, 'muril_ref_mt': 0.9971744263613666}


## Only Punctuations

In [7]:
file_name = "shalaka_only_punct_outputs.csv"
mode = "only_punct"

In [8]:
df = pd.read_csv(file_name)

predictions = df['prediction'].tolist()
references = df['gemini'].tolist()

scores = compute_ref_based_scores(predictions, references)
labse[mode] = scores['labse_ref_mt']
muril[mode] = scores['muril_ref_mt']

print(f"LaBSE and MuRIL Scores for {mode}:")
print(scores)

LaBSE and MuRIL Scores for only_punct:
{'labse_ref_mt': 0.9145167414788846, 'muril_ref_mt': 0.9982276989354028}


## Without Punctuations

In [9]:
file_name = "shalaka_without_punct_outputs.csv"
mode = "without_punct"

In [10]:
df = pd.read_csv(file_name)

predictions = df['prediction'].tolist()
references = df['gemini'].tolist()

scores = compute_ref_based_scores(predictions, references)
labse[mode] = scores['labse_ref_mt']
muril[mode] = scores['muril_ref_mt']

print(f"LaBSE and MuRIL Scores for {mode}:")
print(scores)

LaBSE and MuRIL Scores for without_punct:
{'labse_ref_mt': 0.9121896315504003, 'muril_ref_mt': 0.9982888632350497}


## Combined

In [11]:
file_name = "shalaka_combined_outputs.csv"
mode = "combined"

In [12]:
df = pd.read_csv(file_name)

predictions = df['prediction'].tolist()
references = df['gemini'].tolist()

scores = compute_ref_based_scores(predictions, references)
labse[mode] = scores['labse_ref_mt']
muril[mode] = scores['muril_ref_mt']

print(f"LaBSE and MuRIL Scores for {mode}:")
print(scores)

LaBSE and MuRIL Scores for combined:
{'labse_ref_mt': 0.9090791543324789, 'muril_ref_mt': 0.9983516655586384}


## Combined - LR & Epochs Changed

In [13]:
file_name = "shalaka_combined_le_outputs.csv"
mode = "combined_le"

In [14]:
df = pd.read_csv(file_name)

predictions = df['prediction'].tolist()
references = df['gemini'].tolist()

scores = compute_ref_based_scores(predictions, references)
labse[mode] = scores['labse_ref_mt']
muril[mode] = scores['muril_ref_mt']

print(f"LaBSE and MuRIL Scores for {mode}:")
print(scores)

LaBSE and MuRIL Scores for combined_le:
{'labse_ref_mt': 0.911981858589031, 'muril_ref_mt': 0.9983177008452239}


## Combined - LR & Epochs & Dataset Changed

In [15]:
file_name = "shalaka_combined_led_outputs.csv"
mode = "combined_led"

In [16]:
df = pd.read_csv(file_name)

predictions = df['prediction'].tolist()
references = df['gemini'].tolist()

scores = compute_ref_based_scores(predictions, references)
labse[mode] = scores['labse_ref_mt']
muril[mode] = scores['muril_ref_mt']

print(f"LaBSE and MuRIL Scores for {mode}:")
print(scores)

LaBSE and MuRIL Scores for combined_led:
{'labse_ref_mt': 0.9047032550529197, 'muril_ref_mt': 0.998309498583829}


## GPT-5

In [17]:
file_name = "shalaka_gpt_outputs.csv"
mode = "gpt"

In [18]:
df = pd.read_csv(file_name)

predictions = df['prediction'].tolist()
references = df['gemini'].tolist()

scores = compute_ref_based_scores(predictions, references)
labse[mode] = scores['labse_ref_mt']
muril[mode] = scores['muril_ref_mt']

print(f"LaBSE and MuRIL Scores for {mode}:")
print(scores)

LaBSE and MuRIL Scores for gpt:
{'labse_ref_mt': 0.9095758188653875, 'muril_ref_mt': 0.9978245452598289}


## BERT

In [19]:
file_name = "approach1_bert_outputs.csv"
mode = "bert"

In [20]:
df = pd.read_csv(file_name)

predictions = df['prediction'].tolist()
references = df['gemini'].tolist()

scores = compute_ref_based_scores(predictions, references)
labse[mode] = scores['labse_ref_mt']
muril[mode] = scores['muril_ref_mt']

print(f"LaBSE and MuRIL Scores for {mode}:")
print(scores)

LaBSE and MuRIL Scores for bert:
{'labse_ref_mt': 0.9199125038252937, 'muril_ref_mt': 0.9983612729443444}


## MPNet

In [21]:
file_name = "approach1_mpnet_outputs.csv"
mode = "mpnet"

In [22]:
df = pd.read_csv(file_name)

predictions = df['prediction'].tolist()
references = df['gemini'].tolist()

scores = compute_ref_based_scores(predictions, references)
labse[mode] = scores['labse_ref_mt']
muril[mode] = scores['muril_ref_mt']

print(f"LaBSE and MuRIL Scores for {mode}:")
print(scores)

LaBSE and MuRIL Scores for mpnet:
{'labse_ref_mt': 0.9209621195439939, 'muril_ref_mt': 0.9983330644943096}


## T5

In [23]:
file_name = "approach1_t5_outputs.csv"
mode = "t5"

In [24]:
df = pd.read_csv(file_name)

predictions = df['prediction'].tolist()
references = df['gemini'].tolist()

scores = compute_ref_based_scores(predictions, references)
labse[mode] = scores['labse_ref_mt']
muril[mode] = scores['muril_ref_mt']

print(f"LaBSE and MuRIL Scores for {mode}:")
print(scores)

LaBSE and MuRIL Scores for t5:
{'labse_ref_mt': 0.923039291743879, 'muril_ref_mt': 0.998339714827361}


In [25]:
for key, val in labse.items():
    print(f"{key} : {val:.4f}")

original : 0.9125
only_punct : 0.9145
without_punct : 0.9122
combined : 0.9091
combined_le : 0.9120
combined_led : 0.9047
gpt : 0.9096
bert : 0.9199
mpnet : 0.9210
t5 : 0.9230


In [28]:
for key, val in muril.items():
    print(f"{key} : {val:.5f}")

original : 0.99717
only_punct : 0.99823
without_punct : 0.99829
combined : 0.99835
combined_le : 0.99832
combined_led : 0.99831
gpt : 0.99782
bert : 0.99836
mpnet : 0.99833
t5 : 0.99834
