In [4]:
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F
import pandas as pd

In [5]:
import os
os.getcwd()

'/workspace/Approach1/perf-outputs'

In [6]:
labse_tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/LaBSE")
labse_model = AutoModel.from_pretrained("sentence-transformers/LaBSE")

muril_tokenizer = AutoTokenizer.from_pretrained("google/muril-base-cased")
muril_model = AutoModel.from_pretrained("google/muril-base-cased")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
labse_model.to(device)
muril_model.to(device)

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(197285, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-11): 12 x BertLayer(
        (attention): BertAttention(
          (self): BertSdpaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=Fals

In [7]:
def compute_ref_based_scores(mt_texts, ref_texts, method="max"):
    results = []
    
    for i, mt in enumerate(mt_texts):
        ref = ref_texts[i]
        
        labse_ref_scores = []
        muril_ref_scores = []
        
        # --- LaBSE ---
        inputs = labse_tokenizer([ref, mt], padding=True, truncation=True, return_tensors="pt").to(device)
        with torch.no_grad():
            embeddings = labse_model(**inputs).pooler_output
        labse_ref_mt = F.cosine_similarity(embeddings[0].unsqueeze(0), embeddings[1].unsqueeze(0)).item()

        # --- MuRIL ---
        inputs = muril_tokenizer([ref, mt], padding=True, truncation=True, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = muril_model(**inputs)
            embeddings = outputs.last_hidden_state.mean(dim=1)
        muril_ref_mt = F.cosine_similarity(embeddings[0].unsqueeze(0), embeddings[1].unsqueeze(0)).item()

        results.append({
            "mt": mt,
            "labse_ref_mt": labse_ref_mt,
            "muril_ref_mt": muril_ref_mt
        })
    
    df_scores = pd.DataFrame(results)
    
    # System-level averages
    system_scores = {
        "labse_ref_mt": df_scores["labse_ref_mt"].mean(),
        "muril_ref_mt": df_scores["muril_ref_mt"].mean()
    }
    
    return system_scores

In [8]:
labse = {}
muril = {}

## T5 Punctuation Restoration

In [6]:
file_name = "approach1_eng_to_eng_t5_outputs_punct_restor_data.csv"
mode = "t5_punct"

In [7]:
df = pd.read_csv(file_name)

predictions = df['prediction'].tolist()
references = df['gt'].tolist()

scores = compute_ref_based_scores(predictions, references)
labse[mode] = scores['labse_ref_mt']
muril[mode] = scores['muril_ref_mt']

print(f"LaBSE and MuRIL Scores for {mode}:")
print(scores)

LaBSE and MuRIL Scores for t5_punct:
{'labse_ref_mt': 0.9895218774622835, 'muril_ref_mt': 0.9998911888104498}


## T5 Seq-to-Seq

In [8]:
file_name = "approach1_eng_to_eng_t5_outputs_mar_data.csv"
mode = "t5"

In [9]:
df = pd.read_csv(file_name)

predictions = df['prediction'].tolist()
references = df['gt'].tolist()

scores = compute_ref_based_scores(predictions, references)
labse[mode] = scores['labse_ref_mt']
muril[mode] = scores['muril_ref_mt']

print(f"LaBSE and MuRIL Scores for {mode}:")
print(scores)

LaBSE and MuRIL Scores for t5:
{'labse_ref_mt': 0.9862364155274851, 'muril_ref_mt': 0.9998385266021446}


## IndicTrans2 Sentences Meant

In [10]:
file_name = "sent_meant_outputs.csv"
mode = "original_meant"

In [11]:
df = pd.read_csv(file_name)

predictions = df['prediction'].tolist()
references = df['gemini'].tolist()

scores = compute_ref_based_scores(predictions, references)
labse[mode] = scores['labse_ref_mt']
muril[mode] = scores['muril_ref_mt']

print(f"LaBSE and MuRIL Scores for {mode}:")
print(scores)

LaBSE and MuRIL Scores for original_meant:
{'labse_ref_mt': 0.9312928307939459, 'muril_ref_mt': 0.9983996759962153}


## Cadence Approach1

In [9]:
file_name = "cadence_outputs.csv"
mode = "cadence"

In [10]:
df = pd.read_csv(file_name)

predictions = df['prediction_mar'].tolist()
references = df['gt_mar'].tolist()

scores = compute_ref_based_scores(predictions, references)
labse[mode] = scores['labse_ref_mt']
muril[mode] = scores['muril_ref_mt']

print(f"LaBSE and MuRIL Scores for {mode}:")
print(scores)

LaBSE and MuRIL Scores for cadence:
{'labse_ref_mt': 0.9209856909734232, 'muril_ref_mt': 0.9983644054995643}


## Cadence Punctuation Restoration

In [12]:
file_name = "approach1_eng_to_eng_cadence_outputs_punct_restor_data.csv"
mode = "cadence_approach1"

In [13]:
df = pd.read_csv(file_name)

predictions = df['prediction'].tolist()
references = df['gt'].tolist()

scores = compute_ref_based_scores(predictions, references)
labse[mode] = scores['labse_ref_mt']
muril[mode] = scores['muril_ref_mt']

print(f"LaBSE and MuRIL Scores for {mode}:")
print(scores)

LaBSE and MuRIL Scores for cadence_approach1:
{'labse_ref_mt': 0.9833641383383009, 'muril_ref_mt': 0.999806100571597}


## DeepSeek

In [15]:
file_name = "shalaka_deepseek_outputs.csv"
mode = "deepseek"

In [16]:
df = pd.read_csv(file_name)

predictions = df['prediction'].tolist()
references = df['gt'].tolist()

scores = compute_ref_based_scores(predictions, references)
labse[mode] = scores['labse_ref_mt']
muril[mode] = scores['muril_ref_mt']

print(f"LaBSE and MuRIL Scores for {mode}:")
print(scores)

LaBSE and MuRIL Scores for deepseek:
{'labse_ref_mt': 0.9197003432997951, 'muril_ref_mt': 0.9981370844222881}


In [19]:
for key, val in labse.items():
    print(f"{key} : {val:.4f}")

cadence : 0.9210
cadence_approach1 : 0.9834
deepseek : 0.9197


In [17]:
for key, val in muril.items():
    print(f"{key} : {val:.6f}")

cadence : 0.998364
cadence_approach1 : 0.999806
deepseek : 0.998137
