In [1]:
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# pip install --upgrade torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu

Looking in indexes: https://download.pytorch.org/whl/cpu, https://pypi.ngc.nvidia.com
Collecting torch
  Downloading https://download.pytorch.org/whl/cpu/torch-2.9.1%2Bcpu-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (29 kB)
Collecting torchvision
  Downloading https://download.pytorch.org/whl/cpu/torchvision-0.24.1%2Bcpu-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (5.9 kB)
Collecting torchaudio
  Downloading https://download.pytorch.org/whl/cpu/torchaudio-2.9.1%2Bcpu-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (6.9 kB)
Collecting sympy>=1.13.3 (from torch)
  Downloading sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Downloading https://download.pytorch.org/whl/cpu/torch-2.9.1%2Bcpu-cp310-cp310-manylinux_2_28_x86_64.whl (184.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.4/184.4 MB[0m [31m100.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading https://download.pytorch.org/whl/cpu/torchvision-0.24.1%2Bcpu-cp310-cp310-manylinux_2_28_x86_6

In [3]:
!nvidia-smi

Fri Dec 26 12:11:02 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.163.01             Driver Version: 550.163.01     CUDA Version: 12.5     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A100 80GB PCIe          Off |   00000000:17:00.0 Off |                    0 |
| N/A   59C    P0             73W /  300W |    4219MiB /  81920MiB |      0%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------------+
|   1  NVIDIA A100 80GB PCIe          Off |   00

In [4]:
import os
print(os.getcwd())
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

/workspace/Prompting/responses/ready_for_metrics


In [5]:
labse_tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/LaBSE")
labse_model = AutoModel.from_pretrained("sentence-transformers/LaBSE")

muril_tokenizer = AutoTokenizer.from_pretrained("google/muril-base-cased")
muril_model = AutoModel.from_pretrained("google/muril-base-cased")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
labse_model.to(device)
muril_model.to(device)

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(197285, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-11): 12 x BertLayer(
        (attention): BertAttention(
          (self): BertSdpaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=Fals

In [13]:
import torch
import torch.nn.functional as F
import pandas as pd

def compute_ref_based_scores(mt_texts, ref_texts, device=None):
    """
    Compute reference-based similarity scores (LaBSE and MuRIL) between MT outputs and references.
    
    Args:
        mt_texts (list of str): Machine-translated sentences.
        ref_texts (list of str): Reference sentences.
        device (torch.device): Torch device (CPU/GPU). If None, auto-select.
    
    Returns:
        dict: Average scores {'labse_ref_mt': ..., 'muril_ref_mt': ...}
    """
    
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    labse_model.eval()
    muril_model.eval()
    
    labse_scores = []
    muril_scores = []

    for mt, ref in zip(mt_texts, ref_texts):
        # Skip if any text is empty or NaN
        if not mt or not ref or pd.isna(mt) or pd.isna(ref):
            continue
        
        # --- LaBSE ---
        try:
            inputs = labse_tokenizer(ref, mt, padding=True, truncation=True, return_tensors="pt").to(device)
            with torch.no_grad():
                embeddings = labse_model(**inputs).pooler_output
                # Ensure we have 2 embeddings
                if embeddings.shape[0] >= 2:
                    cos_sim = F.cosine_similarity(embeddings[0:1], embeddings[1:2]).item()
                    labse_scores.append(cos_sim)
        except Exception as e:
            print(f"Skipping LaBSE pair due to error: {e}")
            continue
        
        # --- MuRIL ---
        try:
            inputs = muril_tokenizer(ref, mt, padding=True, truncation=True, return_tensors="pt").to(device)
            with torch.no_grad():
                embeddings = muril_model(**inputs).pooler_output
                if embeddings.shape[0] >= 2:
                    cos_sim = F.cosine_similarity(embeddings[0:1], embeddings[1:2]).item()
                    muril_scores.append(cos_sim)
        except Exception as e:
            print(f"Skipping MuRIL pair due to error: {e}")
            continue

    return {
        'labse_ref_mt': sum(labse_scores)/len(labse_scores) if labse_scores else 0.0,
        'muril_ref_mt': sum(muril_scores)/len(muril_scores) if muril_scores else 0.0
    }


In [7]:
labse = {}
muril = {}

## Original

In [8]:
file_name = "llama_original.csv"

In [9]:
df = pd.read_csv(file_name)
df.head()

Unnamed: 0,Sentence,Response,sent_meant,gt
0,"Chanting, the choir raised the volume as the c...","श्रद्धांजली घेतल्यानंतर, गायिका घोषवाण्याने प्...","Chanting, the choir raised the volume as the c...","धर्मगुरू प्रार्थना म्हणत असताना, घोष करणाऱ्या ..."
1,A six-month-old calf was submitted for examina...,एकूण सहा महिन्यांचा वाढतोळा ज्याला परीक्षेसाठी...,A six-month-old calf was submitted for examina...,तपासणीसाठी आणलेल्या सहा महिन्यांच्या एका वासरा...
2,Planning authorities should provide alternativ...,व्यवस्थापन अधिकारी लहान व्यवसायासाठी घरगुती क्...,Planning authorities should provide alternativ...,नियोजन प्राधिकरणांनी लहान व्यवसायांसाठी पर्याय...
3,"As the machine develops, the forms we use to r...","येत्या वेळी मशीन विकसित होत असताना, आम्ही पूर्...","As the machine develops, the forms we use to r...","जसजशी यंत्रणा विकसित होईल, तसतसे मागील प्रकल्प..."
4,"As mentioned first, impressions can be mislead...",पहिल्या वेळी सांगितल्याप्रमाणे भावना वाईट वाटू...,"As mentioned first, impressions can be mislead...","आधी सांगितल्याप्रमाणे, पहिली छाप फसवी असू शकते."


In [15]:
predictions = df['Response'].tolist()
references = df['gt'].tolist()

scores = compute_ref_based_scores(predictions, references)
labse = scores['labse_ref_mt']
muril= scores['muril_ref_mt']

print(f"LaBSE and MuRIL Scores for {file_name}:")
print(labse, muril)

LaBSE and MuRIL Scores for llama_original.csv:
0.0 0.0


## Only Punctuations

In [7]:
file_name = "with_outputs.csv"
mode = "with"

In [8]:
df = pd.read_csv(file_name)

predictions = df['prediction_mar'].tolist()
references = df['gt_mar'].tolist()

scores = compute_ref_based_scores(predictions, references)
labse[mode] = scores['labse_ref_mt']
muril[mode] = scores['muril_ref_mt']

print(f"LaBSE and MuRIL Scores for {mode}:")
print(scores)

LaBSE and MuRIL Scores for with:
{'labse_ref_mt': 0.9197827294812637, 'muril_ref_mt': 0.9981659788903645}


## Without Punctuations

In [9]:
file_name = "without_outputs.csv"
mode = "without"

In [10]:
df = pd.read_csv(file_name)

predictions = df['prediction_mar'].tolist()
references = df['gt_mar'].tolist()

scores = compute_ref_based_scores(predictions, references)
labse[mode] = scores['labse_ref_mt']
muril[mode] = scores['muril_ref_mt']

print(f"LaBSE and MuRIL Scores for {mode}:")
print(scores)

LaBSE and MuRIL Scores for without:
{'labse_ref_mt': 0.9201303059861079, 'muril_ref_mt': 0.9981740449472081}


## Combined - LR & Epochs Changed

In [11]:
file_name = "combined_2x_outputs.csv"
mode = "combined_2x"

In [12]:
df = pd.read_csv(file_name)

predictions = df['prediction_mar'].tolist()
references = df['gt_mar'].tolist()

scores = compute_ref_based_scores(predictions, references)
labse[mode] = scores['labse_ref_mt']
muril[mode] = scores['muril_ref_mt']

print(f"LaBSE and MuRIL Scores for {mode}:")
print(scores)

LaBSE and MuRIL Scores for combined_2x:
{'labse_ref_mt': 0.9204223514679323, 'muril_ref_mt': 0.9981520064735604}


## Combined - LR & Epochs & Dataset Changed

In [13]:
file_name = "combined_x_outputs.csv"
mode = "combined_x"

In [14]:
df = pd.read_csv(file_name)

predictions = df['prediction_mar'].tolist()
references = df['gt_mar'].tolist()

scores = compute_ref_based_scores(predictions, references)
labse[mode] = scores['labse_ref_mt']
muril[mode] = scores['muril_ref_mt']

print(f"LaBSE and MuRIL Scores for {mode}:")
print(scores)

LaBSE and MuRIL Scores for combined_x:
{'labse_ref_mt': 0.9217669360018302, 'muril_ref_mt': 0.9981642475458181}


## T5

In [15]:
file_name = "t5_outputs.csv"
mode = "t5"

In [16]:
df = pd.read_csv(file_name)

predictions = df['prediction_mar'].tolist()
references = df['gt_mar'].tolist()

scores = compute_ref_based_scores(predictions, references)
labse[mode] = scores['labse_ref_mt']
muril[mode] = scores['muril_ref_mt']

print(f"LaBSE and MuRIL Scores for {mode}:")
print(scores)

LaBSE and MuRIL Scores for t5:
{'labse_ref_mt': 0.9212691990159818, 'muril_ref_mt': 0.9981615691031951}


In [17]:
for key, val in labse.items():
    print(f"{key} : {val:.4f}")

original : 0.9226
with : 0.9198
without : 0.9201
combined_2x : 0.9204
combined_x : 0.9218
t5 : 0.9213


In [21]:
for key, val in muril.items():
    print(f"{key} : {val:.6f}")

original : 0.998117
with : 0.998166
without : 0.998174
combined_2x : 0.998152
combined_x : 0.998164
t5 : 0.998162
