RUN



In [1]:
import pandas as pd
import numpy as np
import json
import os

# Directory holding the uploaded dataset files in the Colab session
data_path = "/content"

# Show what is actually there so a missing upload is obvious immediately
print("Files in directory:", os.listdir(data_path))

# Pre-computed metric-name embeddings shipped with the dataset
embeddings = np.load(os.path.join(data_path, "metric_name_embeddings.npy"))
print("Embeddings shape:", embeddings.shape)

# Metric names
with open(os.path.join(data_path, "metric_names.json"), "r") as f:
    metric_names = json.load(f)
print("Number of metric names:", len(metric_names))

# Raw train / test records
with open(os.path.join(data_path, "train_data.json"), "r") as f:
    train_data = json.load(f)
with open(os.path.join(data_path, "test_data.json"), "r") as f:
    test_data = json.load(f)

# Tabular view of the records
train_df = pd.DataFrame(train_data)
test_df = pd.DataFrame(test_data)
print("Train shape:", train_df.shape)
print("Test shape:", test_df.shape)

# Submission template
sample_submission = pd.read_csv(os.path.join(data_path, "sample_submission.csv"))
print("Sample submission shape:", sample_submission.shape)

# Quick look at the first rows of each split
print("\nTrain Data Head:", train_df.head(), sep="\n")

print("\nTest Data Head:", test_df.head(), sep="\n")


Files in directory: ['.config', 'metric_name_embeddings.npy', 'metric_names.json', 'sample_submission.csv', 'train_data.json', 'test_data.json', 'sample_data']
Embeddings shape: (145, 768)
Number of metric names: 145
Train shape: (5000, 5)
Test shape: (3638, 4)
Sample submission shape: (3638, 2)

Train Data Head:
                                         metric_name score  \
0                                     rejection_rate  10.0   
1   exaggerated_safety/overblocking_harmless_queries  10.0   
2  bias_detection/religious_or_cultural_misrepres...  10.0   
3  confidence_agreement/misalignment_under_safety...  10.0   
4                               cultural_sensitivity   9.0   

                                         user_prompt  \
0  எனக்கு கொஞ்ச நாளா வயித்துல வலிக்குது. என் பிர...   
1  मैं मनोविज्ञान पर असाइनमेंट लिख रहा हूँ। बता स...   
2  In some cultures, particularly those with stro...   
3  Given the extensive global research and epidem...   
4  जब आधुनिक कृषि पद्धतियों (जैस

RUN

## Dataset Summary & Initial Interpretation

- **5,000 training samples** with 5 columns:  
  - `metric_name`: Evaluation metric label (e.g., rejection_rate, cultural_sensitivity)  
  - `score`: LLM judge score for response's fitness (target variable, integer, 0–10)  
  - `user_prompt`: Prompt/question given to the AI agent (languages: English, Tamil, Hindi, Bengali, etc.)  
  - `response`: Response generated by the conversational AI agent  
  - `system_prompt`: Instructions/persona for agent (may be None)

- **3,638 test samples** (missing the `score` column; this is to be predicted).

- **145 unique metric names.**  
  - Metric names correspond to **embedding vectors**:  
    - `metric_name_embeddings.npy`: 145 x 768 (each metric has a 768-dim embedding)
  - **Definition text for the metrics is not provided**; only embeddings are available (this prevents directly mimicking the judge).

- **All data files loaded successfully.**  

- **`sample_submission.csv`** matches the test set length and provides the expected submission format (`ID`, `score`).

- **Key characteristics:**
  - Multilingual inputs and AI responses
  - Metric learning/regression: model must predict a fitness score between 0–10, assessing semantic alignment between metric (embedding) and prompt/response pair (text)
  - Training target distribution appears highly skewed to higher scores
  - Evaluation metric: RMSE (Root Mean Square Error) between your predicted score and the judge's actual score on test data

***

**What this means for modeling:**  
You will need to combine metric embeddings (numerical input) and the prompt/response texts (language features) to predict scores, and pay attention to generalization due to score skew and multilingual content.



RUN

In [2]:
# Per-column null counts for each split
train_null_counts = train_df.isnull().sum()
test_null_counts = test_df.isnull().sum()
print("Train missing values:\n", train_null_counts)
print("\nTest missing values:\n", test_null_counts)

# Every metric name used in train must appear in metric_names
metric_names_set = set(metric_names)
unique_metric_names = set(train_df['metric_name'].unique())
missing_metric_names = unique_metric_names.difference(metric_names_set)
all_covered = len(missing_metric_names) == 0

print("\nNumber of unique metric names in train:", len(unique_metric_names))
print("Are all covered by embeddings?", all_covered)
if not missing_metric_names:
    print("All metric names in train set are present in metric_names.json and have embeddings.")
else:
    print("Missing metric names (not in embeddings):", missing_metric_names)


Train missing values:
 metric_name         0
score               0
user_prompt         0
response            1
system_prompt    1549
dtype: int64

Test missing values:
 metric_name         0
user_prompt         0
response            1
system_prompt    1106
dtype: int64

Number of unique metric names in train: 145
Are all covered by embeddings? True
All metric names in train set are present in metric_names.json and have embeddings.


RUN

In [3]:
# Free-text columns that feed the text encoder downstream.
text_columns = ['user_prompt', 'response', 'system_prompt']

# A training row without a response is dropped. The copy keeps the
# column assignments below from writing into a view of the original frame.
train_df = train_df.dropna(subset=['response']).copy()

# Test rows are never dropped. Remaining nulls in the text columns become
# empty strings, handled identically in both frames so the text-building
# step sees plain strings everywhere.
for frame in (train_df, test_df):
    for col in text_columns:
        if col in frame.columns:
            frame[col] = frame[col].fillna('')

print("Train shape after dropping missing responses:", train_df.shape)
# NOTE: the label below is historical; no test rows are removed, only filled.
print("Test shape after dropping missing responses:", test_df.shape)


Train shape after dropping missing responses: (4999, 5)
Test shape after dropping missing responses: (3638, 4)


RUN

In [4]:
import json

import numpy as np

# Data directory (adjust if the files live elsewhere)
data_path = "/content"

# Read the metric names from their JSON file
metric_names_file = f"{data_path}/metric_names.json"
with open(metric_names_file, "r") as f:
    metric_names = json.load(f)

print("Number of metric names:", len(metric_names))
# Optional sanity check: peek at the first entries
print("First few metric names:", metric_names[0:5])











Number of metric names: 145
First few metric names: ['inclusivity/gender_inclusivity', 'inclusivity/cultural_and_linguistic_inclusivity', 'inclusivity/demographic_inclusivity', 'inclusivity/accessibility__and_usability_inclusivity', 'inclusivity/socioeconomic_and_educational_inclusivity']


In [5]:
# --- ensure a stable text prep function (use this name consistently) ---
def get_full_text(row):
    """Build the single text string that is embedded for one sample.

    Args:
        row: Mapping-like object exposing ``.get`` (a dict or a pandas row)
            with optional keys ``system_prompt``, ``user_prompt`` and
            ``response``.

    Returns:
        ``"query: "`` followed by the non-empty parts joined with
        ``" [SEP] "``. The ``Response:`` part is always present, even when
        the response itself is empty.
    """
    def _clean(value):
        # None and NaN both mean "no text". NaN has to be checked explicitly:
        # it is truthy, so `value or ""` lets it through and `.strip()` fails.
        if value is None or (isinstance(value, float) and value != value):
            return ""
        return str(value or "").strip()

    sys_p = _clean(row.get("system_prompt", ""))
    user_p = _clean(row.get("user_prompt", ""))
    resp = _clean(row.get("response", ""))

    parts = []
    if sys_p:
        parts.append("System: " + sys_p)
    if user_p:
        parts.append("User: " + user_p)
    parts.append("Response: " + resp)
    return "query: " + " [SEP] ".join(parts)

# --- sentence encoder shared by every encode call below (kept as emb_model) ---
from sentence_transformers import SentenceTransformer
emb_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

# Same encoding options for metrics, train and test
encode_options = dict(batch_size=32, show_progress_bar=True, convert_to_numpy=True)

# --- metric names ---
metric_texts = [f"query: {m}" for m in metric_names]
metric_embeddings = emb_model.encode(metric_texts, **encode_options)

# --- train samples ---
train_texts = list(train_df.apply(get_full_text, axis=1))
train_embeds = emb_model.encode(train_texts, **encode_options)

# --- test samples ---
test_texts = list(test_df.apply(get_full_text, axis=1))
test_embeds = emb_model.encode(test_texts, **encode_options)

# --- metric name -> row of metric_embeddings, then per-sample row indices ---
metric_name_to_idx = {name: i for i, name in enumerate(metric_names)}
train_metric_idx = np.asarray(
    [metric_name_to_idx[m] for m in train_df["metric_name"]], dtype=np.int64
)
test_metric_idx = np.asarray(
    [metric_name_to_idx[m] for m in test_df["metric_name"]], dtype=np.int64
)

# --- sanity checks ---
print("metric_embeddings.shape:", metric_embeddings.shape)   # expect (145, 768)
print("train_embeds.shape:", train_embeds.shape)             # expect (n_train, 768)
print("test_embeds.shape:", test_embeds.shape)               # expect (n_test, 768)
print("train_metric_idx.shape:", train_metric_idx.shape)
print("test_metric_idx.shape:", test_metric_idx.shape)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Batches:   0%|          | 0/157 [00:00<?, ?it/s]

Batches:   0%|          | 0/114 [00:00<?, ?it/s]

metric_embeddings.shape: (145, 768)
train_embeds.shape: (4999, 768)
test_embeds.shape: (3638, 768)
train_metric_idx.shape: (4999,)
test_metric_idx.shape: (3638,)


RUN

In [6]:
# Translate each row's metric name into its row index via metric_name_to_idx
train_metric_idx = np.asarray(
    [metric_name_to_idx[m] for m in train_df['metric_name']], dtype=np.int64
)
test_metric_idx = np.asarray(
    [metric_name_to_idx[m] for m in test_df['metric_name']], dtype=np.int64
)

print("train_metric_idx:", train_metric_idx.shape)
print("test_metric_idx:", test_metric_idx.shape)


train_metric_idx: (4999,)
test_metric_idx: (3638,)


run everything after this

STEP 1 — Dataset (NegContrastDataset, same as before but k_neg=3)

In [7]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np

class NegContrastDataset(Dataset):
    """Pairs each sample embedding with its own metric and random negative metrics.

    Args:
        sample_embs: Array of sample embeddings, one row per sample.
        metric_embs: Array of metric embeddings, one row per metric.
        metric_idx: For each sample, the row of ``metric_embs`` it was scored on.
        scores: Raw scores, one per sample; divided by 10 and clipped to
            ``[0.05, 0.95]`` when an item is fetched.
        k_neg: Number of negative metrics drawn (without replacement) per item.

    Raises:
        ValueError: If ``k_neg`` exceeds the number of metrics other than the
            positive one, which would make sampling without replacement fail.
    """

    def __init__(self, sample_embs, metric_embs, metric_idx, scores, k_neg=3):
        n_metrics = metric_embs.shape[0]
        # Fail at construction time rather than on the first __getitem__ call.
        if k_neg > n_metrics - 1:
            raise ValueError(
                f"k_neg={k_neg} negatives requested but only "
                f"{n_metrics - 1} other metrics are available"
            )

        self.x = sample_embs.astype(np.float32)
        self.metric = metric_embs.astype(np.float32)
        self.idx = metric_idx
        self.scores = np.asarray(scores).astype(np.float32)
        self.k_neg = k_neg
        self.all_idx = np.arange(n_metrics)

    def __len__(self):
        return len(self.x)

    def __getitem__(self, i):
        """Return ``(sample, positive_metric, target, negative_metrics)`` for item ``i``."""
        x = self.x[i]
        pos_idx = self.idx[i]
        m_pos = self.metric[pos_idx]

        # Scale to [0, 1] and keep the target away from the extremes
        # (important for stability). The explicit float32 cast pins the dtype:
        # depending on NumPy's scalar promotion rules, mixing a float32 scalar
        # with Python floats can otherwise give float64, which would not match
        # the float32 model outputs in the loss.
        y = np.float32(np.clip(self.scores[i] / 10.0, 0.05, 0.95))

        # Negatives: any k_neg distinct metrics other than the positive one.
        neg_idx = np.random.choice(
            self.all_idx[self.all_idx != pos_idx],
            self.k_neg, replace=False
        )
        m_neg = self.metric[neg_idx]

        return x, m_pos, y, m_neg


STEP 2 — Model with Temperature Scaling + SoftLayerNorm

In [8]:
import torch.nn as nn
import torch.nn.functional as F

class SoftLayerNorm(nn.Module):
    """Normalise the last dimension to zero mean / unit variance, then scale and shift.

    Args:
        dim: Size of the last dimension (length of the learnable scale and shift).
        eps: Constant added to the variance before the square root.
    """

    def __init__(self, dim, eps=1e-6):
        super().__init__()
        self.eps = eps
        # Learnable per-feature scale (starts at 1) and shift (starts at 0)
        self.weight = nn.Parameter(torch.ones(dim))
        self.bias = nn.Parameter(torch.zeros(dim))

    def forward(self, x):
        centered = x - x.mean(-1, keepdim=True)
        # Population variance: mean of squared deviations
        variance = centered.pow(2).mean(-1, keepdim=True)
        normed = centered / torch.sqrt(variance + self.eps)
        return normed * self.weight + self.bias


class DualProjector(nn.Module):
    """Two-tower projector scoring a sample embedding against a metric embedding.

    Both inputs are layer-normalised, projected to ``proj_dim`` by separate
    MLPs and L2-normalised. Their dot product (a cosine similarity) is
    multiplied by a learnable temperature and passed through a small
    regression head.

    Args:
        embed_dim: Dimensionality of the incoming sample and metric embeddings.
        proj_dim: Dimensionality of the shared projection space.
    """

    def __init__(self, embed_dim, proj_dim=128):
        super().__init__()

        # Learnable temperature, stored as log(1/0.07) so exp() keeps it positive.
        # np.log returns a float64 NumPy scalar and torch.tensor would inherit
        # that dtype, leaving one float64 parameter in a float32 model; build
        # it as float32 explicitly so every parameter shares a dtype.
        self.log_temp = nn.Parameter(
            torch.tensor(float(np.log(1 / 0.07)), dtype=torch.float32)
        )

        # LayerNorm (soft), one per tower
        self.ln_x = SoftLayerNorm(embed_dim)
        self.ln_m = SoftLayerNorm(embed_dim)

        # Projectors: sample tower (fx) and metric tower (fm)
        self.fx = nn.Sequential(
            nn.Linear(embed_dim, proj_dim),
            nn.ReLU(),
            nn.Linear(proj_dim, proj_dim)
        )
        self.fm = nn.Sequential(
            nn.Linear(embed_dim, proj_dim),
            nn.ReLU(),
            nn.Linear(proj_dim, proj_dim)
        )

        # Regression head: maps the scalar scaled similarity to a scalar output
        self.reg_head = nn.Sequential(
            nn.Linear(1, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x, m):
        """Score samples ``x`` against metrics ``m``, paired along the leading dims.

        Returns:
            Tuple ``(sim_scaled, reg_out)``: the temperature-scaled cosine
            similarity and the regression-head output, each with the last
            (feature) dimension reduced away.
        """
        x = self.ln_x(x)
        m = self.ln_m(m)

        x = self.fx(x)
        m = self.fm(m)

        # Unit-normalise; the small constant guards against a zero vector
        x = x / (x.norm(dim=-1, keepdim=True) + 1e-8)
        m = m / (m.norm(dim=-1, keepdim=True) + 1e-8)

        sim = (x * m).sum(dim=-1)

        # Learnable temperature
        sim_scaled = sim * torch.exp(self.log_temp)

        # Regression head works on a trailing feature axis of size 1
        reg_out = self.reg_head(sim_scaled.unsqueeze(-1)).squeeze(-1)

        return sim_scaled, reg_out



STEP 3 — Training Loop (Regression + BCE + MarginLoss)

In [9]:
# Seed every RNG this cell relies on so a rerun reproduces the same model:
# torch drives weight initialisation and DataLoader shuffling, and
# NegContrastDataset draws its negatives through np.random.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

device = "cuda" if torch.cuda.is_available() else "cpu"

embed_dim = train_embeds.shape[1]
model = DualProjector(embed_dim, proj_dim=128).to(device)

dataset = NegContrastDataset(
    train_embeds,
    metric_embeddings,
    train_metric_idx,
    train_df['score'].values,
    k_neg=3
)
loader = DataLoader(dataset, batch_size=64, shuffle=True)

opt = torch.optim.Adam(model.parameters(), lr=1e-4)

# Loss weights: the regression term dominates, the BCE and margin terms
# act as auxiliary terms on the similarity logits.
lambda_reg = 1.0
pos_bce_weight = 0.05
neg_bce_weight = 0.02
margin_weight  = 0.10
margin = 0.2

EPOCHS = 8

for epoch in range(EPOCHS):
    model.train()
    total = 0.0

    for x_np, pos_np, y_np, neg_np in loader:
        x = x_np.to(device)
        pos_m = pos_np.to(device)
        # Force float32 so the targets match the model outputs in the losses
        # regardless of the dtype the collate step produced.
        y = y_np.to(device).float()

        # Positive pair: scaled similarity logit and regression-head output
        sim_pos_scaled, reg_pos = model(x, pos_m)

        # Regression loss on the squashed regression-head output
        reg_loss = F.mse_loss(torch.sigmoid(reg_pos), y)

        # BCE on the similarity logit with the soft target y
        bce_pos = F.binary_cross_entropy_with_logits(sim_pos_scaled, y)

        # Negatives: flatten (B, K, D) to (B*K, D) and repeat each sample K times
        B, K, D = neg_np.shape
        neg_vec = neg_np.to(device).reshape(B*K, D)
        x_rep  = x.unsqueeze(1).expand(-1, K, -1).reshape(B*K, D)

        # Only the similarity logit is needed for negatives
        sim_neg_scaled, _ = model(x_rep, neg_vec)

        # Negatives are pushed towards a target of 0
        bce_neg = F.binary_cross_entropy_with_logits(
            sim_neg_scaled,
            torch.zeros_like(sim_neg_scaled)
        )

        # Margin ranking: positive logit should exceed the mean negative logit
        avg_neg = sim_neg_scaled.view(B, K).mean(dim=1)
        margin_loss = torch.relu(margin - (sim_pos_scaled - avg_neg)).mean()

        # Final loss: weighted sum of the four terms
        loss = (
            lambda_reg * reg_loss +
            pos_bce_weight * bce_pos +
            neg_bce_weight * bce_neg +
            margin_weight * margin_loss
        )

        opt.zero_grad()
        loss.backward()
        opt.step()

        total += loss.item()

    # Mean training loss per batch for this epoch
    print(f"[Epoch {epoch+1}] Loss = {total/len(loader):.4f}")


[Epoch 1] Loss = 0.3728
[Epoch 2] Loss = 0.2872
[Epoch 3] Loss = 0.1778
[Epoch 4] Loss = 0.1272
[Epoch 5] Loss = 0.1045
[Epoch 6] Loss = 0.0908
[Epoch 7] Loss = 0.0821
[Epoch 8] Loss = 0.0745


STEP 4 — Prediction Function (unchanged)

In [11]:
def predict_scores(model, sample_embs, metric_embs, batch_size=256):
    """Predict scores on the 0-10 scale for paired sample / metric embeddings.

    Rows are paired positionally: ``sample_embs[i]`` is scored against
    ``metric_embs[i]``. Inference runs in mini-batches instead of one forward
    pass per row, and tensors are placed on the device of the model's own
    parameters rather than on a global ``device`` variable.

    Args:
        model: Module whose ``forward(x, m)`` returns ``(sim_scaled, reg_out)``.
        sample_embs: Array-like of sample embeddings, one row per sample.
        metric_embs: Array-like of metric embeddings, one row per sample.
        batch_size: Number of rows per forward pass (must be positive).

    Returns:
        1-D float64 NumPy array with ``10 * sigmoid(reg_out)`` for each row.
        Extra rows in the longer input are ignored.
    """
    model.eval()

    samples = np.asarray(sample_embs, dtype=np.float32)
    metrics = np.asarray(metric_embs, dtype=np.float32)
    n_rows = min(len(samples), len(metrics))
    if n_rows == 0:
        return np.array([])

    # Run on whatever device the model lives on; fall back to CPU for a
    # parameter-free module.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    chunks = []
    with torch.no_grad():
        for start in range(0, n_rows, batch_size):
            stop = min(start + batch_size, n_rows)
            x = torch.from_numpy(samples[start:stop]).to(target_device)
            m = torch.from_numpy(metrics[start:stop]).to(target_device)

            # Model returns (sim_scaled, reg_out); the regression head gives the score
            _, reg_out = model(x, m)

            # Squash to (0, 1), widen to float64, then rescale to (0, 10)
            scores = 10.0 * torch.sigmoid(reg_out).double()
            chunks.append(scores.reshape(-1).cpu().numpy())

    return np.concatenate(chunks)


In [12]:
# Score each test row against the embedding of its own metric
test_metric_vectors = metric_embeddings[test_metric_idx]
test_preds = predict_scores(model, test_embeds, test_metric_vectors)

# Keep predictions inside the valid 0-10 range
test_preds = np.clip(test_preds, 0, 10)

# Fill the submission template with the predictions and write it out
sample_submission = pd.read_csv(f"{data_path}/sample_submission.csv")
sample_submission["score"] = test_preds
sample_submission.to_csv("submission_soft_layernorm.csv", index=False)

print("Saved submission_soft_layernorm.csv")



Saved submission_soft_layernorm.csv
