In [None]:
!pip -q install sentence-transformers


In [None]:
!nvidia-smi


Sat Jan 10 15:57:39 2026       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   51C    P8             10W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
!ls

dev.json  sample_data  train.json


In [None]:
%%writefile sbert_rank_task5.py
import argparse
import json
import random
from collections import defaultdict

import numpy as np
import torch
from scipy.stats import spearmanr

from sentence_transformers import SentenceTransformer, InputExample, losses
from torch.utils.data import DataLoader


def set_seed(seed: int):
    """Seed Python's `random`, NumPy, and PyTorch (CPU and every CUDA device) with `seed`."""
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    # Each library keeps its own generator, so every one is seeded in turn.
    for apply_seed in seeders:
        apply_seed(seed)


def load_json(path: str):
    """Read the UTF-8 text file at `path` and return its parsed JSON content."""
    with open(path, mode="r", encoding="utf-8") as handle:
        raw_text = handle.read()
    return json.loads(raw_text)


def story_key(sample):
    """Return the whitespace-trimmed (precontext, sentence, ending) triple of `sample`.

    The triple is hashable, so it can serve as a dictionary key for grouping
    samples that share the same story text.
    """
    fields = ("precontext", "sentence", "ending")
    return tuple(sample[name].strip() for name in fields)


def build_story_text(s):
    """Join precontext, sentence and ending of sample `s` into one space-separated string.

    Each part is stripped of surrounding whitespace. An ending that is empty
    after stripping is replaced by the placeholder "[NO ENDING]".
    """
    ending = s["ending"].strip() or "[NO ENDING]"
    parts = (s["precontext"].strip(), s["sentence"].strip(), ending)
    return " ".join(parts)


def build_sense_text(s):
    """Return the text describing the judged word sense of sample `s`.

    The result is the stripped "judged_meaning", followed by ". " and the
    stripped "example_sentence" when a non-blank example is available.

    "example_sentence" is optional: a missing key, a JSON null (None) or a
    whitespace-only string all yield the meaning alone.
    """
    # `or ""` covers a key that is present but null, which a plain
    # .get(key, "") default would pass straight to .strip() and crash on.
    ex = (s.get("example_sentence") or "").strip()
    if ex:
        return f"{s['judged_meaning'].strip()}. {ex}"
    return s["judged_meaning"].strip()


def build_pairs(samples):
    """
    Build (story, top-rated sense) training pairs, one per story at most.

    Samples are grouped by `story_key`. Inside each group the two samples with
    the highest "average" rating are compared; when their ratings differ, a
    single InputExample with texts [story text, text of the top-rated sense]
    is produced. No label and no explicit negative are attached: the pairs are
    intended for MultipleNegativesRankingLoss, which is expected to take its
    negatives from the other pairs in the batch.

    Returns:
        (examples, skipped) where `skipped` counts the groups that contain
        fewer than two samples. Groups whose two best ratings are equal are
        also dropped, but they are NOT included in `skipped`.
    """
    def rating(sample):
        return float(sample["average"])

    # Plain dict keeps first-seen order of the stories.
    by_story = {}
    for sample in samples:
        by_story.setdefault(story_key(sample), []).append(sample)

    examples = []
    skipped = 0
    for group in by_story.values():
        if len(group) < 2:
            skipped += 1
            continue

        ranked = sorted(group, key=rating, reverse=True)
        best, runner_up = ranked[0], ranked[1]

        # Only a strict preference between the two best senses is a usable signal.
        if rating(best) != rating(runner_up):
            anchor = build_story_text(best)
            positive = build_sense_text(best)
            examples.append(InputExample(texts=[anchor, positive]))

    return examples, skipped


@torch.no_grad()
def eval_spearman(model: SentenceTransformer, samples, batch_size=64):
    """
    Score every sample by story/sense embedding similarity and correlate with gold ratings.

    For each sample the story text and the sense text are encoded with
    `model.encode` (embeddings requested normalized, as NumPy arrays). The
    row-wise dot product of the two embedding matrices is used as the cosine
    similarity, clipped to [0, 1] and mapped linearly onto the 1..5 scale.

    Returns:
        (spearman, preds): the Spearman correlation between the predictions
        and the samples' "average" values as a Python float, and the array of
        predictions on the 1..5 scale.
    """
    def embed(texts):
        return model.encode(
            texts,
            batch_size=batch_size,
            normalize_embeddings=True,
            convert_to_numpy=True,
        )

    story_texts = [build_story_text(sample) for sample in samples]
    sense_texts = [build_sense_text(sample) for sample in samples]

    story_vecs = embed(story_texts)
    sense_vecs = embed(sense_texts)

    similarity = np.sum(story_vecs * sense_vecs, axis=1)
    # Rough mapping: negative similarities all collapse to the lowest rating 1.
    preds = np.clip(similarity, 0.0, 1.0) * 4.0 + 1.0
    gold = np.array([float(sample["average"]) for sample in samples], dtype=float)

    rho = spearmanr(preds, gold).correlation
    return float(rho), preds


def acc_within_sd(preds, samples):
    """
    Return the fraction of predictions that fall close enough to the gold rating.

    A prediction counts as correct when its absolute distance to the sample's
    "average" is at most max(1.0, "stdev"). `preds` and `samples` are paired
    positionally; the count is divided by len(samples).
    """
    def is_hit(pred, sample):
        gold = float(sample["average"])
        tolerance = max(1.0, float(sample["stdev"]))
        return abs(float(pred) - gold) <= tolerance

    hits = sum(1 for pred, sample in zip(preds, samples) if is_hit(pred, sample))
    return hits / len(samples)


def main():
    """
    Command-line entry point: fine-tune a SentenceTransformer and report dev metrics.

    Steps, in order: parse the arguments, seed the RNGs, load the train/dev
    JSON files (the values of each top-level JSON object are the samples),
    build the training pairs, print Spearman and Acc@SD on dev before
    training, fine-tune with MultipleNegativesRankingLoss, reload the model
    saved in --out_dir and print the same two metrics again.
    """
    # Function-scope import: nothing else in this script needs `os`.
    import os

    ap = argparse.ArgumentParser()
    ap.add_argument("--train", default="train.json")
    ap.add_argument("--dev", default="dev.json")
    ap.add_argument("--model", default="sentence-transformers/all-MiniLM-L6-v2")
    ap.add_argument("--epochs", type=int, default=3)
    ap.add_argument("--batch_size", type=int, default=32)
    ap.add_argument("--lr", type=float, default=2e-5)
    ap.add_argument("--seed", type=int, default=42)
    ap.add_argument("--out_dir", default="sbert_out")
    args = ap.parse_args()

    # model.fit() can start Weights & Biases logging, which asks an
    # interactive question when no account is configured. This script is
    # launched non-interactively (`!python ...`), where nobody can answer,
    # so W&B is switched off unless the caller already picked a mode.
    os.environ.setdefault("WANDB_MODE", "disabled")

    set_seed(args.seed)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    print("Device:", device)

    # Each file holds one JSON object; only its values (the samples) are used.
    train_data = list(load_json(args.train).values())
    dev_data = list(load_json(args.dev).values())

    train_examples, skipped = build_pairs(train_data)
    print(f"Train rows: {len(train_data)} | Train examples: {len(train_examples)} | skipped groups: {skipped}")

    model = SentenceTransformer(args.model, device=device)

    # drop_last=True: an incomplete final batch is discarded.
    train_loader = DataLoader(train_examples, shuffle=True, batch_size=args.batch_size, drop_last=True)

    # Ranking-style loss: story should be close to correct sense vs other senses in batch
    train_loss = losses.MultipleNegativesRankingLoss(model)

    # Evaluate before training
    sp0, preds0 = eval_spearman(model, dev_data)
    acc0 = acc_within_sd(preds0, dev_data)
    print(f"[BEFORE] Spearman={sp0:.4f} | Acc@SD={acc0:.4f}")

    # Warm up over the first 10% of all optimisation steps.
    warmup_steps = int(len(train_loader) * args.epochs * 0.1)

    model.fit(
        train_objectives=[(train_loader, train_loss)],
        epochs=args.epochs,
        warmup_steps=warmup_steps,
        optimizer_params={"lr": args.lr},
        output_path=args.out_dir,
        show_progress_bar=True
    )

    # Reload what fit() wrote to out_dir. No evaluator is passed to fit(), so
    # this is presumably the final weights rather than a selected "best"
    # checkpoint -- NOTE(review): confirm against the library version in use.
    best_model = SentenceTransformer(args.out_dir, device=device)
    sp, preds = eval_spearman(best_model, dev_data)
    acc = acc_within_sd(preds, dev_data)
    print(f"[FINAL] Spearman={sp:.6f} | Acc@SD={acc:.6f}")


if __name__ == "__main__":
    main()


Overwriting sbert_rank_task5.py


In [None]:
!python sbert_rank_task5.py --train train.json --dev dev.json --epochs 5 --batch_size 32


2026-01-10 16:05:49.392485: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1768061149.427979    2608 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1768061149.438504    2608 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1768061149.464670    2608 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1768061149.464710    2608 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1768061149.464719    2608 computation_placer.cc:177] computation placer alr

In [None]:
!pip -q install -U sentence-transformers scipy scikit-learn

import json
import numpy as np
from scipy.stats import spearmanr

import torch
from torch.utils.data import DataLoader

from sentence_transformers import SentenceTransformer, InputExample
from sentence_transformers.losses import CosineSimilarityLoss

# Dataset locations, relative to the notebook's working directory.
TRAIN_PATH, DEV_PATH = "train.json", "dev.json"

# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print("Device:", DEVICE)

def load_json(path):
    """Parse the UTF-8 encoded JSON file at `path` and return the resulting object."""
    with open(path, mode="r", encoding="utf-8") as src:
        return json.loads(src.read())

def build_story_text(s):
    """Concatenate precontext, sentence and ending of sample `s`, separated by single spaces.

    All three parts are whitespace-stripped. The "ending" key is optional; a
    missing or blank ending is rendered as the placeholder "[NO ENDING]".
    """
    tail = s.get("ending", "").strip()
    if not tail:
        tail = "[NO ENDING]"
    head = s["precontext"].strip()
    middle = s["sentence"].strip()
    return " ".join((head, middle, tail))

def build_sense_text(s):
    """Return the text describing the judged word sense of sample `s`.

    The result is the stripped "judged_meaning", followed by ". " and the
    stripped "example_sentence" when a non-blank example is available.

    "example_sentence" is optional: a missing key, a JSON null (None) or a
    whitespace-only string all yield the meaning alone.
    """
    # `or ""` covers a key that is present but null, which a plain
    # .get(key, "") default would pass straight to .strip() and crash on.
    ex = (s.get("example_sentence") or "").strip()
    if ex:
        return f"{s['judged_meaning'].strip()}. {ex}"
    return s["judged_meaning"].strip()

def scale_1to5_to_0to1(y):
    """Linearly rescale a rating `y` from the 1..5 scale to 0..1 (1 -> 0.0, 5 -> 1.0).

    `y` is converted with float() first, so numeric strings are accepted.
    The result is used as the target of the cosine-similarity loss; targets
    in [0, 1] rather than [-1, 1] are a deliberate choice of the author.
    """
    return (float(y) - 1.0) / 4.0

def accuracy_within_sd(preds, samples):
    """
    Return the share of predictions lying within the allowed distance of the gold rating.

    The allowed distance for a sample is max(1.0, "stdev"); the gold rating is
    its "average". `preds` and `samples` are matched by position and the
    number of hits is divided by len(samples).
    """
    def within_tolerance(pred, sample):
        gold = float(sample["average"])
        tolerance = max(1.0, float(sample["stdev"]))
        return abs(pred - gold) <= tolerance

    hits = sum(1 for pred, sample in zip(preds, samples) if within_tolerance(pred, sample))
    return hits / len(samples)

@torch.no_grad()
def evaluate(model, dev_samples, batch_size=64):
    """
    Compute Spearman correlation and Acc@SD of `model` on `dev_samples`.

    Story and sense texts are encoded with `model.encode` (embeddings
    requested normalized, as tensors); the row-wise dot product of the two
    embedding matrices is used as the cosine similarity of each pair.

    Args:
        model: object exposing a SentenceTransformer-style `encode` method.
        dev_samples: sequence of sample dicts with "average" and "stdev" plus
            the text fields read by build_story_text / build_sense_text.
        batch_size: batch size forwarded to `model.encode`.

    Returns:
        (spearman, accuracy) as Python floats.
    """
    stories = [build_story_text(s) for s in dev_samples]
    senses  = [build_sense_text(s) for s in dev_samples]

    story_emb = model.encode(stories, batch_size=batch_size, convert_to_tensor=True,
                             normalize_embeddings=True, show_progress_bar=False)
    sense_emb = model.encode(senses, batch_size=batch_size, convert_to_tensor=True,
                             normalize_embeddings=True, show_progress_bar=False)

    cos = (story_emb * sense_emb).sum(dim=1).cpu().numpy()

    # The training labels in this notebook are (average - 1) / 4, i.e. the
    # model is fitted so that cosine 0 means rating 1 and cosine 1 means
    # rating 5. Decoding must invert exactly that scaling; mapping the full
    # [-1, 1] range onto 1..5 would squeeze the predictions into [3, 5] and
    # distort Acc@SD. Cosines outside [0, 1] are clipped to the rating range.
    preds = 1.0 + 4.0 * np.clip(cos, 0.0, 1.0)
    gold  = np.array([float(s["average"]) for s in dev_samples])

    # Spearman is rank-based, so it is computed on the raw cosine: this avoids
    # the artificial ties clipping would introduce and is unaffected by the
    # choice of linear decoding above.
    sp = spearmanr(cos, gold).correlation
    acc = accuracy_within_sd(preds, dev_samples)
    return float(sp), float(acc)

# Imported here because only this training section needs it.
import os

# model.fit() can start Weights & Biases logging, which stops the cell at an
# interactive "Enter your choice" prompt when no account is configured.
# Disable W&B by default; a WANDB_MODE that is already set is respected.
os.environ.setdefault("WANDB_MODE", "disabled")

# Each file holds one JSON object; only its values (the samples) are used.
train_data = load_json(TRAIN_PATH)
dev_data   = load_json(DEV_PATH)

train_samples = list(train_data.values())
dev_samples   = list(dev_data.values())

# One (story, sense) pair per training row, labelled with the gold rating
# rescaled from 1..5 to 0..1 as the target cosine similarity.
train_examples = [
    InputExample(
        texts=[build_story_text(s), build_sense_text(s)],
        label=scale_1to5_to_0to1(s["average"])
    )
    for s in train_samples
]

model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", device=DEVICE)

train_loader = DataLoader(train_examples, shuffle=True, batch_size=32)
train_loss = CosineSimilarityLoss(model)

# Before training
sp0, acc0 = evaluate(model, dev_samples)
print(f"[BEFORE] Spearman={sp0:.4f} | Acc@SD={acc0:.4f}")

# Train
model.fit(
    train_objectives=[(train_loader, train_loss)],
    epochs=5,
    warmup_steps=100,
    show_progress_bar=True
)

# After training (same in-memory model; nothing is saved to disk here)
sp, acc = evaluate(model, dev_samples)
print(f"[FINAL] Spearman={sp:.6f} | Acc@SD={acc:.6f}")


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/8.9 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.9/8.9 MB[0m [31m28.2 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━[0m [32m7.5/8.9 MB[0m [31m110.0 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.9/8.9 MB[0m [31m86.7 MB/s[0m eta [36m0:00:00[0m
[?25hDevice: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


[BEFORE] Spearman=0.2385 | Acc@SD=0.5884


Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]

  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: (1) Create a W&B account
[34m[1mwandb[0m: (2) Use an existing W&B account
[34m[1mwandb[0m: (3) Don't visualize my results
[34m[1mwandb[0m: Enter your choice:

 3


[34m[1mwandb[0m: You chose "Don't visualize my results"


Step,Training Loss


[FINAL] Spearman=0.447844 | Acc@SD=0.591837
