In [1]:
!pip install -q transformers datasets torch scikit-learn accelerate peft

In [2]:
# importing necessary libraries
import os
import torch
import numpy as np
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModel
from sklearn.metrics import accuracy_score, f1_score
from torch import nn
from torch.utils.data import DataLoader
from torch.optim import AdamW

In [3]:
# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

Using device: cuda


In [4]:
# load dataset
ds = load_dataset("McGill-NLP/stereoset", "intersentence")

README.md: 0.00B [00:00, ?B/s]

intersentence/validation-00000-of-00001.(…):   0%|          | 0.00/687k [00:00<?, ?B/s]

Generating validation split:   0%|          | 0/2123 [00:00<?, ? examples/s]

##Exploring the dataset

In [5]:
# Print the keys to see available splits in the DatasetDict
print(ds.keys())

dict_keys(['validation'])


In [6]:
# Access the 'validation' split
train_ds = ds['validation']

# Print the features (schema) of the split
print("Features of the 'validation' split:")
print(train_ds.features)

# Display the first few examples from the split
print("\nFirst 5 examples from the 'validation' split:")
import pandas as pd
display(pd.DataFrame(train_ds[:5]))

Features of the 'validation' split:
{'id': Value('string'), 'target': Value('string'), 'bias_type': Value('string'), 'context': Value('string'), 'sentences': {'sentence': List(Value('string')), 'id': List(Value('string')), 'labels': List({'label': List(ClassLabel(names=['anti-stereotype', 'stereotype', 'unrelated', 'related'])), 'human_id': List(Value('string'))}), 'gold_label': List(ClassLabel(names=['anti-stereotype', 'stereotype', 'unrelated']))}}

First 5 examples from the 'validation' split:


Unnamed: 0,id,target,bias_type,context,sentences
0,bb7a8bd19a8cfdf1381f60715adfdbb5,Ethiopia,race,Many people live in Ethiopia.,{'sentence': ['The people are fat and unathlet...
1,978bf5e0a11d3496ad4396918aff2dc1,Hispanic,race,My professor is a hispanic man,"{'sentence': ['He came here illegally', 'He is..."
2,75832d47f97f8fea124abf8734755d89,schoolgirl,gender,The schoolgirl is walking down the street.,{'sentence': ['The wind is blowing at 80 mph.'...
3,a26357229740125e61c4f6598bfba219,Cape Verde,race,We'll likely fly over Cape Verde on our flight...,{'sentence': ['The people that live there are ...
4,fcfb1ceefb702d51e39ea11a00ea9dfa,Russian,race,I know many people who are Russian.,"{'sentence': ['They love their vodka!', 'Last ..."


## Data preprocessing

> Given an LLM-style output sentence + context, predict whether it is
stereotypical, anti-stereotypical, or unrelated

> Each candidate sentence is treated as an independent classification instance, with labels derived from StereoSet’s gold annotations.



In [7]:
texts = []
labels = []
bias_types = []

for ex in ds["validation"]:
    context = ex["context"]
    bias_type = ex["bias_type"]

    sentences = ex["sentences"]["sentence"]
    gold_labels = ex["sentences"]["gold_label"]

    for sent, label in zip(sentences, gold_labels):
        combined = context + " <sep> " + sent
        texts.append(combined)
        labels.append(label)
        bias_types.append(bias_type)

print("Total samples:", len(texts))

Total samples: 6369


In [8]:
from collections import Counter

print("Label distribution:", Counter(labels))
print("Bias types:", Counter(bias_types))

Label distribution: Counter({0: 2123, 1: 2123, 2: 2123})
Bias types: Counter({'race': 2928, 'profession': 2481, 'gender': 726, 'religion': 234})


## Splitting the dataset into train/val/test

In [9]:
from sklearn.model_selection import train_test_split

X_temp, X_test, y_temp, y_test = train_test_split(
    texts,
    labels,
    test_size=0.15,
    random_state=42,
    stratify=labels
)

X_train, X_val, y_train, y_val = train_test_split(
    X_temp,
    y_temp,
    test_size=0.176,
    random_state=42,
    stratify=y_temp
)

print(f"Train: {len(X_train)} | Val: {len(X_val)} | Test: {len(X_test)}")

Train: 4460 | Val: 953 | Test: 956


## Tokenization

In [10]:
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    "sentence-transformers/all-MiniLM-L6-v2")


tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [11]:
# Tokenization process
def tokenize(texts):
    return tokenizer(
        texts,
        truncation=True,
        padding=True,
        max_length=128,
        return_tensors="pt"
    )

train_enc = tokenize(X_train)
val_enc   = tokenize(X_val)
test_enc  = tokenize(X_test)

##Dataset class
> The dataset class is a custom PyTorch Dataset, whose job is simply:

-hold tokenized tensors (input_ids, attention_mask)

-hold labels

-return one sample at a time to the DataLoader

In [12]:
from torch.utils.data import Dataset

class TextDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = torch.tensor(labels)

    def __getitem__(self, idx):
        item = {k: v[idx] for k, v in self.encodings.items()}
        item["labels"] = self.labels[idx]
        return item

    def __len__(self):
        return len(self.labels)

train_ds = TextDataset(train_enc, y_train)
val_ds   = TextDataset(val_enc, y_val)
test_ds  = TextDataset(test_enc, y_test)

## Data Loaders

In [13]:
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
val_loader   = DataLoader(val_ds, batch_size=32)
test_loader  = DataLoader(test_ds, batch_size=32)

# Model training

In [14]:
# Load MiniLM and freeze encoder
encoder = AutoModel.from_pretrained(
    "sentence-transformers/all-MiniLM-L6-v2"
)

for param in encoder.parameters():
    param.requires_grad = False

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

In [15]:
# Classification model
import torch.nn as nn
import torch.nn.functional as F

class FrozenMiniLMClassifier(nn.Module):
    def __init__(self, base_model, hidden_size=384, num_labels=3):
        super().__init__()
        self.encoder = base_model

        for param in self.encoder.parameters():
            param.requires_grad = False

        self.classifier = nn.Linear(hidden_size, num_labels)

    def forward(
        self,
        input_ids,
        attention_mask,
        token_type_ids=None,  # IMPORTANT
        labels=None
    ):
        outputs = self.encoder(
            input_ids=input_ids,
            attention_mask=attention_mask
        )

        pooled = outputs.last_hidden_state[:, 0]  # CLS token
        logits = self.classifier(pooled)

        loss = None
        if labels is not None:
            loss = F.cross_entropy(logits, labels)

        return loss, logits


In [16]:
# Initialize model
model = FrozenMiniLMClassifier(encoder).to(device)

In [17]:
# Optimizer
optimizer = AdamW(model.parameters(), lr=2e-4)

In [18]:
# Training model for 5 epochs
for epoch in range(5):
    model.train()
    total_loss = 0

    for batch in train_loader:
        batch = {k: v.to(device) for k, v in batch.items()}
        loss, _ = model(**batch)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        total_loss += loss.item()

    avg_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch+1} | Train Loss: {avg_loss:.4f}")

Epoch 1 | Train Loss: 1.0902
Epoch 2 | Train Loss: 1.0581
Epoch 3 | Train Loss: 1.0342
Epoch 4 | Train Loss: 1.0166
Epoch 5 | Train Loss: 1.0019


In [19]:
# Evaluation
model.eval()
preds, true = [], []

with torch.no_grad():
    for batch in test_loader:
        labels_batch = batch["labels"]
        batch = {k: v.to(device) for k, v in batch.items() if k != "labels"}

        _, logits = model(**batch)
        predictions = torch.argmax(logits, dim=1)

        preds.extend(predictions.cpu().numpy())
        true.extend(labels_batch.numpy())

print("Accuracy:", accuracy_score(true, preds))
print("Macro F1:", f1_score(true, preds, average="macro"))

Accuracy: 0.5428870292887029
Macro F1: 0.5421418709870621


## Save and download model

In [20]:
import os

os.makedirs("frozen_minilm", exist_ok=True)

torch.save(model.state_dict(), "frozen_minilm/model.pt")
tokenizer.save_pretrained("frozen_minilm")

print("Frozen MiniLM model saved.")

Frozen MiniLM model saved.


In [21]:
!zip -r frozen_minilm.zip frozen_minilm

  adding: frozen_minilm/ (stored 0%)
  adding: frozen_minilm/tokenizer_config.json (deflated 73%)
  adding: frozen_minilm/special_tokens_map.json (deflated 80%)
  adding: frozen_minilm/model.pt (deflated 9%)
  adding: frozen_minilm/tokenizer.json (deflated 71%)
  adding: frozen_minilm/vocab.txt (deflated 53%)


In [22]:
from google.colab import files
files.download("frozen_minilm.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>