In [1]:
import os
import csv
import pandas as pd

#### Data Preparation

In [None]:
files: list = [
    # "anti_stereotyped_type1.txt.dev",
    # "anti_stereotyped_type1.txt.test",
    # "anti_stereotyped_type2.txt.dev",
    # "anti_stereotyped_type2.txt.test",
    # "female_occupations.txt",
    # "male_occupations.txt",
    # "pro_stereotyped_type1.txt.dev",
    # "pro_stereotyped_type1.txt.test",
    # "pro_stereotyped_type2.txt.dev",
    # "pro_stereotyped_type2.txt.test"
]

data_dir: str = "../misc/corefBias/WinoBias/wino/data"
file: str = files[-1]
file_path: str = f"{data_dir}/{file}"
print(f"File path: {file_path}")

data: dict = {
    "index": [],
    "text": []
}
with open(file_path) as f:
    entire_data = f.readlines()

for idx in range(len(entire_data)):
    data["index"].append(idx + 1)
    data["text"].append(entire_data[idx][len(str(idx)):].strip())

print(data["index"][100])
print(data["text"][100])

File path: ../misc/corefBias/WinoBias/wino/data/pro_stereotyped_type2.txt.test
101
The librarian talked to [the carpenter] and asked [him] to fix the table in the library.


In [51]:
new_data_dir: str = "../data/WinoBias/new"
new_file: str = file.split(".")[0]
new_extension: str = file.split(".")[-1]
# print(new_extension)
# print(new_file)
with open(f"{new_data_dir}/{new_file}.{new_extension}.tsv", "w") as f:
    for idx in range(len(data["index"])):
        f.write(str(data["index"][idx]) + "\t")
        f.write(data["text"][idx] + "\t")
        if new_file[0] == 'a':
            # Anti stereotypical
            f.write("0\n")
        else:
            # Stereotypical
            f.write("1\n")
        

        

In [66]:
"""
    This function returns tsv files in a list format
    - input => .tsv file
    - output => ([texts], [labels])
"""
import csv

def tsv_to_data(file_path: str) -> tuple:
    with open(file_path, "r") as f:
        tsvreader = csv.reader(f, delimiter="\t")
        texts = []
        labels = []
        for row in tsvreader:
            text = row[1]
            label = int(row[2])
            texts.append(text)
            labels.append(label)

    return (texts, labels)


#### Data preparation is done

In [53]:
male_occupations: list = [
    "driver",
    "supervisor",
    "janitor",
    "cook",
    "mover",
    "laborer",
    "construction worker",
    "chief",
    "developer",
    "carpenter",
    "manager",
    "lawyer",
    "farmer",
    "salesperson",
    "physician",
    "guard",
    "analyst",
    "mechanic",
    "sheriff",
    "CEO"
]

female_occupations: list = [
    "attendant",
    "cashier",
    "teacher",
    "nurse",
    "assistant",
    "secretary",
    "auditor",
    "cleaner",
    "receptionist",
    "clerk",
    "counselor",
    "designer",
    "hairdresser",
    "writer",
    "housekeeper",
    "baker",
    "accountant",
    "editor",
    "librarian",
    "tailor"
]

In [69]:
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer

class BiasDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=64):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        
        encodings = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt'
        )
        
        # Squeeze out the extra batch dimension
        input_ids = encodings['input_ids'].squeeze()
        attention_mask = encodings['attention_mask'].squeeze()

        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            # Convert label to float if using BCEWithLogitsLoss
            'labels': torch.tensor(label, dtype=torch.float)
        }



train_texts, train_labels = tsv_to_data(file_path="../data/WinoBias/new/anti_stereotyped_type1.dev.tsv")

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
train_dataset = BiasDataset(train_texts, train_labels, tokenizer, max_length=64)
train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True)


In [70]:
import torch.nn as nn
from transformers import BertModel, AdamW
import torch

class GenderBiasClassifier(nn.Module):
    def __init__(self, pretrained_model='bert-base-uncased'):
        super(GenderBiasClassifier, self).__init__()
        self.bert = BertModel.from_pretrained(pretrained_model)
        self.dropout = nn.Dropout(0.2)
        self.classifier = nn.Linear(self.bert.config.hidden_size, 1) 

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output 
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)
        return logits.squeeze(-1)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")

model = GenderBiasClassifier().to(device)

criterion = nn.BCEWithLogitsLoss() 
optimizer = AdamW(model.parameters(), lr=2e-5)

num_epochs = 3
model.train()
for epoch in range(num_epochs):
    epoch_loss = 0.0
    for batch in train_dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)  # shape: (batch_size)

        optimizer.zero_grad()
        logits = model(input_ids, attention_mask)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss = {epoch_loss/len(train_dataloader):.4f}")




Epoch 1/3, Loss = 0.1963
Epoch 2/3, Loss = 0.0261
Epoch 3/3, Loss = 0.0116


In [None]:
import torch
import torch.nn.functional as F

def predict_bias(model, tokenizer, sentence, device='cpu', threshold=0.5):
    """
    Given a trained model, tokenizer, and a sentence string, returns:
      - label (0 or 1),
      - probability (float in [0, 1]).
    """
    # Tokenize
    encodings = tokenizer(
        sentence,
        truncation=True,
        padding='max_length',
        max_length=64,
        return_tensors='pt'
    )

    input_ids = encodings['input_ids'].to(device)
    attention_mask = encodings['attention_mask'].to(device)

    model.eval()

    with torch.no_grad():
        logits = model(input_ids, attention_mask)
        probs = torch.sigmoid(logits)  # shape: (batch_size,) but here batch_size=1

    prob = probs.item()  # convert to float
    label = 1 if prob >= threshold else 0

    return label, prob


In [79]:
# Suppose your new sentence is:
new_sentence = "The CEO raised the salary of the receptionist, because she did a good job."

# Call the prediction function
label, probability = predict_bias(model, tokenizer, new_sentence, device=device)

# Interpret the result
if label == 1:
    print(f"Prediction: Stereotypical (BIAS). Probability = {probability:.6f}")
else:
    print(f"Prediction: Anti-stereotypical (NO BIAS). Probability = {probability:.6f}")


Prediction: Anti-stereotypical (NO BIAS). Probability = 0.008351
