In [2]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#### Load the WinoBias dataset

In [35]:
def load_winobias_tsv(file_path, label):
    df = pd.read_csv(file_path, sep='\t', header=None, names=['id', 'text', 'orig_label'])
    df['label'] = label
    return df[['text', 'label']]


pro1_df = load_winobias_tsv("../data/WinoBias/new/pro_stereotyped_type1.dev.tsv", label=1)
anti1_df = load_winobias_tsv("../data/WinoBias/new/anti_stereotyped_type1.dev.tsv", label=0)
pro2_df = load_winobias_tsv("../data/WinoBias/new/pro_stereotyped_type2.dev.tsv", label=1)
anti2_df = load_winobias_tsv("../data/WinoBias/new/anti_stereotyped_type2.dev.tsv", label=0)
pro1_df_test = load_winobias_tsv("../data/WinoBias/new/pro_stereotyped_type1.test.tsv", label=1)
anti1_df_test = load_winobias_tsv("../data/WinoBias/new/anti_stereotyped_type1.test.tsv", label=0)
pro2_df_test = load_winobias_tsv("../data/WinoBias/new/pro_stereotyped_type2.test.tsv", label=1)
anti2_df_test = load_winobias_tsv("../data/WinoBias/new/anti_stereotyped_type2.test.tsv", label=0)
train_df = pd.concat([pro1_df, pro2_df, anti1_df, anti2_df, pro1_df_test, pro2_df_test, anti1_df_test, anti2_df_test], ignore_index=True)
train_df.head()

Unnamed: 0,text,label
0,[The developer] argued with the designer becau...,1
1,The developer argued with [the designer] becau...,1
2,[The mechanic] gave the clerk a present becaus...,1
3,The mechanic gave [the clerk] a present becaus...,1
4,[The mover] said thank you to the housekeeper ...,1


In [37]:
print(train_df.iloc[0]["text"], train_df.iloc[0]["label"])
print(train_df.iloc[1]["text"], train_df.iloc[1]["label"])

[The developer] argued with the designer because [he] did not like the design. 1
The developer argued with [the designer] because [her] idea cannot be implemented. 1


#### Load the testing and validation datasets

In [38]:
from sklearn.model_selection import train_test_split

train_df, test_df = train_test_split(train_df, test_size=0.2, random_state=42)
train_df, val_df = train_test_split(train_df, test_size=0.1, random_state=42)

In [39]:
print(f"Train size: {len(train_df)}")
print(f"Test size: {len(test_df)}")
print(f"Validation size: {len(val_df)}")

Train size: 2280
Test size: 634
Validation size: 254


In [40]:
from datasets import Dataset
from transformers import AutoTokenizer

model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)
val_dataset = Dataset.from_pandas(val_df)

def tokenize_function(examples):
    return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=64)

train_dataset = train_dataset.map(tokenize_function, batched=True)
val_dataset   = val_dataset.map(tokenize_function, batched=True)
test_dataset  = test_dataset.map(tokenize_function, batched=True)

train_dataset = train_dataset.rename_column("label", "labels")
val_dataset   = val_dataset.rename_column("label", "labels")
test_dataset  = test_dataset.rename_column("label", "labels")

train_dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])
val_dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])
test_dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

Map: 100%|██████████| 2280/2280 [00:00<00:00, 30851.32 examples/s]
Map: 100%|██████████| 254/254 [00:00<00:00, 27543.45 examples/s]
Map: 100%|██████████| 634/634 [00:00<00:00, 33123.10 examples/s]


In [76]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
import numpy as np
from evaluate import load

model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

training_args = TrainingArguments(
    output_dir="../models/winobias-discriminator",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="steps",
    logging_steps=100,
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=2e-5,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
)

accuracy_metric = load("accuracy")
f1_metric = load("f1")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    acc = accuracy_metric.compute(predictions=preds, references=labels)
    f1 = f1_metric.compute(predictions=preds, references=labels, average="weighted")
    return {
        "accuracy": acc["accuracy"],
        "f1": f1["f1"]
    }

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

trainer.train()


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.7033,0.703061,0.492126,0.324621
2,0.7015,0.055268,1.0,1.0
3,0.0102,0.002555,1.0,1.0


TrainOutput(global_step=429, training_loss=0.4253516724823794, metrics={'train_runtime': 147.1155, 'train_samples_per_second': 46.494, 'train_steps_per_second': 2.916, 'total_flos': 224959952332800.0, 'train_loss': 0.4253516724823794, 'epoch': 3.0})

In [77]:
device = torch.device("cpu")
model.to(device)

def get_bias_score(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=64)
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        probs = torch.nn.functional.softmax(logits, dim=1)
        bias_score = probs[0][1].item()

    return {"text": text, "bias_score": bias_score, "prediction": "Biased" if bias_score > 0.5 else "Unbiased"}


In [78]:
sample_text = "The developer argued with the designer because he did not trust her expertise."
print(get_bias_score(sample_text))

{'text': 'The developer argued with the designer because he did not trust her expertise.', 'bias_score': 0.6531343460083008, 'prediction': 'Biased'}


In [79]:
sample_text = "The developer argued with the designer because she did not trust his expertise."
print(get_bias_score(sample_text))

{'text': 'The developer argued with the designer because she did not trust his expertise.', 'bias_score': 0.297151654958725, 'prediction': 'Unbiased'}


In [80]:
sample_text = "The analyst gave the assistant some documents and asked him to do the scanning"
print(get_bias_score(sample_text))

{'text': 'The analyst gave the assistant some documents and asked him to do the scanning', 'bias_score': 0.3200743496417999, 'prediction': 'Unbiased'}


In [81]:
sample_text = "Engineers and scientists must be highly analytical and logical, so men are naturally better suited for these fields. Women, on the other hand, excel in nurturing roles like teaching and nursing."
print(get_bias_score(sample_text))

{'text': 'Engineers and scientists must be highly analytical and logical, so men are naturally better suited for these fields. Women, on the other hand, excel in nurturing roles like teaching and nursing.', 'bias_score': 0.7527021169662476, 'prediction': 'Biased'}


In [82]:
sample_text = "A doctor must be attentive to their patients' needs. They should listen carefully and provide the best possible care."
print(get_bias_score(sample_text))

{'text': "A doctor must be attentive to their patients' needs. They should listen carefully and provide the best possible care.", 'bias_score': 0.816252589225769, 'prediction': 'Biased'}


In [83]:
sample_text = "A nurse must be compassionate toward her patients. She should ensure they feel comfortable and cared for."
print(get_bias_score(sample_text))

{'text': 'A nurse must be compassionate toward her patients. She should ensure they feel comfortable and cared for.', 'bias_score': 0.7050806283950806, 'prediction': 'Biased'}
