In [7]:
from datasets import load_dataset

stereoset = load_dataset("stereoset", "intersentence")

In [None]:
from datasets import load_dataset
import pandas as pd

dataset = load_dataset("stereoset", "intersentence", split="validation")

entries = []

for example in dataset:
    if example['bias_type'] != 'race':
        continue

    sentences = example['sentences']
    for sentence, label in zip(sentences['sentence'], sentences['gold_label']):
        if label in [0, 1]:  # 0 = anti-stereotype, 1 = stereotype
            entries.append({
                "sentence": sentence,
                "label": int(label)
            })

df = pd.DataFrame(entries)
output_path = "race_bias_sentences.csv"
df.to_csv(output_path, index=False)


✅ Saved 1952 race-related sentences to race_bias_sentences.csv


In [4]:
import pandas as pd
from datasets import Dataset
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification, Trainer, TrainingArguments

#Load  CSV 
data = pd.read_csv(r"C:\Users\shrit\Desktop\Ml_Projects\DeepRead\DeepRead\data\stereoset\race_final_data.csv") 
dataset = Dataset.from_pandas(data)

# Split  dataset 
split_dataset = dataset.train_test_split(test_size=0.2)

#Load tokenizer
tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")

#tokenizing
def tokenize_function(examples):
    return tokenizer(examples["sentence"], truncation=True, padding="max_length", max_length=128)

tokenized_datasets = split_dataset.map(tokenize_function, batched=True)

#pre-trained DistilBERT model for sequence classification
model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2)

from sklearn.metrics import accuracy_score

#accuracy function
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = logits.argmax(axis=-1)
    return {"accuracy": accuracy_score(labels, predictions)}


# training arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=64,
    per_device_eval_batch_size=64,
    num_train_epochs=30,
    weight_decay=0.01,
)

# trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics, 
)


# fine-tune the model
trainer.train()

# evaluate model
results = trainer.evaluate()
print("Evaluation Results:", results)



Map:   0%|          | 0/1561 [00:00<?, ? examples/s]

Map:   0%|          | 0/391 [00:00<?, ? examples/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.554617,0.757033
2,No log,0.504845,0.762148
3,No log,0.512485,0.764706
4,No log,0.50691,0.782609
5,No log,0.552907,0.774936
6,No log,0.575598,0.769821
7,No log,0.64515,0.764706
8,No log,0.726169,0.777494
9,No log,0.748484,0.782609
10,No log,0.836524,0.759591


Evaluation Results: {'eval_loss': 1.2932342290878296, 'eval_accuracy': 0.7698209718670077, 'eval_runtime': 0.5435, 'eval_samples_per_second': 719.358, 'eval_steps_per_second': 12.879, 'epoch': 30.0}


In [13]:
import torch

# Determine the device and move the model to it
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Prepare some new sentences for prediction
test_sentences = ["that black person is proabbly involved"]

# Tokenize the sentences
inputs = tokenizer(test_sentences, padding=True, truncation=True, max_length=128, return_tensors="pt")

# Move the input tensors to the same device as the model
inputs = {k: v.to(device) for k, v in inputs.items()}

# Get predictions (logits) from the model
outputs = model(**inputs)
logits = outputs.logits

# Convert logits to predicted labels (0 or 1)
predictions = logits.argmax(dim=-1)
print("Predictions:", predictions)


Predictions: tensor([0], device='cuda:0')


In [14]:
model.save_pretrained("./race_bias_model")
tokenizer.save_pretrained("./race_bias_tokenizer")

('./race_bias_tokenizer\\tokenizer_config.json',
 './race_bias_tokenizer\\special_tokens_map.json',
 './race_bias_tokenizer\\vocab.txt',
 './race_bias_tokenizer\\added_tokens.json',
 './race_bias_tokenizer\\tokenizer.json')