# The notebook is dedicated to fine-tuning models using only the 'Drag Race' transcript data.

Installing and importing the required libraries and modules.

In [1]:
!pip install transformers



In [2]:
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import torch

In [3]:
# Drag Race transcript sentences; these rows become the fine-tuning data below.
transcript_csv = 'Notebook_6_7_dragrace_transcript_wrongpreds.csv'
data = pd.read_csv(transcript_csv)

Adding a ground-truth `hate_label` column to the Drag Race transcript data; every sentence is assigned the label 0.

In [4]:
# Give every transcript sentence the constant label 0.
data = data.assign(hate_label=0)

Importing the Hugging Face classes and loading the pre-trained model and its tokenizer.

In [5]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments

# 1. Load the tokenizer and the sequence-classification model from the same pre-trained checkpoint
model_name = "facebook/roberta-hate-speech-dynabench-r4-target"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

Preprocessing the dataset

In [6]:
import re
import string

def preprocess_sentence(sentence):
    """Normalise the whitespace of a single sentence.

    Case and punctuation are deliberately left untouched because they are
    assumed to carry semantic information.

    Args:
        sentence: Raw sentence text. Any non-string value (for example the
            NaN that pandas produces for an empty CSV cell, or None) is
            treated as an empty sentence.

    Returns:
        The sentence with literal backslash-n sequences replaced by a space,
        runs of whitespace collapsed to one space, and leading/trailing
        whitespace removed. An empty string for non-string input.
    """
    if not isinstance(sentence, str):
        # re.sub raises TypeError on non-strings, which would abort the
        # whole column-wise apply because of a single missing cell.
        return ''
    # r'\\n' matches the two characters backslash + 'n' (an escaped newline
    # left in the text); real newline characters are covered by \s+ below.
    sentence = re.sub(r'\\n', ' ', sentence)
    sentence = re.sub(r'\s+', ' ', sentence).strip()
    return sentence

# Clean every entry of the 'sentences' column with preprocess_sentence
data['sentences'] = data['sentences'].map(preprocess_sentence)


Preparing the texts and labels for classification.

In [7]:
# Plain Python lists of the training sentences and their labels
train_texts, train_labels = (
    data[column].tolist() for column in ('sentences', 'hate_label')
)

Loading the dataset from Vidgen et al. (2021) from which the test dataset will be taken for evaluating the fine-tuned model's performance.

In [8]:
# Dynamically generated hate dataset (Vidgen et al., 2021); version 0.2.3 according to the file name
vidgen_csv = 'Notebook_7_Dynamically Generated Hate Dataset v0.2.3.csv'
dataset2 = pd.read_csv(vidgen_csv)

In [9]:
from sklearn.preprocessing import LabelEncoder
# Label encoder that is fitted on the 'label' column of dataset2 in the next cell
encoder = LabelEncoder()

In [10]:
# Fit the encoder on the string labels (encoded_labels is not used for the column created below)
encoded_labels = encoder.fit_transform(dataset2['label'])

# Explicit label-to-integer mapping: 'nothate' becomes 0 and 'hate' becomes 1
mapping = dict(nothate=0, hate=1)

# Look every label up in the mapping; a label missing from it raises KeyError
dataset2['Numeric_label'] = dataset2['label'].map(lambda label: mapping[label])

Creating variables to store the test dataset texts and labels from Vidgen et al. (2021)

In [11]:
# Rows belonging to the 'test' split
dataset_test = dataset2[dataset2['split'] == 'test']
# Within the test split, keep only the rows whose 'round.base' value is 4
round4_test = dataset_test[dataset_test['round.base'] == 4]
test_texts = round4_test['text'].tolist()
test_labels = round4_test['Numeric_label'].tolist()

Creating a custom PyTorch Dataset class which will be used to tokenize the train and test data for the pretrained RoBERTa model.

In [12]:
# Define a custom dataset class for hate speech detection using PyTorch
class HateSpeechDataset(torch.utils.data.Dataset):
    """Map-style dataset that tokenizes one text sample per lookup.

    Args:
        texts: Indexable sequence of text samples.
        labels: Indexable sequence of integer class labels, one per text.
        tokenizer: Callable tokenizer (such as a Hugging Face tokenizer) that
            accepts the keyword arguments used in ``__getitem__`` and returns
            a mapping with 'input_ids' and 'attention_mask' tensors.
        max_len: Number of tokens every sample is padded or truncated to.
    """

    def __init__(self, texts, labels, tokenizer, max_len):
        """Store the samples, their labels, the tokenizer and the sequence length."""
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of text samples."""
        return len(self.texts)

    def __getitem__(self, item):
        """Tokenize the sample at index ``item``.

        Returns:
            A dict with 1-D tensors 'input_ids' and 'attention_mask' and a
            scalar ``torch.long`` tensor 'labels'.
        """
        text = self.texts[item]
        label = self.labels[item]

        # Call the tokenizer directly: encode_plus is deprecated in favour of
        # __call__, which accepts the same arguments.
        encoding = self.tokenizer(
            text,
            add_special_tokens=True,   # Add the model's start/end-of-sequence tokens
            max_length=self.max_len,   # Upper bound on the sequence length
            padding='max_length',      # Pad short sequences up to max_length
            truncation=True,           # Cut sequences longer than max_length
            return_tensors='pt',       # Return PyTorch tensors with a leading batch axis
        )

        return {
            # flatten() drops the leading batch axis added by return_tensors='pt'
            'input_ids': encoding['input_ids'].flatten(),
            # 1 marks real tokens, 0 marks padding
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long),
        }


Creating the train and test datasets using the custom dataset class and defining the maximum length (in tokens) to which each text is padded or truncated.

In [13]:
# Every sample is padded or truncated to max_len tokens
max_len = 128
train_dataset = HateSpeechDataset(
    texts=train_texts, labels=train_labels, tokenizer=tokenizer, max_len=max_len
)
test_dataset = HateSpeechDataset(
    texts=test_texts, labels=test_labels, tokenizer=tokenizer, max_len=max_len
)

In [14]:
!pip install accelerate -U transformers[torch]



The training loop for the fine-tuning process using the Hugging Face Trainer class, followed by the evaluation of the fine-tuned model using the test dataset.

In [15]:
from sklearn.metrics import accuracy_score, f1_score
from transformers import EvalPrediction
import numpy as np

# Metric callback for the Trainer: accuracy and weighted F1 of the predicted classes
def compute_metrics(p: EvalPrediction):
    """Score one evaluation pass.

    Args:
        p: Evaluation output; ``p.predictions`` holds per-class scores with
            the classes along axis 1 and ``p.label_ids`` the true labels.

    Returns:
        Dict with the keys 'accuracy' and 'f1' (weighted-average F1).
    """
    predicted_classes = np.argmax(p.predictions, axis=1)
    true_classes = p.label_ids
    accuracy = accuracy_score(true_classes, predicted_classes)
    weighted_f1 = f1_score(true_classes, predicted_classes, average='weighted')
    return {'accuracy': accuracy, 'f1': weighted_f1}

# Step 5: Fine-tuning the Model using the Trainer class
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

from transformers import EarlyStoppingCallback

# No validation set is handed to the Trainer below, so step-wise evaluation
# and load_best_model_at_end are intentionally not configured: with them set
# and no eval_dataset, the Trainer has nothing to evaluate on and no metric
# to pick a "best" checkpoint from.
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # NOTE(review): the recorded run finished after 195 optimizer steps, which
    # is below both warmup_steps and logging_steps, so the learning-rate
    # warm-up presumably never completes and no training loss is logged.
    # Confirm these values are intended for a dataset of this size.
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=500,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    compute_metrics=compute_metrics,
)

# Train the model
trainer.train()



Step,Training Loss,Validation Loss


TrainOutput(global_step=195, training_loss=0.5042786035782252, metrics={'train_runtime': 72.5781, 'train_samples_per_second': 42.658, 'train_steps_per_second': 2.687, 'total_flos': 203647956848640.0, 'train_loss': 0.5042786035782252, 'epoch': 3.0})

In [16]:
# Step 6: Score the fine-tuned model on the test examples from Vidgen et al.
eval_result = trainer.evaluate(eval_dataset=test_dataset)
print(f"Test Accuracy: {eval_result['eval_accuracy']:.4f}, Test F1: {eval_result['eval_f1']:.4f}")


Test Accuracy: 0.5419, Test F1: 0.4005


In [17]:
# Drag Race transcript test set (as indicated by the file name)
drag_test_csv = 'Notebook_7_drag_transcript_testset.csv'
dataset3 = pd.read_csv(drag_test_csv)

Preprocessing the test dataset

In [19]:
import re
import string

def preprocess_sentence(sentence):
    """Normalise the whitespace of a single sentence.

    This cell redefines the function of the same name from the earlier
    preprocessing cell. Case and punctuation are kept because they are
    assumed to carry semantic information.

    Args:
        sentence: Raw sentence text. Non-string values (such as NaN from an
            empty CSV cell, or None) are treated as an empty sentence.

    Returns:
        The cleaned sentence: literal backslash-n sequences become spaces,
        whitespace runs collapse to one space, and the ends are stripped.
        An empty string for non-string input.
    """
    if not isinstance(sentence, str):
        # A missing cell would otherwise make re.sub raise TypeError and
        # abort the column-wise apply.
        return ''
    # First pass: the two-character sequence backslash + 'n'.
    # Second pass: any run of real whitespace, including newline characters.
    sentence = re.sub(r'\\n', ' ', sentence)
    sentence = re.sub(r'\s+', ' ', sentence).strip()
    return sentence

# Clean every entry of the 'Sentence' column with preprocess_sentence
dataset3['Sentence'] = dataset3['Sentence'].map(preprocess_sentence)


In [21]:
# Plain Python lists of the Drag Race test sentences and their labels
test_texts2, test_labels2 = (
    dataset3[column].tolist() for column in ('Sentence', 'hate_label')
)

In [22]:
# Wrap the Drag Race test sentences with the same tokenizer and sequence length as before
test_dataset2 = HateSpeechDataset(
    texts=test_texts2, labels=test_labels2, tokenizer=tokenizer, max_len=max_len
)

In [23]:
# Step 7: Evaluate on Test Set from Drag_race_testset
eval_result2 = trainer.evaluate(test_dataset2)
print(f"Test Accuracy: {eval_result2['eval_accuracy']:.4f}, Test F1: {eval_result2['eval_f1']:.4f}")


Test Accuracy: 1.0000, Test F1: 1.0000
