# Load Data

In [1]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import torch

# Load data (must upload first if running on Colab)
try:
    train_df = pd.read_csv('/content/train_sampled.csv')
    dev_df = pd.read_csv('/content/dev_sampled.csv')
    test_df = pd.read_csv('/content/test_sampled.csv')
except FileNotFoundError:
    # The Colab upload directory has no CSVs, so fall back to the relative
    # data/ directory (e.g. when running on Ravi's laptop). Only a missing
    # file triggers the fallback; parse errors and interrupts still surface.
    train_df = pd.read_csv('data/train_sampled.csv')
    dev_df = pd.read_csv('data/dev_sampled.csv')
    test_df = pd.read_csv('data/test_sampled.csv')

# Sanity check: show the first few training rows
train_df.head()

Unnamed: 0,label,text
0,0,"This KS isn't for a game, it's to sponsor his ..."
1,0,`NEWLINE_TOKENNEWLINE_TOKEN== Ownership of thi...
2,0,"""\nYesterday I made some quick changes to the ..."
3,1,#obama fucking massage sex vids https://t.co/d...
4,0,""" \n\n ==Disputed fair use rationale for Image..."


# Device Setup

In [2]:
import sys
# Pick the fastest available backend: CUDA (e.g. a Colab GPU), then Apple MPS,
# then CPU. Probing the hardware directly, instead of branching on the runtime
# environment, also lets a non-Colab CUDA machine use its GPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

print("Using device:", device)

Using device: mps


Label key for the `label` column:

0 = text does NOT contain hateful speech

1 = text contains hateful speech

# Simple Baseline: Majority-class Classifier

## Model Setup & Training

In [3]:
# Define a Majority Class Model
class MajorityClassModel:
    """Baseline classifier that always predicts the most frequent training label."""

    def __init__(self):
        # Set by fit(); None means the model has not been fitted yet
        self.majority_class = None

    def fit(self, y_train):
        """Record the most frequent label in ``y_train`` and print it.

        Args:
            y_train: Training labels as a pandas Series or any 1-D sequence
                (list, tuple, NumPy array).

        Raises:
            ValueError: If ``y_train`` contains no non-missing labels.
        """
        # Wrapping in a Series lets plain sequences work too. mode() returns the
        # modal value(s) sorted, so a tie resolves to the smallest label.
        modes = pd.Series(y_train).mode()
        if modes.empty:
            raise ValueError("y_train must contain at least one label")
        self.majority_class = modes.iloc[0]
        print(f"Majority class in training set = {self.majority_class}")

    def predict(self, X):
        """Return the majority class once for every sample in ``X``.

        Args:
            X: Any sized collection of samples; only ``len(X)`` is used.

        Returns:
            A list of length ``len(X)`` filled with the fitted majority class.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.majority_class is None:
            raise RuntimeError("MajorityClassModel must be fitted before calling predict")
        return [self.majority_class] * len(X)

# Train a Majority Class Model: only the training labels are needed, because
# fit() just records their most frequent value.
model = MajorityClassModel()
model.fit(train_df["label"])

Majority class in training set = 0


## Evaluation

In [4]:
# --- Compute Metrics (Accuracy, Precision, Recall, and F1 Score) ---
def compute_metrics(y_true, y_pred):
    """Score predictions against the gold labels.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.

    Returns:
        A 4-tuple ``(accuracy, precision, recall, f1)``.
    """
    accuracy = accuracy_score(y_true, y_pred)
    # zero_division=0 makes an undefined score (e.g. precision when nothing is
    # predicted positive) come back as 0.
    remaining = [
        scorer(y_true, y_pred, zero_division=0)
        for scorer in (precision_score, recall_score, f1_score)
    ]
    return (accuracy, *remaining)

In [5]:
# Gold labels and majority-class predictions for the train, dev, and test splits
y_true_train, y_pred_train = train_df["label"].values, model.predict(train_df)
y_true_dev, y_pred_dev = dev_df['label'].values, model.predict(dev_df)
y_true_test, y_pred_test = test_df['label'].values, model.predict(test_df)

# Accuracy, precision, recall, and F1 for each split
train_acc, train_prec, train_rec, train_f1 = compute_metrics(y_true_train, y_pred_train)
dev_acc, dev_prec, dev_rec, dev_f1 = compute_metrics(y_true_dev, y_pred_dev)
test_acc, test_prec, test_rec, test_f1 = compute_metrics(y_true_test, y_pred_test)

# Collect the scores in a DataFrame, one row per split
metrics_df = pd.DataFrame(
    [
        ("Train", train_acc, train_prec, train_rec, train_f1),
        ("Dev", dev_acc, dev_prec, dev_rec, dev_f1),
        ("Test", test_acc, test_prec, test_rec, test_f1),
    ],
    columns=["Split", "Accuracy", "Precision", "Recall", "F1"],
)

metrics_df.round(4)

Unnamed: 0,Split,Accuracy,Precision,Recall,F1
0,Train,0.794,0.0,0.0,0.0
1,Dev,0.7829,0.0,0.0,0.0
2,Test,0.7856,0.0,0.0,0.0


# Stronger Baseline: MetaHateBERT

## Model Setup & Evaluation

In [6]:
# Don't use TensorFlow: importing the Hugging Face libraries ran into TensorFlow
# issues, so USE_TF is switched off before `transformers` is imported below.
import os
os.environ["USE_TF"] = "0"
from transformers import pipeline

# Define model pipeline
classifier = pipeline(
    "text-classification",
    model="irlab-udc/MetaHateBERT",
    device=device,
    truncation=True,
    padding=True,
    max_length=512
)

# Map the pipeline's string labels onto the dataset's integer labels
label_map_pred = {"LABEL_0": 0, "LABEL_1": 1}


def predict_labels(texts):
    """Classify every text with the MetaHateBERT pipeline.

    Args:
        texts: Iterable of input strings.

    Returns:
        A list of integer labels (0 or 1), in the same order as ``texts``.
    """
    texts = list(texts)
    if not texts:
        return []
    # Hand the whole list to the pipeline so it iterates internally instead of
    # being invoked once per text; long inputs are truncated to 512 tokens.
    outputs = classifier(texts, truncation=True, max_length=512)
    return [label_map_pred[output["label"]] for output in outputs]


# Predict on the train, dev, and test texts
y_pred_train = predict_labels(train_df["text"].tolist())
y_pred_dev = predict_labels(dev_df["text"].tolist())
y_pred_test = predict_labels(test_df["text"].tolist())

y_true_train = train_df["label"].values
y_true_dev = dev_df["label"].values
y_true_test = test_df["label"].values

# Accuracy, precision, recall, and F1 for each split
train_acc, train_prec, train_rec, train_f1 = compute_metrics(y_true_train, y_pred_train)
dev_acc, dev_prec, dev_rec, dev_f1 = compute_metrics(y_true_dev, y_pred_dev)
test_acc, test_prec, test_rec, test_f1 = compute_metrics(y_true_test, y_pred_test)

# Store metrics in a DataFrame, one row per split
metrics_df = pd.DataFrame({
    "Split": ["Train", "Dev", "Test"],
    "Accuracy": [train_acc, dev_acc, test_acc],
    "Precision": [train_prec, dev_prec, test_prec],
    "Recall": [train_rec, dev_rec, test_rec],
    "F1": [train_f1, dev_f1, test_f1]
})

metrics_df.round(4)

Device set to use mps


Unnamed: 0,Split,Accuracy,Precision,Recall,F1
0,Train,0.9365,0.8613,0.8248,0.8427
1,Dev,0.9401,0.8811,0.8368,0.8584
2,Test,0.9273,0.8679,0.7797,0.8214


In [7]:
# Demo of MetaHateBERT: classify one hand-written sentence and print the raw
# pipeline output (a list holding one dict with 'label' and 'score' keys).
print(classifier("I hate Americans, they're the absolute worst."))

[{'label': 'LABEL_1', 'score': 0.9514635801315308}]
