In [13]:
import matplotlib.pyplot as plt
import pandas as pd
import torch
from datasets import Dataset
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import LabelEncoder
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from transformers import AutoModelForSequenceClassification, AutoTokenizer




In [2]:
# Read the labelled CSV, keep only the text column (`body`) and the
# `level_1` label column, and drop rows where either value is missing.
df = (
    pd.read_csv("../datasets/train/final_labels.csv")
    .loc[:, ['body', 'level_1']]
    .dropna()
)

In [3]:
# Encode the string classes in `level_1` as integer ids. LabelEncoder numbers
# the classes in sorted order (e.g., Misogynistic = 0, Nonmisogynistic = 1).
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['level_1'])

# preserve_index=False keeps the pandas index (no longer contiguous once rows
# have been dropped) from being carried along as an extra
# `__index_level_0__` column in the Hugging Face dataset.
dataset = Dataset.from_pandas(df[['body', 'label']], preserve_index=False)

# A fixed seed makes the 80/20 split identical on every Restart & Run All;
# without it each run scores the model on a different random subset.
# NOTE(review): the checkpoint loaded later in this notebook was presumably
# fine-tuned on the train side of an earlier, unseeded split. If so, this test
# split overlaps its training data and the reported metrics are optimistic —
# confirm against the original training run.
SPLIT_SEED = 42
dataset = dataset.train_test_split(test_size=0.2, seed=SPLIT_SEED)
train_dataset = dataset['train']
test_dataset = dataset['test']

In [9]:
# Load the fine-tuned classifier and its tokenizer from the same path
# (presumably the directory written by the commented-out save calls in the
# evaluation cell — confirm).
#
# Base checkpoint, kept for reference:
#   model_name = "MilaNLProc/bert-base-uncased-ear-misogyny"
#   tokenizer = AutoTokenizer.from_pretrained(model_name)
#   model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
CHECKPOINT_PATH = "misogyny-classifier"

tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT_PATH)
model = AutoModelForSequenceClassification.from_pretrained(CHECKPOINT_PATH)

# 6. Tokenize the data
def tokenize_function(example):
    """Tokenize the ``body`` field of a batch with the notebook-level tokenizer.

    Every sequence is truncated to, and padded up to, 512 tokens, so all
    encoded rows have the same length.

    Args:
        example: Mapping with a ``"body"`` entry holding the text(s) to encode.

    Returns:
        Whatever the tokenizer returns for those texts.
    """
    encode_options = {
        "padding": "max_length",
        "truncation": True,
        "max_length": 512,
    }
    return tokenizer(example["body"], **encode_options)

# Only the held-out split is tokenized here; the train-split mapping stays
# disabled because this notebook does not train.
# train_dataset = train_dataset.map(function=tokenize_function, batched=True)
test_dataset = test_dataset.map(function=tokenize_function, batched=True)


Map: 100%|██████████| 1311/1311 [00:01<00:00, 1124.87 examples/s]


In [10]:
def compute_metrics(eval_pred):
    """Turn raw model outputs into classification metrics for the Trainer.

    Args:
        eval_pred: A ``(logits, labels)`` pair. The predicted class for each
            row is the index of its largest logit along the last axis.

    Returns:
        dict with the keys ``accuracy``, ``f1``, ``precision`` and ``recall``.

    Note:
        f1/precision/recall are called with scikit-learn's defaults, which
        treat label 1 as the positive class.
        NOTE(review): per the label-encoding comment in this notebook, label 1
        is the non-misogynistic class — confirm that this is the class these
        scores are meant to describe.
    """
    logits, labels = eval_pred
    predictions = torch.as_tensor(logits).argmax(dim=-1)

    # Insertion order of this table fixes the key order of the result.
    scorers = {
        "accuracy": accuracy_score,
        "f1": f1_score,
        "precision": precision_score,
        "recall": recall_score,
    }
    return {name: scorer(labels, predictions) for name, scorer in scorers.items()}

# 8. Training arguments
# Evaluation-only configuration: just an output directory and the eval batch
# size. The fine-tuning configuration is kept below, commented out, for
# reference:
#
#   training_args = TrainingArguments(
#       output_dir="./results",
#       evaluation_strategy="epoch",
#       save_strategy="epoch",
#       learning_rate=2e-5,
#       per_device_train_batch_size=16,
#       per_device_eval_batch_size=16,
#       num_train_epochs=4,
#       weight_decay=0.01,
#       logging_dir='./logs',
#       save_total_limit=1,
#       load_best_model_at_end=True,
#       metric_for_best_model="f1"
#   )
training_args = TrainingArguments(
    output_dir="./eval",
    per_device_eval_batch_size=16,
)

# 9. Trainer
# Built for evaluation only: no train_dataset is attached (the commented-out
# argument is kept for reference) and trainer.train() stays disabled.
trainer_kwargs = {
    "model": model,
    "args": training_args,
    # "train_dataset": train_dataset,
    "eval_dataset": test_dataset,
    "compute_metrics": compute_metrics,
}
trainer = Trainer(**trainer_kwargs)

# 10. Train (disabled)
# trainer.train()

In [11]:
# 11. Evaluate
# Score the model on the held-out split. It is passed explicitly, and is the
# same dataset the Trainer was constructed with.
eval_result = trainer.evaluate(eval_dataset=test_dataset)
print("Evaluation Results:", eval_result)

# (Optional) Save model — disabled in this evaluation-only run
# trainer.save_model("misogyny-classifier")
# tokenizer.save_pretrained("misogyny-classifier")

Evaluation Results: {'eval_loss': 0.09249640256166458, 'eval_model_preparation_time': 0.0042, 'eval_accuracy': 0.9801678108314263, 'eval_f1': 0.9889737065309584, 'eval_precision': 0.9806560134566863, 'eval_recall': 0.9974337040205303, 'eval_runtime': 1011.7358, 'eval_samples_per_second': 1.296, 'eval_steps_per_second': 0.081}


In [None]:

# Confusion matrix for the held-out split.
#
# Inference runs in mini-batches: pushing the whole test set through the model
# as one padded tensor can exhaust memory. Each batch is also moved to the
# model's device, because the Trainer may have placed the model on an
# accelerator, and predictions are brought back to the CPU before the NumPy
# conversion.
CM_BATCH_SIZE = 16  # same value as per_device_eval_batch_size

# 1. Collect raw texts and gold labels (list() also accepts lazy column objects)
test_texts = list(test_dataset["body"])
true_labels = torch.tensor(list(test_dataset["label"])).numpy()

# 2-3. Tokenize and classify one batch at a time
model.eval()
batch_predictions = []
with torch.no_grad():
    for start in range(0, len(test_texts), CM_BATCH_SIZE):
        inputs = tokenizer(
            test_texts[start:start + CM_BATCH_SIZE],
            padding=True,
            truncation=True,
            return_tensors="pt",
            max_length=512,
        ).to(model.device)
        batch_logits = model(**inputs).logits
        batch_predictions.append(torch.argmax(batch_logits, dim=-1).cpu())
predictions = torch.cat(batch_predictions).numpy()

# 4. Compute confusion matrix
# labels=[0, 1] pins the matrix to 2x2 even if one class is absent from both
# the gold labels and the predictions, so it always matches the two display
# labels. The display order relies on the encoder mapping Misogynistic -> 0.
cm = confusion_matrix(true_labels, predictions, labels=[0, 1])
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=["Misogynistic", "Not Misogynistic"])

# 5. Plot it
fig, ax = plt.subplots(figsize=(5,5))
disp.plot(ax=ax, cmap="Blues")
ax.set_title("Confusion Matrix")
plt.show()
