In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from datasets import load_metric, Dataset, DatasetDict
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    EarlyStoppingCallback,
    set_seed
)
from sklearn.utils import class_weight

In [None]:
class WeightedTrainer(Trainer):
    """``Trainer`` subclass whose loss is a class-weighted cross-entropy.

    Args:
        class_weights: Tensor of per-class weights handed to
            ``nn.CrossEntropyLoss(weight=...)``, or ``None`` for an
            unweighted loss.
        **kwargs: Forwarded unchanged to ``Trainer.__init__``.
    """

    def __init__(self, class_weights, **kwargs):
        super().__init__(**kwargs)
        self.class_weights = class_weights

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute the class-weighted cross-entropy loss for one batch.

        Args:
            model: The model to run the forward pass with.
            inputs: Batch mapping; must contain a ``"labels"`` entry.
            return_outputs: When true, return ``(loss, outputs)`` instead of
                just ``loss``.
            num_items_in_batch: Accepted only for compatibility with
                transformers releases that pass this keyword to
                ``compute_loss``; it is not used by this loss.

        Returns:
            The scalar loss tensor, or ``(loss, outputs)`` when
            ``return_outputs`` is true.
        """
        labels = inputs.get("labels").long()
        outputs = model(**inputs)
        logits = outputs.get("logits")

        # Keep the weights on the same device as the logits so the loss also
        # works when the weights were created on a different device than the
        # one the model is running on.
        weights = self.class_weights
        if weights is not None:
            weights = weights.to(logits.device)

        loss_fct = nn.CrossEntropyLoss(weight=weights)
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss
    

# Metric objects are created once at module level and reused by
# compute_metrics on every evaluation pass.
# NOTE(review): load_metric is deprecated in recent `datasets` releases in
# favour of the separate `evaluate` package -- confirm the pinned version.
acc, f1 = load_metric("accuracy"), load_metric("f1")

def compute_metrics(eval_pred):
    """Turn raw evaluation output into accuracy and F1 scores.

    Args:
        eval_pred: Pair ``(logits, labels)``; the predicted class of each
            example is the argmax of its logits over the last axis.

    Returns:
        Dict with the keys ``"accuracy"`` and ``"f1"``, taken from the
        module-level ``acc`` and ``f1`` metric objects.
    """
    scores, gold = eval_pred
    predicted = np.argmax(scores, axis=-1)

    accuracy_result = acc.compute(predictions=predicted, references=gold)
    f1_result = f1.compute(predictions=predicted, references=gold)
    return {"accuracy": accuracy_result["accuracy"], "f1": f1_result["f1"]}

In [None]:
# Token length used by preprocess_text: every example is padded to, and
# truncated at, this many tokens.
max_seq_length = 128
# Fix the random seed (transformers.set_seed) for reproducibility.
set_seed(42)

## EVALITA 18

In [None]:
# Checkpoint to fine-tune; the output directory name is derived from it.
model_name = "bert-base-cased"
output_dir = f"{model_name}_ami18"

# Tokenizer matching the chosen checkpoint; used by preprocess_text.
tokenizer = AutoTokenizer.from_pretrained(model_name)

def preprocess_text(examples):
    """Tokenize the ``"text"`` entry of a batch of examples.

    Args:
        examples: Mapping with a ``"text"`` entry; in this notebook it is
            supplied by ``DatasetDict.map(..., batched=True)``.

    Returns:
        The result of calling the module-level ``tokenizer`` on the texts,
        padded to and truncated at ``max_seq_length`` tokens.
    """
    texts = examples["text"]
    tokenizer_options = dict(
        padding="max_length",
        truncation=True,
        max_length=max_seq_length,
    )
    return tokenizer(texts, **tokenizer_options)

In [None]:
# Read the three tab-separated splits, in train / dev / test order.
train, validation, test = (
    pd.read_csv(path, sep="\t")
    for path in ("data/miso_train.tsv", "data/miso_dev.tsv", "data/miso_test.tsv")
)

# Wrap the frames in a DatasetDict and rename the target column to "label".
raw_datasets = DatasetDict(
    {
        "train": Dataset.from_pandas(train),
        "validation": Dataset.from_pandas(validation),
        "test": Dataset.from_pandas(test),
    }
).rename_column("misogynous", "label")

# Tokenize every split in batches.
proc_datasets = raw_datasets.map(preprocess_text, batched=True)

In [None]:
# Re-seed immediately before building the model. Weights that are not present
# in the checkpoint (e.g. a new classification head) are initialised randomly,
# so without this the result depends on how much randomness earlier cells
# consumed. 42 is the same seed the notebook sets at the top.
set_seed(42)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    # One output per class observed in the training labels, so the head size
    # agrees with the class-weight vector computed from those same labels.
    num_labels=len(np.unique(proc_datasets["train"]["label"])),
)

In [None]:
# Training configuration for this experiment; keyword arguments are grouped
# by concern.
training_args = TrainingArguments(
    # Output location.
    output_dir=output_dir,
    overwrite_output_dir=True,
    # Batching: 8 examples per device, gradients accumulated over 2 steps.
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=2,
    dataloader_num_workers=4,
    # Optimisation schedule.
    num_train_epochs=3,
    lr_scheduler_type="linear",
    warmup_ratio=0.1,
    # Evaluate, log and checkpoint on the same 50-step cadence.
    evaluation_strategy="steps",
    eval_steps=50,
    logging_steps=50,
    save_strategy="steps",
    save_steps=50,
    save_total_limit=2,
    # Best-checkpoint selection is driven by the "loss" metric.
    metric_for_best_model="loss",
    load_best_model_at_end=True,
    # Experiment tracking backend.
    report_to="wandb",
)

In [None]:
# Materialise the training labels once; they are needed both for the set of
# classes and as the sample vector.
train_labels = np.array(proc_datasets["train"]["label"])

# "balanced" weights from sklearn, one per class found in the training labels.
class_weights = class_weight.compute_class_weight(
    'balanced',
    classes=np.unique(train_labels),
    y=train_labels,
)

# Place the weights on the device the Trainer will use instead of
# hard-coding "cuda", which raises on machines without a CUDA device.
class_weights = torch.tensor(
    class_weights,
    device=training_args.device,
    dtype=torch.float32,
)

In [None]:
# Early-stopping callback configured with a patience of 2.
early_stopping = EarlyStoppingCallback(early_stopping_patience=2)

# Assemble the class-weighted trainer from the objects built in earlier cells.
trainer = WeightedTrainer(
    model=model,
    args=training_args,
    class_weights=class_weights,
    train_dataset=proc_datasets["train"],
    eval_dataset=proc_datasets["validation"],
    compute_metrics=compute_metrics,
    callbacks=[early_stopping],
)

In [None]:
# Run fine-tuning with the trainer configured above; as the last expression
# of the cell, the returned training summary is displayed.
trainer.train()

In [None]:
# Persist the trained model to the experiment's output directory.
trainer.save_model(output_dir)
# The tokenizer is not handed to the trainer in this notebook, so it is saved
# explicitly; this makes the output directory loadable on its own.
tokenizer.save_pretrained(output_dir)

In [None]:
# Run the trained model over the tokenized test split.
predictions = trainer.predict(test_dataset=proc_datasets["test"])
# Last expression of the cell: display the metrics attached to the prediction output.
predictions.metrics

## EVALITA 20

In [None]:
# Checkpoint to fine-tune; the output directory name is derived from it.
# NOTE(review): the checkpoint id contains a "/", so output_dir is a nested
# path ("dbmdz/...") -- confirm this is intended.
model_name = "dbmdz/bert-base-italian-cased"
output_dir = f"{model_name}_ami20"

# Tokenizer matching the chosen checkpoint; used by preprocess_text.
tokenizer = AutoTokenizer.from_pretrained(model_name)

def preprocess_text(examples):
    """Tokenize the ``"text"`` entry of a batch of examples.

    Args:
        examples: Mapping with a ``"text"`` entry; in this notebook it is
            supplied by ``DatasetDict.map(..., batched=True)``.

    Returns:
        The result of calling the module-level ``tokenizer`` on the texts,
        padded to and truncated at ``max_seq_length`` tokens.
    """
    batch_texts = examples["text"]
    encode_kwargs = {
        "padding": "max_length",
        "truncation": True,
        "max_length": max_seq_length,
    }
    return tokenizer(batch_texts, **encode_kwargs)

In [None]:
# Train and validation splits are comma-separated; the test split is
# tab-separated.
train, validation = (
    pd.read_csv(path)
    for path in ("data/AMI2020_training_raw_90.csv", "data/AMI2020_validation_raw_10.csv")
)
test = pd.read_csv("data/AMI2020_test_raw_gt.tsv", sep="\t")

# Wrap the frames in a DatasetDict and rename the target column to "label".
raw_datasets = DatasetDict(
    {
        "train": Dataset.from_pandas(train),
        "validation": Dataset.from_pandas(validation),
        "test": Dataset.from_pandas(test),
    }
).rename_column("misogynous", "label")

# Tokenize every split in batches.
proc_datasets = raw_datasets.map(preprocess_text, batched=True)

In [None]:
# Re-seed immediately before building the model. Weights that are not present
# in the checkpoint (e.g. a new classification head) are initialised randomly,
# so without this the result depends on whatever ran earlier in the kernel
# (including the whole previous experiment). 42 is the same seed the notebook
# sets at the top.
set_seed(42)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    # One output per class observed in the training labels, so the head size
    # agrees with the class-weight vector computed from those same labels.
    num_labels=len(np.unique(proc_datasets["train"]["label"])),
)

In [None]:
# Training configuration for this experiment; keyword arguments are grouped
# by concern.
training_args = TrainingArguments(
    # Output location.
    output_dir=output_dir,
    overwrite_output_dir=True,
    # Batching: 8 examples per device, gradients accumulated over 2 steps.
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=2,
    dataloader_num_workers=4,
    # Optimisation schedule.
    num_train_epochs=3,
    lr_scheduler_type="linear",
    warmup_ratio=0.1,
    # Evaluate, log and checkpoint on the same 50-step cadence.
    evaluation_strategy="steps",
    eval_steps=50,
    logging_steps=50,
    save_strategy="steps",
    save_steps=50,
    save_total_limit=2,
    # Best-checkpoint selection is driven by the "loss" metric.
    metric_for_best_model="loss",
    load_best_model_at_end=True,
    # No external experiment tracker for this run.
    report_to="none",
)

In [None]:
# Materialise the training labels once; they are needed both for the set of
# classes and as the sample vector.
train_labels = np.array(proc_datasets["train"]["label"])

# "balanced" weights from sklearn, one per class found in the training labels.
class_weights = class_weight.compute_class_weight(
    'balanced',
    classes=np.unique(train_labels),
    y=train_labels,
)

# Place the weights on the device the Trainer will use instead of
# hard-coding "cuda", which raises on machines without a CUDA device.
class_weights = torch.tensor(
    class_weights,
    device=training_args.device,
    dtype=torch.float32,
)

In [None]:
# Early-stopping callback configured with a patience of 2.
early_stopping = EarlyStoppingCallback(early_stopping_patience=2)

# Assemble the class-weighted trainer from the objects built in earlier cells.
trainer = WeightedTrainer(
    model=model,
    args=training_args,
    class_weights=class_weights,
    train_dataset=proc_datasets["train"],
    eval_dataset=proc_datasets["validation"],
    compute_metrics=compute_metrics,
    callbacks=[early_stopping],
)

In [None]:
# Run fine-tuning with the trainer configured above; as the last expression
# of the cell, the returned training summary is displayed.
trainer.train()

In [None]:
# Persist the trained model to the experiment's output directory.
trainer.save_model(output_dir)
# The tokenizer is not handed to the trainer in this notebook, so it is saved
# explicitly; this makes the output directory loadable on its own.
tokenizer.save_pretrained(output_dir)

In [None]:
# Run the trained model over the tokenized test split.
predictions = trainer.predict(test_dataset=proc_datasets["test"])
# Last expression of the cell: display the metrics attached to the prediction output.
predictions.metrics