# Imports

In [1]:
#Neural net
import transformers
import torch
import torch.nn.functional as F
from torch import nn
import pandas as pd
import peft
from sklearn.metrics import classification_report
from sklearn.model_selection import StratifiedKFold
#Other modules
import tqdm
import os
from abc import ABC, abstractclassmethod
from pysentimiento.preprocessing import preprocess_tweet


os.environ["TOKENIZERS_PARALLELISM"] = "true"

device = "cuda" if torch.cuda.is_available() else "cpu"

  from .autonotebook import tqdm as notebook_tqdm
  from pandas.core import (


In [2]:
task_1 = {
    "Approach": [],
    "Fold": [],
    "Data":[],
    "f1": [],
    "recall": [],
    "precision": [],
    "f1_0": [],
    "recall_0": [],
    "precision_0": [],
    "f1_1": [],
    "recall_1": [],
    "precision_1": [],
    "ce": [],
    "Epoch":[]
}

task_1_pd = pd.DataFrame(task_1)
task_1_pd.to_csv("task1_res.csv", index=False)

# Utils

In [3]:
def get_kf_splits(dataset, target_label, n_splits=3):
    skf = StratifiedKFold(n_splits=n_splits)
    return skf.split(dataset.data["text"], dataset.data[target_label])

In [4]:
def create_subset(dataset, idx):
    return torch.utils.data.Subset(dataset, idx)

# Read DataFrame

In [5]:
data = pd.read_csv('train.csv')

data["text"] = data.apply(lambda sample:preprocess_tweet(sample["text"]), axis=1)

# Dataset

In [6]:
class DETESTSDataset(torch.utils.data.Dataset):
    def __init__(self, data, add_context=False):
        self.data = data
        self.context = add_context

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data.loc[idx]

        source = sample["source"]
        id = sample["id"]
        comment_id = sample["comment_id"]

        text = sample["text"]
        context = ""
        if self.context:
            if sample["level1"] != "0":
                context += " - "+ self.data[self.data["id"] == sample["level1"]]["text"].values[0]

            if sample["level2"] != "0":
                context += " - " + self.data[self.data["comment_id"] == sample["level2"]]["text"].values[0]

            if sample["level3"] != "0": #comprobar que no es el mismoo que level2?
                context += " - " + self.data[self.data["comment_id"] == sample["level3"]]["text"].values[0]

        #context = context if context != "" else None

        stereotype_hard = sample["stereotype"]
        stereotype_soft = sample["stereotype_soft"]

        stereotype_annotators = torch.tensor([
            sample["stereotype_a1"],
            sample["stereotype_a2"],
            sample["stereotype_a3"],
        ])

        implicit_hard = sample["implicit"]
        implicit_soft = sample["implicit_soft"]
        
        implicit_annotators = torch.tensor([
            sample["implicit_a1"],
            sample["implicit_a2"],
            sample["implicit_a3"],
        ])


        return {
            "idx": idx,
            "source": source,
            "id": id,
            "comment_id": comment_id,
            "text": (text, context),
            "stereotype_hard": stereotype_hard,
            "stereotype_soft": stereotype_soft,
            "stereotype_annotators": stereotype_annotators,
            "implicit_hard": implicit_hard,
            "implicit_soft": implicit_soft,
            "implicit_annotators": implicit_annotators,
        }

data = pd.read_csv('train.csv')
detest = DETESTSDataset(data)

# Model definition

In [7]:
class AnnotatorHead(nn.Module):
    """This class represents the classification head to append at the end of our Transformer"""
    def __init__(self, in_neurons, out_neurons):
        super(AnnotatorHead, self).__init__()
        self.fc = torch.nn.Sequential(
                torch.nn.Linear(in_features=in_neurons, out_features=64),
                torch.nn.ReLU(),
                torch.nn.BatchNorm1d(64),
                torch.nn.Linear(in_features=64, out_features=32),
                torch.nn.ReLU(),
                torch.nn.BatchNorm1d(32),
                torch.nn.Linear(in_features=32, out_features=out_neurons),
        )

    def forward(self, x):
        return self.fc(x)


class TransformerModel(nn.Module):
    """This class is our Transformer model for classification"""
    def __init__(
        self,
        _transformer,
        _tokenizer,
        _lora_cfg,
        num_annotators=1,
        device=device,
        **kwargs
    ):
        super(TransformerModel, self).__init__()
        self.tokenizer = _tokenizer
        self.base_transformer = peft.LoraModel(_transformer, _lora_cfg, "default")
        #Get hidden size
        self.hidden_size = self.base_transformer.config.to_dict().get("hidden_size")
        #Output layer of MLP
        self.mlp_output_size = kwargs.get("output_neurons_mlp", 2)
        #All the MLP in output layer
        self.annotators = nn.ModuleList(
            [
                AnnotatorHead(self.hidden_size, self.mlp_output_size)
                for _ in range(num_annotators)
            ]
        )

        self.device = device
        self.to(device)

    def get_num_parameters(self):
        return sum(p.numel() for p in self.parameters())

    def forward(self, x):
        text, context = x[0], x[1]
        #Tokenize text
        x = self.tokenizer(
            text, context, padding="max_length", truncation=True, return_tensors="pt"
        ).to(self.device)
        #Input to our transformer
        x = self.base_transformer(**x).last_hidden_state[:, 0]
        #Get the output of each annotator
        y = [ann(x) for ann in self.annotators]
        return torch.cat(y, dim=0)

# Custom Loss Functions

In [8]:
class BCEWithLogitsLossMultitask(nn.Module):
    """ Custom Loss function for multi-task scenario"""
    def __init__(self):
        super().__init__()
        self.bce = torch.nn.BCEWithLogitsLoss(reduction='none')
    
    def forward(self, predictions, target):
        
        #Each annotator produces an output for a sample. The loss for a sample is calculated wrt the loss of each annotator
        return  torch.sum(self.bce(predictions, target), dim=1)

In [9]:
class CEWithLogitsLoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.ce = torch.nn.CrossEntropyLoss(reduction='none')
    
    def forward(self, predictions, target):
        target = target.unsqueeze(dim=1)
        target = torch.cat((1 - target, target), dim=1)
        return self.ce(predictions, target)

# Score evaluator

In [10]:
from abc import ABC, abstractclassmethod
import torch
from sklearn.metrics import classification_report

class ResultsParser:
    def __init__(self, task) -> None:
        self.task = task
    
    def parse(self, results):
        res = {}
        if self.task.startswith("stereotype"):
            #Class 0
            res["precision_0"] = results["0.0"]["precision"]
            res["recall_0"] = results["0.0"]["recall"]
            res["f1_0"] = results["0.0"]["f1-score"]
            #Class 1
            res["precision_1"] = results["1.0"]["precision"]
            res["recall_1"] = results["1.0"]["recall"]
            res["f1_1"] = results["1.0"]["f1-score"]
            #Macro
            res["f1"] = results["macro avg"]["f1-score"]
            res["precision"] = results["macro avg"]["precision"]
            res["recall"] = results["macro avg"]["recall"]
            #Cross Entropy
            res["ce"] = results["cross_entropy"]
        return res


class ScoreEvaluator(ABC):
    def __init__(self, task) -> None:
        self.task = task
        self.results_parser = ResultsParser(self.task)

    @abstractclassmethod
    def get_precision_recall_f1(self, target, preds):
        pass

    @abstractclassmethod
    def get_ce_score(self, target, preds):
        pass

    def get_results_task(self, target_hard, target_soft, preds):
        res = {}
        if self.task.startswith("stereotype"):
            f1_scores = self.get_precision_recall_f1(target_hard, preds)
            ce_score = self.get_ce_score(target_soft, preds)
            res = {"cross_entropy": ce_score, **f1_scores}
        
        return self.results_parser.parse(res)


class HardScoreEvaluator(ScoreEvaluator):
    def __init__(self, task) -> None:
        super().__init__(task)
        self.bce = torch.nn.BCELoss()

    def get_precision_recall_f1(self, target, preds):
        # Convert logits to probabilities and get the classes
        preds = torch.sigmoid(preds) >= 0.5
        # Compute F1-score
        res = classification_report(target, preds, output_dict=True)
        return res

    def get_ce_score(self, target, preds):
        # Logits -> Probabilities
        preds = torch.sigmoid(preds)
        # BCE
        return self.bce(preds, target).item()


class SoftScoreEvaluator(ScoreEvaluator):
    def __init__(self, task):
        super().__init__(task)
        self.task = task
        self.ce = torch.nn.CrossEntropyLoss()

    def get_precision_recall_f1(self, target, preds):
        # Logits -> Softmax probabilities -> Get most probable class
        preds = torch.argmax(preds.softmax(dim=1), dim=1)
        #F1-score
        res = classification_report(target, preds, output_dict=True)
        return res

    def get_ce_score(self, target, preds):
        #Add an extra dimension to targets
        target = target.unsqueeze(dim=1)
        #Probabilities for both classes
        target = torch.cat((1 - target, target), dim=1)
        #Cross entropy automatically applies softmax
        return self.ce(preds, target).item()


class MultiTaskScoreEvaluator(ScoreEvaluator):
    def __init__(self, task):
        self.task = task
        self.ce = torch.nn.CrossEntropyLoss()

    def get_precision_recall_f1(target, preds):
        #From logits to  probs
        preds = torch.sigmoid(preds)
        #Get number of votes for each class
        preds_annotator_votes_zero = (preds < 0.5).sum(dim=1)
        preds_annotator_votes_ones = (preds >= 0.5).sum(dim=1)
        #Majority voting
        preds = torch.where(preds_annotator_votes_ones >= preds_annotator_votes_zero, 1, 0)
        res = classification_report(target, preds, output_dict=True)
        return res

    def get_ce_score(self, target, preds):
        #Add extra dimension to targets
        target.unsqueeze_(dim=1)
        #Concatenate outputs
        target = torch.cat((1 - target, target), dim=1)
        #Sigmoid to predictions
        preds = torch.sigmoid(preds)
        #Number of annotators for each class, add extra dimension
        preds_annotator_votes_zero = (preds < 0.5).sum(dim=1).unsqueeze(dim=1)
        preds_annotator_votes_ones = (preds >= 0.5).sum(dim=1).unsqueeze(dim=1)
        #(B, 2)
        preds = torch.cat((preds_annotator_votes_zero, preds_annotator_votes_ones), dim=1).float()
        return self.ce(preds, target).item()

# Statistics Recollector

In [11]:
class StatisticsRecollector:
    def __init__(self, pd_data, csv_path) -> None:
        self.pd_data = pd_data
        self.csv_path =  csv_path
    
    def add_data_to_dataframe(self, dict_row):
        self.pd_data = pd.concat([self.pd_data, pd.Series(dict_row).to_frame().T], ignore_index=True)
    
    def save_statistics(self):
        self.pd_data.to_csv(self.csv_path, index=False, mode='a', header=False)

# Train model

In [12]:
class Trainer:
    def __init__(
        self,
        model,
        opt,
        criterion,
        dataset,
        scorer,
        statistic_rc,
        accumulated_batch_size=8,
    ) -> None:
        self.model = model
        self.opt = opt
        self.criterion = criterion
        self.dataset = dataset
        self.accumulated_batch_size = (
            accumulated_batch_size / 8
        )  # My GPU only fits batch size of 8, so we are going to accumulate gradient
        self.model_default_weights_path = "model_default.pt"
        torch.save(self.model.state_dict(), self.model_default_weights_path)
        self.statistic_rc = statistic_rc
        self.scorer = scorer

    def dev(self, dev_dataloader, task_name, task_label, epoch):
        dev_loss = 0
        target_preds_hard = torch.tensor([])
        target_preds_soft = torch.tensor([])
        dev_preds = torch.tensor([])
        self.model.eval()

        with tqdm.tqdm(
            iter(dev_dataloader), desc="Dev epoch " + str(epoch), unit="batch"
        ) as tepoch:
            with torch.no_grad():
                for batch_idx, batch in enumerate(tepoch):
                    # Data
                    text = batch["text"]
                    # Label
                    task_hard_labels = batch[task_name + "_hard"].float()
                    task_soft_labels = batch[task_name + "_soft"].float()
                    # label to perform training
                    task_target_labels = (
                        batch[task_name + "_" + task_label].float().to(device)
                    )
                    
                    # Forward
                    preds = self.model(text).squeeze()
                    # Compute loss and propagate bacckward
                    loss = self.criterion(preds, task_target_labels)
                    # Accumulate loss
                    dev_loss += loss.mean()
                    # Store outputs
                    dev_preds = torch.cat((dev_preds, preds.cpu()))
                    # Hard and soft labels
                    target_preds_hard = torch.cat((target_preds_hard, task_hard_labels))
                    target_preds_soft = torch.cat((target_preds_soft, task_soft_labels))
                res = self.scorer.get_results_task(
                    target_preds_hard, target_preds_soft, dev_preds
                )

                print(res)
                print(f"{dev_loss / len(dev_dataloader)}")

    def train(self, n_epochs, task_name, task_label):
        kfold_splitter = get_kf_splits(self.dataset, task_name)

        for fold, (train_idx, dev_idx) in enumerate(kfold_splitter):
            self.model.load_state_dict(torch.load(self.model_default_weights_path))
            print(f"Fold {fold}")
            # DataLoader
            train_loader = torch.utils.data.DataLoader(
                create_subset(detest, train_idx),
                batch_size=8,
                shuffle=True,
                num_workers=8,
            )
            dev_loader = torch.utils.data.DataLoader(
                create_subset(detest, dev_idx),
                batch_size=8,
                shuffle=True,
                num_workers=8,
            )

            train_best_f1_score = 0
            train_best_ce_score = 1000
            train_best_results = {}

            dev_best_f1_score = 0
            dev_best_ce_score = 1000
            dev_best_results = {}
            for epoch in range(n_epochs):
                with tqdm.tqdm(
                    iter(train_loader), desc="Train epoch " + str(epoch), unit="batch"
                ) as tepoch:
                    self.model.train()
                    # Train Loop
                    train_loss = 0
                    train_preds = torch.tensor([], dtype=torch.float)
                    target_preds_hard = torch.tensor([], dtype=torch.float)
                    target_preds_soft = torch.tensor([], dtype=torch.float)

                    for batch_idx, batch in enumerate(tepoch):
                        # Data
                        text = batch["text"]
                        # Label
                        task_hard_labels = batch[task_name + "_hard"].float()
                        task_soft_labels = batch[task_name + "_soft"].float()
                        # label to perform training
                        task_target_labels = (
                            batch[task_name + "_" + task_label].float().to(device)
                        )
                        # Forward
                        preds = self.model(text).squeeze()
                        # Compute loss and propagate bacckward
                        loss = self.criterion(preds, task_target_labels)
                        # Propagate backward
                        loss.mean().backward()
                        # Accumulate loss
                        train_loss += loss.mean()
                        # Store outputs
                        train_preds = torch.cat((train_preds, preds.cpu()))
                        # Hard and soft labels
                        target_preds_hard = torch.cat(
                            (target_preds_hard, task_hard_labels)
                        )
                        target_preds_soft = torch.cat(
                            (target_preds_soft, task_soft_labels)
                        )
                        # Update  gradients
                        if (batch_idx + 1) % self.accumulated_batch_size == 0:
                            self.opt.step()
                            self.opt.zero_grad()
                    results = self.scorer.get_results_task(
                        target_preds_hard, target_preds_soft, train_preds
                    )
                    print(results)
                    if (
                        results["f1"] >= train_best_f1_score
                        and results["ce"] <= train_best_ce_score
                    ):
                        train_best_f1_score = results["f1"]
                        train_best_ce_score = results["ce"]
                        train_best_results = {
                            "Approach": task_name + "_" + task_label,
                            "Epoch": epoch,
                            "Fold": fold,
                            "Data": "train",
                            **results,
                        }
                    print(f"TRAIN: {train_loss / len(train_loader)}")
                    # self.dev(dev_loader, task_name, task_label, epoch)

                    dev_loss = 0
                    target_preds_hard = torch.tensor([])
                    target_preds_soft = torch.tensor([])
                    dev_preds = torch.tensor([])
                    self.model.eval()

                    # Dev loop
                    with tqdm.tqdm(
                        iter(dev_loader), desc="Dev epoch " + str(epoch), unit="batch"
                    ) as tepoch:
                        with torch.no_grad():
                            for batch_idx, batch in enumerate(tepoch):
                                # Data
                                text = batch["text"]
                                # Label
                                task_hard_labels = batch[task_name + "_hard"].float()
                                task_soft_labels = batch[task_name + "_soft"].float()
                                # label to perform training
                                task_target_labels = (
                                    batch[task_name + "_" + task_label]
                                    .float()
                                    .to(device)
                                )
                                # Forward
                                preds = self.model(text).squeeze()
                                # Compute loss and propagate bacckward
                                loss = self.criterion(preds, task_target_labels)
                                # Accumulate loss
                                dev_loss += loss.mean()
                                # Store outputs
                                dev_preds = torch.cat((dev_preds, preds.cpu()))
                                # Hard and soft labels
                                target_preds_hard = torch.cat(
                                    (target_preds_hard, task_hard_labels)
                                )
                                target_preds_soft = torch.cat(
                                    (target_preds_soft, task_soft_labels)
                                )

                            results = self.scorer.get_results_task(
                                target_preds_hard, target_preds_soft, dev_preds
                            )
                            print(results)
                            if (
                                results["f1"] >= dev_best_f1_score
                                and results["ce"] <= dev_best_ce_score
                            ):
                                dev_best_f1_score = results["f1"]
                                dev_best_ce_score = results["ce"]
                                
                                dev_best_results = {
                                    "Approach": task_name + "_" + task_label,
                                    "Epoch": epoch,
                                    "Fold": fold,
                                    "Data": "dev",
                                    **results,
                                }
                            print(f"DEV: {dev_loss / len(dev_loader)}")
            self.statistic_rc.add_data_to_dataframe(train_best_results)
            self.statistic_rc.add_data_to_dataframe(dev_best_results)
        self.statistic_rc.save_statistics()

In [13]:
model_name = "dccuchile/bert-base-spanish-wwm-uncased"
tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
transformer = transformers.AutoModel.from_pretrained(model_name)
lora_config = peft.LoraConfig(r=16, lora_alpha=16, lora_dropout=0.3)

model = TransformerModel(
    transformer, tokenizer, lora_config, num_annotators=1, **{"output_neurons_mlp": 2}
)
opt = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-6)
criterion = torch.nn.BCEWithLogitsLoss(reduction='none')
criterion = CEWithLogitsLoss()
scorer = SoftScoreEvaluator("stereotype_soft")
st_rc = StatisticsRecollector(pd.read_csv('task1_res.csv'), 'task1_res.csv')
trainer = Trainer(model, opt, criterion, detest, scorer, st_rc)
trainer.train(5, "stereotype", "soft")

Fold 0


Train epoch 0: 100%|██████████| 826/826 [06:38<00:00,  2.07batch/s]

{'precision_0': 0.8219329214474845, 'recall_0': 0.7653585370865009, 'f1_0': 0.7926375146292158, 'precision_1': 0.44884169884169883, 'recall_1': 0.5354058721934369, 'f1_1': 0.488317143607246, 'f1': 0.6404773291182309, 'precision': 0.6353873101445917, 'recall': 0.650382204639969, 'ce': 0.591110348701477}
TRAIN: 0.5909661650657654



Dev epoch 0: 100%|██████████| 413/413 [01:32<00:00,  4.44batch/s]

{'precision_0': 0.7867898699520877, 'recall_0': 0.9445357436318816, 'f1_0': 0.8584764749813294, 'precision_1': 0.6447368421052632, 'recall_1': 0.28225806451612906, 'f1_1': 0.3926282051282051, 'f1': 0.6255523400547672, 'precision': 0.7157633560286754, 'recall': 0.6133969040740054, 'ce': 0.558488667011261}
DEV: 0.5585132241249084



Train epoch 1: 100%|██████████| 826/826 [06:41<00:00,  2.06batch/s]

{'precision_0': 0.8702209005947323, 'recall_0': 0.8417916581056092, 'f1_0': 0.8557702349869452, 'precision_1': 0.5938818565400844, 'recall_1': 0.6482440990213011, 'f1_1': 0.6198733828791633, 'f1': 0.7378218089330542, 'precision': 0.7320513785674083, 'recall': 0.7450178785634551, 'ce': 0.5126221179962158}
TRAIN: 0.5124653577804565



Dev epoch 1: 100%|██████████| 413/413 [01:31<00:00,  4.50batch/s]

{'precision_0': 0.8260200153964589, 'recall_0': 0.8816762530813476, 'f1_0': 0.8529411764705882, 'precision_1': 0.5909090909090909, 'recall_1': 0.4792626728110599, 'f1_1': 0.5292620865139949, 'f1': 0.6911016314922915, 'precision': 0.7084645531527749, 'recall': 0.6804694629462037, 'ce': 0.5486724376678467}
DEV: 0.5487388968467712



Train epoch 2: 100%|██████████| 826/826 [06:37<00:00,  2.08batch/s]

{'precision_0': 0.8812860676009893, 'recall_0': 0.878569960961578, 'f1_0': 0.8799259183043523, 'precision_1': 0.6626712328767124, 'recall_1': 0.6683937823834197, 'f1_1': 0.6655202063628547, 'f1': 0.7727230623336034, 'precision': 0.7719786502388508, 'recall': 0.7734818716724989, 'ce': 0.4701634645462036}
TRAIN: 0.47017231583595276



Dev epoch 2: 100%|██████████| 413/413 [01:48<00:00,  3.81batch/s]

{'precision_0': 0.8093205265030239, 'recall_0': 0.9346754313886606, 'f1_0': 0.8674928503336511, 'precision_1': 0.6761710794297352, 'recall_1': 0.3824884792626728, 'f1_1': 0.48859455481972036, 'f1': 0.6780437025766857, 'precision': 0.7427458029663796, 'recall': 0.6585819553256667, 'ce': 0.539480984210968}
DEV: 0.5394885540008545



Train epoch 3:   2%|▏         | 16/826 [02:00<1:41:28,  7.52s/batch]


KeyboardInterrupt: 