# Install and import what is necessary to carry out the training

In [None]:
#Importing libraries and mounting the drive where the dataset is stored
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns; sns.set_style(style = 'whitegrid')
%matplotlib inline
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install datasets
!pip install transformers==4.28.0  # Later versions are having problems with TrainingArguments
!pip install evaluate
!pip install wandb
import tensorflow as tf
import gc, torch
from datasets import load_dataset
import evaluate
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments, EarlyStoppingCallback, DataCollatorWithPadding
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import classification_report

# Common training functions shared by all models

In [None]:
def load_corpus_Salud(drop, n_labels, task, path="/content/drive/MyDrive/Colab Notebooks/DatasetMH.csv"):
  """Load the tweet corpus and encode its labels as integers.

  Args:
    drop: number of rows to discard from the end of the corpus (0 keeps every row).
    n_labels: only used when task is "sentiment". 2 removes the "Indeterminado"
      rows and encodes "Negativa" as 0 and everything else as 1; any other value
      keeps three classes ("Negativa"=0, "Indeterminado"=1, everything else=2).
    task: "sentiment" labels by the "Polaridad" column; any other value labels
      by the "Emociones" column (five named emotions=0..4, everything else=5).
    path: ';'-separated CSV holding the "Texto", "Polaridad" and "Emociones"
      columns. Defaults to the copy stored on the mounted Drive.

  Returns:
    A new DataFrame with exactly the columns ["text", "label"], which is the
    header layout the Trainer pipeline expects.
  """
  dataset = pd.read_csv(path, sep=";")[["Texto", "Polaridad", "Emociones"]]
  # These tweets have neither an emotion nor a sentiment associated.
  # errors="ignore" keeps the loader usable on files that do not contain those rows.
  dataset = dataset.drop([2284, 2286], errors="ignore")
  dataset = dataset.drop(dataset.tail(drop).index)
  dataset.columns = ["text", "label", "emotion"]

  if task == "sentiment":
    source = "label"
    if n_labels == 2:
      dataset = dataset.loc[dataset["label"] != "Indeterminado"]
      codes, fallback = {"Negativa": 0}, 1
    else:
      codes, fallback = {"Negativa": 0, "Indeterminado": 1}, 2
  else:
    source = "emotion"
    codes = {
        "Amor/Admiración": 0,
        "Gratitud": 1,
        "Tristeza/Pena": 2,
        "Enfado/Desprecio/Burla": 3,
        "Comprensión/Empatía/Identificación": 4,
    }
    fallback = 5

  # Any value missing from `codes` (including NaN) falls back to the last class,
  # exactly like the final `else` of the original conditional chain.
  labels = [codes.get(item, fallback) for item in dataset[source]]
  # assign() builds a new frame, so no write ever lands on a filtered slice.
  return dataset.assign(label=labels)[["text", "label"]]

#Label "Texto" is Text
#Label "Emociones" is Emotions

#Label "Positiva" is Positive
#Label "Negativa" is Negative
#Label "Indeterminado" is Neutral

#Label "Amor/Admiración" is Love/admiration
#Label "Gratitud" is Gratitude
#Label "Tristeza/Pena" is Sadness
#Label "Enfado/Desprecio/Burla" is Anger/contempt/mockery
#Label "Comprensión/Empatía/Identificación" is Comprehension/empathy/identification
#Label "Indeterminado" is Neutral

# Metric objects are loaded once and reused: compute_metrics runs on every
# evaluation pass, and reloading four metrics each time is wasted work.
_METRIC_CACHE = {}


def _get_metric(name):
    """Return the `evaluate` metric called `name`, loading it only on first use."""
    if name not in _METRIC_CACHE:
        _METRIC_CACHE[name] = evaluate.load(name)
    return _METRIC_CACHE[name]


def compute_metrics(eval_pred):
    """Compute accuracy and weighted precision/recall/F1 for one evaluation pass.

    Args:
        eval_pred: a (logits, labels) pair; the predicted class is the argmax
            over the last axis of `logits`.

    Returns:
        dict with the keys "accuracy", "precision", "recall" and "f1".
        Precision, recall and F1 use average="weighted".
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    accuracy = _get_metric("accuracy").compute(predictions=predictions, references=labels)["accuracy"]
    precision = _get_metric("precision").compute(predictions=predictions, references=labels, average="weighted")["precision"]
    recall = _get_metric("recall").compute(predictions=predictions, references=labels, average="weighted")["recall"]
    f1 = _get_metric("f1").compute(predictions=predictions, references=labels, average="weighted")["f1"]
    return {"accuracy":accuracy, "precision":precision, "recall":recall, "f1":f1}


def tokenize_function(examples):
    """Tokenize the "text" field of `examples` with the notebook-level `tokenizer`.

    The tokenizer is called with padding="max_length" and truncation=True, and
    its output is returned unchanged.
    """
    texts = examples["text"]
    encoded = tokenizer(texts, padding="max_length", truncation=True)
    return encoded

def garbage_collector():
  torch.cuda.empty_cache()
  gc.collect()
  !nvidia-smi

# Early-stopping callback built with its default arguments; the same instance is
# passed in `callbacks=[earlyStop]` to every Trainer created in this notebook.
earlyStop=EarlyStoppingCallback()

# Analysis of Polarity

In [None]:
#Label "Polaridad" is Polarity
#Label "Positiva" is Positive
#Label "Negativa" is Negative
#Label "Indeterminado" is Neutral

dataset = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/DatasetMH.csv", sep=";")[["Texto", "Polaridad"]]

print(dataset.head())
print(dataset["Polaridad"].unique())
plt.figure(figsize=(5, 3))
# The frame only has the columns "Texto" and "Polaridad", so the plot must be keyed
# on "Polaridad"; the English name is applied to the axis label instead.
sns.countplot(x = 'Polaridad', data = dataset, palette = 'rocket',
              order=['Positiva', 'Negativa', 'Indeterminado'])
plt.xlabel('Polarity');



### RoBERTuito Model with 2 labels: Positive, Negative

##### Tuning process with Wandb

In [None]:
import wandb

# Log in to Weights & Biases before the hyperparameter search below, which uses the "wandb" backend.
wandb.login()

In [None]:
id2label = {0: "NEG", 1: "POS"}
label2id = {"NEG": 0, "POS": 1}

#NEG is Negative Polarity
#POS is Positive Polarity

model = AutoModelForSequenceClassification.from_pretrained('pysentimiento/robertuito-sentiment-analysis', num_labels=2, id2label=id2label, label2id=label2id, ignore_mismatched_sizes=True)
tokenizer = AutoTokenizer.from_pretrained('pysentimiento/robertuito-sentiment-analysis')
data_collator = DataCollatorWithPadding(tokenizer)


dataset = load_corpus_Salud(drop=0, n_labels=2, task="sentiment")
# Fixed seed (the same value the K-fold cell uses) so every run of the sweep sees
# the same stratified train/test partition and trials stay comparable.
train, test = train_test_split(dataset, stratify=dataset["label"], random_state=10)
# The split goes through CSV files because the datasets are built with load_dataset("csv", ...).
train.to_csv("corpus_train.csv", index=False)
test.to_csv("corpus_test.csv", index=False)
train_test = load_dataset("csv", data_files={"train":"./corpus_train.csv", "test":"./corpus_test.csv"})
tokenized_datasets = train_test.map(tokenize_function, batched=True)

In [None]:
# Weights & Biases settings.
# NOTE(review): nothing below reads this dict. It is not a model configuration, so it
# must not be handed to from_pretrained(config=...). Keep real API keys out of the
# notebook; authentication is done by wandb.login().
config = {
          "wandb":{
            "project": "wandb-2labels-MentalHealth",
            "api_key": "XXXXXXXXXXX"
        }
}


def wandb_hp_space(trial):
    """Return the random-search sweep definition (learning rate, batch sizes, epochs)."""
    return {
        "method": "random",
        "metric": {"name": "accuracy", "goal": "maximize"},
        "parameters": {
            "learning_rate": {"distribution": "uniform", "min": 1e-6, "max": 1e-4},
            "per_device_train_batch_size": {"values": [16, 32, 64, 128]},
            "per_device_eval_batch_size": {"values": [16, 32, 64, 128]},
            "num_train_epochs" : {"distribution": "int_uniform", "min": 5, "max": 15}
        },
    }

def model_init(trial):
    """Build a fresh 2-label classifier for each trial.

    The arguments mirror the model used in the K-fold evaluation, so the
    hyperparameters found here are tuned on the same architecture.
    """
    return AutoModelForSequenceClassification.from_pretrained(
        'pysentimiento/robertuito-sentiment-analysis',
        num_labels=2,
        id2label={0: "NEG", 1: "POS"},
        label2id={"NEG": 0, "POS": 1},
        ignore_mismatched_sizes=True
    )


training_args = TrainingArguments(report_to="wandb", output_dir="wandb-2labels-Twitch", overwrite_output_dir=True,
       evaluation_strategy="epoch", per_device_train_batch_size = 128, per_device_eval_batch_size=128, num_train_epochs = 10,
                                    fp16=True, save_strategy="epoch", load_best_model_at_end = True, metric_for_best_model="accuracy") #Necessary for earlyStop

# Only model_init is supplied: the search needs a new model per trial, and Trainer
# expects either `model` or `model_init`, not both.
trainer = Trainer(
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
    model_init=model_init,
    data_collator=data_collator,
    callbacks=[earlyStop]
)

# Without compute_objective the search ranks trials by the sum of all reported
# metrics; select accuracy explicitly so the objective matches the sweep metric.
best_trial = trainer.hyperparameter_search(
    direction="maximize",
    backend="wandb",
    hp_space=wandb_hp_space,
    n_trials=30,
    compute_objective=lambda metrics: metrics["eval_accuracy"]
)


wandb.finish()

##### KFold Process

In [None]:
dataset = load_corpus_Salud(drop=0, n_labels=2, task="sentiment")
id2label = {0: "NEG", 1: "POS"}
label2id = {"NEG": 0, "POS": 1}

#NEG is Negative Polarity
#POS is Positive Polarity


splits=10

# One row per fold: per-class reports, the macro/weighted averages and the accuracy.
NEG = pd.DataFrame(columns=["precision", "recall", "f1-score", "support"])
POS = pd.DataFrame(columns=["precision", "recall", "f1-score", "support"])
macro = pd.DataFrame(columns=["precision", "recall", "f1-score", "support"])
weighted = pd.DataFrame(columns=["precision", "recall", "f1-score", "support"])
acc= pd.DataFrame(columns=["accuracy"])

# The tokenizer and the collator are identical for every fold, so they are loaded once.
tokenizer = AutoTokenizer.from_pretrained('pysentimiento/robertuito-sentiment-analysis')
data_collator = DataCollatorWithPadding(tokenizer)

kf = StratifiedKFold(n_splits = splits, random_state=10, shuffle=True)
for i, (train, test) in enumerate(kf.split(X=dataset["text"], y=dataset["label"])):
  print("\n\n******************LOOP number", i, "**************************\n\n")


  #The model must be created again in each iteration because otherwise the following executions are made with the weights of previous executions, giving overfitting
  model = AutoModelForSequenceClassification.from_pretrained('pysentimiento/robertuito-sentiment-analysis', num_labels=2, id2label=id2label, label2id=label2id, ignore_mismatched_sizes=True)

  #Hyperparameters taken from the wandb run "best-sweep-20"
  training_args = TrainingArguments(learning_rate=0.00008448, num_train_epochs=10, per_device_train_batch_size = 64, per_device_eval_batch_size=64, #best-sweep-20
                                  output_dir="best-sweep-20", evaluation_strategy="epoch", fp16=True, save_strategy="epoch", load_best_model_at_end = True, metric_for_best_model="accuracy") #necessary for earlyStop

  # `train` and `test` are positional indices, hence iloc.
  dataset.iloc[train].to_csv("corpus_train.csv", index=False)
  dataset.iloc[test].to_csv("corpus_test.csv", index=False)
  train_test = load_dataset("csv", data_files={"train":"./corpus_train.csv", "test":"./corpus_test.csv"})
  tokenized_datasets = train_test.map(tokenize_function, batched=True)


  trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_datasets["train"],
        eval_dataset=tokenized_datasets["test"],
        compute_metrics=compute_metrics,
        data_collator=data_collator,
        callbacks=[earlyStop]
  )
  trainer.train()
  pred=trainer.predict(tokenized_datasets["test"])
  logits=pred.predictions
  # Softmax is monotonic, so the argmax of the raw logits is already the predicted class.
  res=np.argmax(logits, axis=1) #predictions

  report = classification_report(tokenized_datasets["test"]["label"], res, output_dict=True)
  NEG.loc[i] = report["0"]
  POS.loc[i] = report["1"]
  macro.loc[i] = report["macro avg"]
  weighted.loc[i] = report["weighted avg"]
  acc.loc[i] = report["accuracy"]






In [None]:
NEG

Unnamed: 0,precision,recall,f1-score,support
0,0.90625,0.983051,0.943089,59
1,0.903226,0.949153,0.92562,59
2,0.848485,0.949153,0.896,59
3,0.901639,0.948276,0.92437,58
4,0.962264,0.87931,0.918919,58
5,1.0,0.896552,0.945455,58
6,0.981132,0.881356,0.928571,59
7,0.95,0.966102,0.957983,59
8,0.965517,0.949153,0.957265,59
9,0.9,0.915254,0.907563,59


In [None]:
print(np.mean(NEG, axis=0))

precision     0.931851
recall        0.931736
f1-score      0.930484
support      58.700000
dtype: float64


In [None]:
POS

Unnamed: 0,precision,recall,f1-score,support
0,0.993243,0.960784,0.976744,153
1,0.98,0.960784,0.970297,153
2,0.979452,0.934641,0.956522,153
3,0.98,0.960784,0.970297,153
4,0.955696,0.986928,0.971061,153
5,0.962264,1.0,0.980769,153
6,0.955696,0.993421,0.974194,152
7,0.986755,0.980263,0.983498,152
8,0.980392,0.986842,0.983607,152
9,0.966887,0.960526,0.963696,152


In [None]:
print(np.mean(POS, axis=0))

precision      0.974039
recall         0.972497
f1-score       0.973069
support      152.600000
dtype: float64


In [None]:
macro

Unnamed: 0,precision,recall,f1-score,support
0,0.949747,0.971918,0.959917,212
1,0.941613,0.954968,0.947958,212
2,0.913968,0.941897,0.926261,212
3,0.94082,0.95453,0.947333,211
4,0.95898,0.933119,0.94499,211
5,0.981132,0.948276,0.963112,211
6,0.968414,0.937388,0.951382,211
7,0.968377,0.973182,0.970741,211
8,0.972955,0.967997,0.970436,211
9,0.933444,0.93789,0.93563,211


In [None]:
print(np.mean(macro, axis=0))

precision      0.952945
recall         0.952117
f1-score       0.951776
support      211.300000
dtype: float64


In [None]:
weighted

Unnamed: 0,precision,recall,f1-score,support
0,0.969033,0.966981,0.967378,212
1,0.958634,0.957547,0.957863,212
2,0.943004,0.938679,0.939678,212
3,0.95846,0.957346,0.957672,211
4,0.957502,0.957346,0.956728,211
5,0.972637,0.971564,0.971062,211
6,0.962809,0.962085,0.961437,211
7,0.976478,0.976303,0.976364,211
8,0.976233,0.976303,0.976241,211
9,0.948184,0.947867,0.948,211


In [None]:
print(np.mean(weighted, axis=0))

precision      0.962297
recall         0.961202
f1-score       0.961242
support      211.300000
dtype: float64


In [None]:
acc

Unnamed: 0,accuracy
0,0.966981
1,0.957547
2,0.938679
3,0.957346
4,0.957346
5,0.971564
6,0.962085
7,0.976303
8,0.976303
9,0.947867


In [None]:
print(np.mean(acc, axis=0))

accuracy    0.961202
dtype: float64


### RoBERTuito Model with 3 labels: Positive, Negative, Neutral

##### Tuning Process with Wandb

In [None]:
import wandb

# Log in to Weights & Biases before the hyperparameter search below, which uses the "wandb" backend.
wandb.login()

In [None]:
id2label = {0: "NEG", 1: "NEU", 2: "POS"}
label2id = {"NEG": 0, "NEU": 1, "POS": 2}

#NEG is Negative Polarity
#POS is Positive Polarity
#NEU is Neutral Polarity

model = AutoModelForSequenceClassification.from_pretrained('pysentimiento/robertuito-sentiment-analysis', num_labels=3, id2label=id2label, label2id=label2id, ignore_mismatched_sizes=True)
tokenizer = AutoTokenizer.from_pretrained('pysentimiento/robertuito-sentiment-analysis')
data_collator = DataCollatorWithPadding(tokenizer)


dataset = load_corpus_Salud(drop=0, n_labels=3, task="sentiment")
# Fixed seed (the same value the K-fold cell uses) so every run of the sweep sees
# the same stratified train/test partition and trials stay comparable.
train, test = train_test_split(dataset, stratify=dataset["label"], random_state=10)
# The split goes through CSV files because the datasets are built with load_dataset("csv", ...).
train.to_csv("corpus_train.csv", index=False)
test.to_csv("corpus_test.csv", index=False)
train_test = load_dataset("csv", data_files={"train":"./corpus_train.csv", "test":"./corpus_test.csv"})
tokenized_datasets = train_test.map(tokenize_function, batched=True)

In [None]:
# Weights & Biases settings.
# NOTE(review): nothing below reads this dict. It is not a model configuration, so it
# must not be handed to from_pretrained(config=...). Keep real API keys out of the
# notebook; authentication is done by wandb.login().
config = {
          "wandb":{
            "project": "wandb-3labels-MentalHealt",
            "api_key": "XXXXXXXXXXXX"
        }
}


def wandb_hp_space(trial):
    """Return the random-search sweep definition (learning rate, batch sizes, epochs)."""
    return {
        "method": "random",
        "metric": {"name": "accuracy", "goal": "maximize"},
        "parameters": {
            "learning_rate": {"distribution": "uniform", "min": 1e-6, "max": 1e-4},
            "per_device_train_batch_size": {"values": [16, 32, 64, 128]},
            "per_device_eval_batch_size": {"values": [16, 32, 64, 128]},
            "num_train_epochs" : {"distribution": "int_uniform", "min": 5, "max": 15}
        },
    }

def model_init(trial):
    """Build a fresh 3-label classifier for each trial.

    The arguments mirror the model used in the K-fold evaluation, so the
    hyperparameters found here are tuned on the same architecture.
    """
    return AutoModelForSequenceClassification.from_pretrained(
        'pysentimiento/robertuito-sentiment-analysis',
        num_labels=3,
        id2label={0: "NEG", 1: "NEU", 2: "POS"},
        label2id={"NEG": 0, "NEU": 1, "POS": 2},
        ignore_mismatched_sizes=True
    )


training_args = TrainingArguments(report_to="wandb", output_dir="wandb-3labels", overwrite_output_dir=True,
       evaluation_strategy="epoch", per_device_train_batch_size = 128, per_device_eval_batch_size=128, num_train_epochs = 10,
                                    fp16=True, save_strategy="epoch", load_best_model_at_end = True, metric_for_best_model="accuracy") #necessary for earlyStop

# Only model_init is supplied: the search needs a new model per trial, and Trainer
# expects either `model` or `model_init`, not both.
trainer = Trainer(
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
    model_init=model_init,
    data_collator=data_collator,
    callbacks=[earlyStop]
)

# Without compute_objective the search ranks trials by the sum of all reported
# metrics; select accuracy explicitly so the objective matches the sweep metric.
best_trial = trainer.hyperparameter_search(
    direction="maximize",
    backend="wandb",
    hp_space=wandb_hp_space,
    n_trials=30,
    compute_objective=lambda metrics: metrics["eval_accuracy"]
)


wandb.finish()

##### KFold Process with the optimal hyperparameters

In [None]:
dataset = load_corpus_Salud(drop=0, n_labels=3, task="sentiment")
id2label = {0: "NEG", 1: "NEU", 2: "POS"}
label2id = {"NEG": 0, "NEU": 1, "POS": 2}




splits=10
# One row per fold: per-class reports, the macro/weighted averages and the accuracy.
NEG = pd.DataFrame(columns=["precision", "recall", "f1-score", "support"])
NEU = pd.DataFrame(columns=["precision", "recall", "f1-score", "support"])
POS = pd.DataFrame(columns=["precision", "recall", "f1-score", "support"])
macro = pd.DataFrame(columns=["precision", "recall", "f1-score", "support"])
weighted = pd.DataFrame(columns=["precision", "recall", "f1-score", "support"])
acc= pd.DataFrame(columns=["accuracy"])

# The tokenizer and the collator are identical for every fold, so they are loaded once.
tokenizer = AutoTokenizer.from_pretrained('pysentimiento/robertuito-sentiment-analysis')
data_collator = DataCollatorWithPadding(tokenizer)

kf = StratifiedKFold(n_splits = splits, random_state=10, shuffle=True)
for i, (train, test) in enumerate(kf.split(X=dataset["text"], y=dataset["label"])):
  print("\n\n******************LOOP NUMBER", i, "**************************\n\n")


  #The model must be created again in each iteration because otherwise the following executions are made with the weights of previous executions, giving overfitting
  model = AutoModelForSequenceClassification.from_pretrained('pysentimiento/robertuito-sentiment-analysis', num_labels=3, id2label=id2label, label2id=label2id, ignore_mismatched_sizes=True)

  #These are the best values for the training arguments (selected in the previous tuning process with wandb)
  training_args = TrainingArguments(learning_rate=0.00008759, num_train_epochs=10, per_device_train_batch_size = 16, per_device_eval_batch_size=32, #best-sweep-17
                                  output_dir="best-sweep-17", evaluation_strategy="epoch", fp16=True, save_strategy="epoch", load_best_model_at_end = True, metric_for_best_model="accuracy") #necessary for earlyStop

  # `train` and `test` are positional indices, hence iloc.
  dataset.iloc[train].to_csv("corpus_train.csv", index=False)
  dataset.iloc[test].to_csv("corpus_test.csv", index=False)
  train_test = load_dataset("csv", data_files={"train":"./corpus_train.csv", "test":"./corpus_test.csv"})
  tokenized_datasets = train_test.map(tokenize_function, batched=True)


  trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_datasets["train"],
        eval_dataset=tokenized_datasets["test"],
        compute_metrics=compute_metrics,
        data_collator=data_collator,
        callbacks=[earlyStop]
  )
  trainer.train()
  pred=trainer.predict(tokenized_datasets["test"])
  logits=pred.predictions
  # Softmax is monotonic, so the argmax of the raw logits is already the predicted class.
  res=np.argmax(logits, axis=1) #predictions

  report = classification_report(tokenized_datasets["test"]["label"], res, output_dict=True)
  NEG.loc[i] = report["0"]
  NEU.loc[i] = report["1"]
  POS.loc[i] = report["2"]
  macro.loc[i] = report["macro avg"]
  weighted.loc[i] = report["weighted avg"]
  acc.loc[i] = report["accuracy"]




In [None]:
NEG

Unnamed: 0,precision,recall,f1-score,support
0,0.859375,0.932203,0.894309,59
1,0.830769,0.915254,0.870968,59
2,0.847458,0.847458,0.847458,59
3,0.916667,0.948276,0.932203,58
4,0.960784,0.844828,0.899083,58
5,0.876923,0.982759,0.926829,58
6,0.918033,0.949153,0.933333,59
7,0.887097,0.932203,0.909091,59
8,0.981132,0.881356,0.928571,59
9,0.918033,0.949153,0.933333,59


In [None]:
print(np.mean(NEG, axis=0))

precision     0.899627
recall        0.918264
f1-score      0.907518
support      58.700000
dtype: float64


In [None]:
NEU

Unnamed: 0,precision,recall,f1-score,support
0,0.6875,0.647059,0.666667,17
1,1.0,0.294118,0.454545,17
2,0.888889,0.470588,0.615385,17
3,0.875,0.777778,0.823529,18
4,0.8125,0.722222,0.764706,18
5,0.833333,0.555556,0.666667,18
6,0.846154,0.647059,0.733333,17
7,0.769231,0.588235,0.666667,17
8,0.714286,0.588235,0.645161,17
9,0.875,0.823529,0.848485,17


In [None]:
print(np.mean(NEU, axis=0))

precision     0.830189
recall        0.611438
f1-score      0.688514
support      17.300000
dtype: float64


In [None]:
POS

Unnamed: 0,precision,recall,f1-score,support
0,0.979866,0.954248,0.966887,153
1,0.943396,0.980392,0.961538,153
2,0.931677,0.980392,0.955414,153
3,0.96732,0.96732,0.96732,153
4,0.925926,0.980392,0.952381,153
5,0.980263,0.973856,0.977049,153
6,0.961039,0.973684,0.96732,152
7,0.96732,0.973684,0.970492,152
8,0.944099,1.0,0.971246,152
9,0.966887,0.960526,0.963696,152


In [None]:
print(np.mean(POS, axis=0))

precision      0.956779
recall         0.974450
f1-score       0.965334
support      152.600000
dtype: float64


In [None]:
macro

Unnamed: 0,precision,recall,f1-score,support
0,0.842247,0.844504,0.842621,229
1,0.924722,0.729921,0.762351,229
2,0.889341,0.766146,0.806085,229
3,0.919662,0.897791,0.907684,229
4,0.899737,0.849147,0.872056,229
5,0.89684,0.83739,0.856848,229
6,0.908409,0.856632,0.877996,228
7,0.874549,0.831374,0.84875,228
8,0.879839,0.823197,0.848326,228
9,0.919973,0.911069,0.915172,228


In [None]:
print(np.mean(macro, axis=0))

precision      0.895532
recall         0.834717
f1-score       0.853789
support      228.600000
dtype: float64


In [None]:
weighted

Unnamed: 0,precision,recall,f1-score,support
0,0.927118,0.925764,0.925901,229
1,0.918581,0.912664,0.900567,229
2,0.906802,0.908297,0.902358,229
3,0.947234,0.947598,0.947124,229
4,0.925839,0.925764,0.92413,229
5,0.942541,0.943231,0.939933,229
6,0.941344,0.942982,0.941079,228
7,0.931791,0.934211,0.931949,228
8,0.936547,0.938596,0.93589,228
9,0.947394,0.947368,0.947249,228


In [None]:
print(np.mean(weighted, axis=0))

precision      0.932519
recall         0.932648
f1-score       0.929618
support      228.600000
dtype: float64


In [None]:
acc

Unnamed: 0,accuracy
0,0.925764
1,0.912664
2,0.908297
3,0.947598
4,0.925764
5,0.943231
6,0.942982
7,0.934211
8,0.938596
9,0.947368


In [None]:
print(np.mean(acc, axis=0))

accuracy    0.932648
dtype: float64
