# EMO task

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Dependencies

In [3]:
!pip install transformers -q
!pip install accelerate -U -q
!pip install datasets -q
!pip install torch-summary -q
!pip install graphviz -q
!pip install torchview -q
!pip install bertviz -q

repo_path = "https://raw.githubusercontent.com/HLT-Ghisolfi-Leuzzi-Testa/WASSA-2023/"
branch = "main"

[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
aiobotocore 2.5.0 requires botocore<1.29.77,>=1.29.76, but you have botocore 1.29.165 which is incompatible.[0m[31m
[0m

In [9]:
utils_url = f"{repo_path}{branch}/utils.py"
evaluation_url = f"{repo_path}{branch}/evaluation.py"

import os
if os.path.exists("utils.py"):
  !rm "utils.py"
if os.path.exists("evaluation.py"):
  !rm "evaluation.py"

!wget {utils_url}
!wget {evaluation_url}

--2023-07-06 18:03:59--  https://raw.githubusercontent.com/HLT-Ghisolfi-Leuzzi-Testa/WASSA-2023/main/utils.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.110.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 19809 (19K) [text/plain]
Saving to: ‘utils.py’


2023-07-06 18:03:59 (28.3 MB/s) - ‘utils.py’ saved [19809/19809]

--2023-07-06 18:04:00--  https://raw.githubusercontent.com/HLT-Ghisolfi-Leuzzi-Testa/WASSA-2023/main/evaluation.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.108.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 10675 (10K) [text/plain]
Saving to: ‘evaluation.py’


2023-07-06 18:04:00 (43.1 MB/s) - ‘evaluation.

In [11]:
import json
import torch
from torch import nn
from torch.nn import BCEWithLogitsLoss
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModel
from transformers import TrainingArguments, Trainer, TrainerCallback, EarlyStoppingCallback
from transformers import BertPreTrainedModel, BertModel
from transformers import RobertaModel,RobertaPreTrainedModel
from transformers.modeling_outputs import SequenceClassifierOutput
import importlib
import sys
from utils import *
importlib.reload(sys.modules['utils'])

<module 'utils' from '/kaggle/working/utils.py'>

In [12]:
# set CUDA if available
if torch.cuda.is_available():
    device = torch.device('cuda')
    print("======= CUDA Available =======")
else:
    device = torch.device('cpu')
    print("======= CUDA NOT Available, run on CPU =======")
device = torch.device('cpu') # otw goes out of memory



## Dataset

Dataset paths

In [13]:
TRAIN_DATA = f"{repo_path}{branch}/datasets/WASSA23_essay_level_internal_train_preproc.tsv" # f"{repo_path}{branch}/datasets/WASSA23_essay_level_internal_train_preproc.tsv"
VAL_DATA = f"{repo_path}{branch}/datasets/WASSA23_essay_level_internal_val_preproc.tsv"
DEV_DATA = f"{repo_path}{branch}/datasets/WASSA23_essay_level_dev_preproc.tsv"

Read dataframes

In [14]:
train_df = pd.read_csv(TRAIN_DATA, sep='\t')
val_df = pd.read_csv(VAL_DATA, sep='\t')
dev_df = pd.read_csv(DEV_DATA, sep='\t')

In [15]:
label_encoder = EmotionsLabelEncoder()
label_encoder.fit(train_df.emotion)

Optional subsample

In [16]:
train_df = train_df
val_df = val_df
dev_df = dev_df

In [17]:
y_train = label_encoder.encode(train_df.emotion)
y_val = label_encoder.encode(val_df.emotion)
y_dev = label_encoder.encode(dev_df.emotion)

Encode targets

In [18]:
features_list = ['anger_count', 'disgust_count', 'fear_count', 'joy_count', 'sadness_count', 'surprise_count', 'hope_count']
features_train =  np.array(train_df[features_list])
features_val =  np.array(val_df[features_list])
features_dev =  np.array(dev_df[features_list])

## Model

In [19]:
model_type ={
  'distilroberta-emotion':'j-hartmann/emotion-english-distilroberta-base',
  'roberta-emotion':'j-hartmann/emotion-english-roberta-large',
  'bert-base':'bert-base-cased'
}

In [20]:
NUM_LABELS = 8

model_config = {
    'model_id': 'bert_lexicon',
    'tokenizer_name': model_type.get('bert-base'),
    'model_name': model_type.get('bert-base'),
    'train_batch_size': 4,
    'val_batch_size': 4,
    'learning_rate': 5e-5,
    'weight_decay': 0,
    'epochs': 10,
    'seed': 42,
    'patience': 10,
    'early_stopping_threshold': 0
} # TODO: expand...

In [21]:
tokenizer = AutoTokenizer.from_pretrained(model_config['tokenizer_name'], truncation=True)

Downloading (…)okenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

**Baseline model**

In [22]:
model = AutoModelForSequenceClassification.from_pretrained(
    'bert-base-cased',
    num_labels=NUM_LABELS,
    ignore_mismatched_sizes=True,
    problem_type="multi_label_classification")
model.to(device)

Downloading model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initi

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

**Custom "lexicon" Bert model**

In [23]:
"""num_features=0# features_train.shape[1]
empathy_lexicon = True
distress_lexicon = True
emotion_lexicon = True
class BertPerWordLexiconPooling(BertPreTrainedModel):
  def __init__(self, config):
    super().__init__(config)
    self.num_labels = config.num_labels
    self.config = config
    self.n_features = num_features
    self.empathy = 1 if empathy_lexicon else 0
    self.distress = 1 if distress_lexicon else 0
    self.emotion = 7 if emotion_lexicon else 0
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(0.3)
    self.classifier = nn.Linear(config.hidden_size+self.n_features+self.empathy+self.distress+self.emotion, config.num_labels)
    self.post_init()

  def forward(
    self,
    input_ids=None,
    attention_mask=None,
    token_type_ids=None,
    position_ids=None,
    head_mask=None,
    inputs_embeds=None,
    labels=None,
    output_attentions=None,
    output_hidden_states=None,
    return_dict=None,
    features=None,
    empathy_values=None,
    distress_values=None,
    emotion_values=None,
  ):
    return_dict = return_dict if return_dict is not None else self.config.use_return_dict

    outputs = self.bert(
      input_ids,
      attention_mask=attention_mask,
      token_type_ids=token_type_ids,
      position_ids=position_ids,
      head_mask=head_mask,
      inputs_embeds=inputs_embeds,
      output_attentions=output_attentions,
      output_hidden_states=output_hidden_states,
      return_dict=return_dict,
    )

    if empathy_values is not None:
      output = torch.cat(
          (
            outputs.last_hidden_state,
            empathy_values.reshape(outputs.last_hidden_state.shape[0], outputs.last_hidden_state.shape[1], 1)
          ), dim=2)
    if distress_values is not None:
      output = torch.cat(
          (
            output,
            distress_values.reshape(output.shape[0], output.shape[1], 1)
          ), dim=2)
    if emotion_values is not None:
      output = torch.cat(
          (
            output,
            emotion_values.reshape(output.shape[0], output.shape[1], 7)
          ), dim=2)
    #pooled_output = output.sum(axis=1) / attention_mask.sum(axis=-1).unsqueeze(-1) # Questo funziona peggio
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(output.size()).float()
    sum_embeddings = torch.sum(output * input_mask_expanded, 1)
    sum_mask = input_mask_expanded.sum(1)
    sum_mask = torch.clamp(sum_mask, min = 1e-9)
    pooled_output = sum_embeddings/sum_mask


    pooled_output = self.dropout(pooled_output)
    if features is not None:
      pooled_output = torch.cat((pooled_output, features), dim=1)
    logits = self.classifier(pooled_output)
    loss = None
    if labels is not None:
      loss_fct = BCEWithLogitsLoss()
      loss = loss_fct(logits, labels)
    if not return_dict:
      output = (logits,) + outputs[2:]
      return ((loss,) + output) if loss is not None else output

    return SequenceClassifierOutput(
      loss=loss,
      logits=logits,
      hidden_states=outputs.hidden_states,
      attentions=outputs.attentions,
    )"""

'num_features=0# features_train.shape[1]\nempathy_lexicon = True\ndistress_lexicon = True\nemotion_lexicon = True\nclass BertPerWordLexiconPooling(BertPreTrainedModel):\n  def __init__(self, config):\n    super().__init__(config)\n    self.num_labels = config.num_labels\n    self.config = config\n    self.n_features = num_features\n    self.empathy = 1 if empathy_lexicon else 0\n    self.distress = 1 if distress_lexicon else 0\n    self.emotion = 7 if emotion_lexicon else 0\n    self.bert = BertModel(config)\n    self.dropout = nn.Dropout(0.3)\n    self.classifier = nn.Linear(config.hidden_size+self.n_features+self.empathy+self.distress+self.emotion, config.num_labels)\n    self.post_init()\n\n  def forward(\n    self,\n    input_ids=None,\n    attention_mask=None,\n    token_type_ids=None,\n    position_ids=None,\n    head_mask=None,\n    inputs_embeds=None,\n    labels=None,\n    output_attentions=None,\n    output_hidden_states=None,\n    return_dict=None,\n    features=None,\n    e

In [24]:
num_features=0 # features_train.shape[1]
empathy_lexicon = True
distress_lexicon = True
emotion_lexicon = True
class BertPerWordLexicon(BertPreTrainedModel):
  def __init__(self, config):
    super().__init__(config)
    self.num_labels = config.num_labels
    self.config = config
    self.n_features = num_features
    self.empathy = 1 if empathy_lexicon else 0
    self.distress = 1 if distress_lexicon else 0
    self.emotion = 7 if emotion_lexicon else 0
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(0.3)
    self.classifier_layer1 = nn.Linear((config.hidden_size+self.n_features+self.empathy+self.distress+self.emotion)*512,
                                       (config.hidden_size+self.n_features+self.empathy+self.distress+self.emotion)*int(512/2)) #TODO: cambiare 512
    self.tanh_layer = nn.Tanh()
    self.classifier_layer2 = nn.Linear((config.hidden_size+self.n_features+self.empathy+self.distress+self.emotion)*int(512/2), config.num_labels)
    self.post_init()

  def forward(
    self,
    input_ids=None,
    attention_mask=None,
    token_type_ids=None,
    position_ids=None,
    head_mask=None,
    inputs_embeds=None,
    labels=None,
    output_attentions=None,
    output_hidden_states=None,
    return_dict=None,
    features=None,
    empathy_values=None,
    distress_values=None,
    emotion_values=None,
  ):
    return_dict = return_dict if return_dict is not None else self.config.use_return_dict

    outputs = self.bert(
      input_ids,
      attention_mask=attention_mask,
      token_type_ids=token_type_ids,
      position_ids=position_ids,
      head_mask=head_mask,
      inputs_embeds=inputs_embeds,
      output_attentions=output_attentions,
      output_hidden_states=output_hidden_states,
      return_dict=return_dict,
    )

    if empathy_values is not None:
      output = torch.cat(
          (
            outputs.last_hidden_state,
            empathy_values.reshape(outputs.last_hidden_state.shape[0], outputs.last_hidden_state.shape[1], 1)
          ), dim=2)
    if distress_values is not None:
      output = torch.cat(
          (
            output,
            distress_values.reshape(output.shape[0], output.shape[1], 1)
          ), dim=2)
    if emotion_values is not None:
      output = torch.cat(
          (
            output,
            emotion_values.reshape(output.shape[0], output.shape[1], 7)
          ), dim=2)
    output_drop2 = self.dropout(output)
    output_reshaped = output_drop2.reshape(-1, 512*777) #TODO: parametrizzare

    output_drop2 = self.dropout(output_reshaped)
    if features is not None:
      output_drop = torch.cat((output_drop, features), dim=1)
    logits1 = self.classifier_layer1(output_drop)
    output1 = self.dropout(self.tanh_layer(logits1))
    logits = self.classifier_layer2(output1)

    loss = None
    if labels is not None:
      loss_fct = BCEWithLogitsLoss()
      loss = loss_fct(logits, labels)
    if not return_dict:
      output = (logits,) + outputs[2:]
      return ((loss,) + output) if loss is not None else output

    return SequenceClassifierOutput(
      loss=loss,
      logits=logits,
      hidden_states=outputs.hidden_states,
      attentions=outputs.attentions,
    )

In [1]:
model = BertPerWordLexicon.from_pretrained(model_type.get('bert-base'),num_labels=NUM_LABELS)
model.to(device)

NameError: name 'BertPerWordLexicon' is not defined

In [None]:
class WASSADataset(Dataset):
    '''
    This class is used to create a pytorch dataset for the EMO task.
    '''

    def __init__(
        self,
        tokenizer,
        essay,
        targets,
        features=None, # np.array([[], []])
        essay_EMP_lexicon=None, # {'empathy': [, ... ,], 'ditress': [, ..., ]}
        essay_EMO_lexicon=None, # {'anger': [, ... ,], ..., 'sadness': [, ..., ]}
        ):
        self.tokenizer = tokenizer
        self.essay = essay
        self.targets = targets
        self.essay_EMP_lexicon = essay_EMP_lexicon
        self.essay_EMO_lexicon = essay_EMO_lexicon
        self.features = features

    def __len__(self):
        return len(self.essay)

    def __getitem__(self, index):
      essay = str(self.essay[index])
      inputs = tokenizer(
          text=essay,
          add_special_tokens=True,
          padding='max_length',
          truncation=True,
          return_attention_mask=True,
          return_tensors='pt',
          return_token_type_ids=True
        )

      tokens_empathy = np.full(tokenizer.model_max_length, 4.0)
      tokens_distress = np.zeros(tokenizer.model_max_length)
      tokens_emotions = np.zeros((7, tokenizer.model_max_length))

      item = {
        'input_ids': inputs['input_ids'].flatten(),
        'attention_mask': inputs['attention_mask'].flatten(),
        'token_type_ids': inputs["token_type_ids"].flatten()
      }
      if self.features is not None:
        item['features'] = torch.FloatTensor(self.features[index])
      if self.targets is not None:
        item['labels'] = torch.FloatTensor(self.targets[index])
      if self.essay_EMP_lexicon is None and self.essay_EMO_lexicon is None:
        return item
      word_count=0
      first_char=True
      last_char_is_space=False
      for char_idx, char in enumerate(essay):
        token_idx = inputs.char_to_token(char_idx)
        if token_idx is None and not first_char:
          if not last_char_is_space:
            word_count+=1
            last_char_is_space=True
          continue
        elif last_char_is_space:
          last_char_is_space=False
        first_char=False
        if self.essay_EMP_lexicon is not None:
          """try:
            tokens_empathy[token_idx] = self.essay_EMP_lexicon[index]['empathy'][word_count]
          except IndexError:
            print(f"index: {index}")
            print(f"token_idx: {token_idx}")
            print(f"word count: {word_count} / {len(self.essay_EMP_lexicon[index]['empathy'])}")
            print(char_idx)
            print(char)"""
          tokens_empathy[token_idx] = self.essay_EMP_lexicon[index]['empathy'][word_count]
          tokens_distress[token_idx] = self.essay_EMP_lexicon[index]['distress'][word_count]
        if self.essay_EMO_lexicon is not None:
          for i, emotion in enumerate(self.essay_EMO_lexicon[index]):
            tokens_emotions[i][token_idx] = self.essay_EMO_lexicon[index][emotion][word_count]
      if self.essay_EMP_lexicon is not None:
        item['empathy_values'] = torch.FloatTensor(tokens_empathy)
        item['distress_values'] = torch.FloatTensor(tokens_distress)
        #print(tokens_empathy[:10])
        #print(tokens_distress[:10])
      if self.essay_EMO_lexicon is not None:
        item['emotion_values'] = torch.FloatTensor(tokens_emotions)
        #print(tokens_emotions[:,:10])
      return item

In [None]:
EMO_json_path_train = 'https://raw.githubusercontent.com/HLT-Ghisolfi-Leuzzi-Testa/WASSA-2023/main/lexicon/train_per_word_lexicon_EMO.json'
EMP_json_path_train = 'https://raw.githubusercontent.com/HLT-Ghisolfi-Leuzzi-Testa/WASSA-2023/main/lexicon/train_per_word_lexicon_EMP.json'
EMO_json_path_dev = 'https://raw.githubusercontent.com/HLT-Ghisolfi-Leuzzi-Testa/WASSA-2023/main/lexicon/dev_per_word_lexicon_EMO.json'
EMP_json_path_dev = 'https://raw.githubusercontent.com/HLT-Ghisolfi-Leuzzi-Testa/WASSA-2023/main/lexicon/dev_per_word_lexicon_EMP.json'
EMO_json_path_test = 'https://raw.githubusercontent.com/HLT-Ghisolfi-Leuzzi-Testa/WASSA-2023/main/lexicon/test_per_word_lexicon_EMO.json'
EMP_json_path_test = 'https://raw.githubusercontent.com/HLT-Ghisolfi-Leuzzi-Testa/WASSA-2023/main/lexicon/test_per_word_lexicon_EMP.json'

!wget {EMO_json_path_train}
!wget {EMP_json_path_train}
!wget {EMO_json_path_dev}
!wget {EMP_json_path_dev}
!wget {EMO_json_path_test}
!wget {EMP_json_path_test}

In [None]:
import json

with open('/content/train_per_word_lexicon_EMO.json') as json_file:
  essay_EMO_lexicon_train_dict = json.load(json_file)
with open('/content/train_per_word_lexicon_EMP.json') as json_file:
  essay_EMP_lexicon_train_dict = json.load(json_file)

with open('/content/dev_per_word_lexicon_EMO.json') as json_file:
  essay_EMO_lexicon_dev_dict = json.load(json_file)
with open('/content/dev_per_word_lexicon_EMP.json') as json_file:
  essay_EMP_lexicon_dev_dict = json.load(json_file)

with open('/content/test_per_word_lexicon_EMO.json') as json_file:
  essay_EMO_lexicon_test_dict = json.load(json_file)
with open('/content/test_per_word_lexicon_EMP.json') as json_file:
  essay_EMP_lexicon_test_dict = json.load(json_file)


In [None]:
essay_EMP_lexicon_train = [essay_EMP_lexicon_train_dict[str(id)] for id in train_df['essay_id']]
essay_EMO_lexicon_train = [essay_EMO_lexicon_train_dict[str(id)] for id in train_df['essay_id']]

essay_EMP_lexicon_val = [essay_EMP_lexicon_train_dict[str(id)] for id in val_df['essay_id']]
essay_EMO_lexicon_val = [essay_EMO_lexicon_train_dict[str(id)] for id in val_df['essay_id']]

essay_EMP_lexicon_dev = [essay_EMP_lexicon_dev_dict[str(id)] for id in dev_df['essay_id']]
essay_EMO_lexicon_dev = [essay_EMO_lexicon_dev_dict[str(id)] for id in dev_df['essay_id']]

In [None]:
train_set = WASSADataset(tokenizer=tokenizer, essay=train_df.essay, targets=y_train, features=None, essay_EMP_lexicon=essay_EMP_lexicon_train, essay_EMO_lexicon=essay_EMO_lexicon_train)
val_set = WASSADataset(tokenizer=tokenizer, essay=val_df.essay, targets=y_val, features=None, essay_EMP_lexicon=essay_EMP_lexicon_val, essay_EMO_lexicon=essay_EMO_lexicon_val)
dev_set = WASSADataset(tokenizer=tokenizer, essay=dev_df.essay, targets=y_dev, features=None, essay_EMP_lexicon=essay_EMP_lexicon_dev, essay_EMO_lexicon=essay_EMO_lexicon_dev)

## Training

Prepare datasets

In [None]:
"""train_set = EMODataset(tokenizer=tokenizer, essay=train_df.essay, targets=y_train)#, features=features_train)
val_set = EMODataset(tokenizer=tokenizer, essay=val_df.essay, targets=y_val)#, features=features_val)
dev_set = EMODataset(tokenizer=tokenizer, essay=dev_df.essay, targets=y_dev)#, features=features_dev)"""

In [None]:
"""train_set = EMODataset(tokenizer=tokenizer, essay=train_df.essay, targets=y_train, features=features_train)
val_set = EMODataset(tokenizer=tokenizer, essay=val_df.essay, targets=y_val, features=features_val)
dev_set = EMODataset(tokenizer=tokenizer, essay=dev_df.essay, targets=y_dev, features=features_dev)"""

Set up training

In [None]:
train_arguments = TrainingArguments(
    output_dir=f"./{model_config['model_name']}",
    per_device_train_batch_size=model_config['train_batch_size'],
    per_device_eval_batch_size=model_config['val_batch_size'],
    num_train_epochs=model_config['epochs'],
    evaluation_strategy="steps",
    save_strategy="steps",
    eval_steps = 300,
    save_steps = 300,
    learning_rate=model_config['learning_rate'],
    weight_decay=model_config['weight_decay'],
    load_best_model_at_end=True,
    metric_for_best_model='eval_loss',
    seed=model_config['seed'],
    logging_strategy = "epoch"
) # TODO: custom other params

In [None]:
trainer = Trainer(
    model=model,
    args=train_arguments,
    train_dataset=train_set,
    eval_dataset=val_set,
    tokenizer=tokenizer,
    compute_metrics=compute_EMO_metrics_trainer
)

Callbacks

In [None]:
class TrainerLoggingCallback(TrainerCallback):
    def __init__(self, log_path):
        self.log_path = log_path

    def on_log(self, args, state, control, logs=None, **kwargs):
        _ = logs.pop("total_flos", None)
        if state.is_local_process_zero: # whether this process is the main one in a distributed setting
            with open(self.log_path, "a") as f:
                f.write(json.dumps(logs) + "\n")

trainer.add_callback(EarlyStoppingCallback(
    early_stopping_patience=model_config['patience'],
    early_stopping_threshold=model_config['early_stopping_threshold']))

trainer.add_callback(TrainerLoggingCallback(model_config['model_id']+"_log.json"))

Start training

In [None]:
trainer.train()

In [None]:
# Access the training logs
train_logs = trainer.state.log_history

# Extract the loss values from the logs
train_loss_values = [log.get('loss') for log in train_logs if log.get('loss') is not None]
eval_loss_values = [log.get('eval_loss') for log in train_logs if log.get('eval_loss') is not None]
train_epochs = [log.get('epoch') for log in train_logs if log.get('loss') is not None]
eval_epochs = [log.get('epoch') for log in train_logs if log.get('eval_loss') is not None]

In [None]:
train_logs

In [None]:
plot_loss_curve(train_loss_values, eval_loss_values, loss_epochs, eval_epochs,"loss", f"{model_config['model_name']}_loss.png")

## Evaluation

In [None]:
def predict_emotions(results, gold_emotions):

  binarized_predictions = np.where(results.predictions >= 0.5, 1, 0)

  for i, bin_pred in enumerate(binarized_predictions):
    if np.all(bin_pred==0):
      binarized_predictions[i][np.argmax(results.predictions[i])] = 1

  predicted_emotions = label_encoder.decode(binarized_predictions)
  return predicted_emotions


In [None]:
trainer.state.best_model_checkpoint

In [None]:
outs = trainer.predict(dev_set)

In [None]:
gold_emotions = gold_emotions = label_encoder.decode(outs.label_ids)
predicted_emotions = predict_emotions(outs, gold_emotions)

In [None]:
write_EMO_predictions(predicted_emotions, model_config['model_id']+"_predictions_EMO.tsv")
challenge_metrics = compute_EMO_metrics(golds=gold_emotions, predictions=predicted_emotions)
write_dict_to_json(challenge_metrics, model_config['model_id']+"_dev_metrics.json")
challenge_metrics

In [None]:
plot_confusion_matrix(golds=gold_emotions, predictions=predicted_emotions, path=model_config['model_id']+"_confusion_matrix.pdf", title=model_config['model_id'])

In [None]:
plot_model_graph(model=model, input_data=tokenizer("Hello world!", return_tensors="pt"), path=model_config['model_id']+"_graph")

In [None]:
print_model_summary(model=model, path=model_config['model_id']+"_summary.txt")

## Save model on Google Drive

In [None]:
trainer.state.best_model_checkpoint

In [None]:
!mv $trainer.state.best_model_checkpoint /content/drive/MyDrive/hlt

## Inference

In [None]:
MODEL_PATH = "/content/drive/MyDrive/hlt/best-roberta"

In [None]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, truncation=True)
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_PATH,
    num_labels=NUM_LABELS,
    ignore_mismatched_sizes=True,
    problem_type="multi_label_classification")

load checkpoint

In [None]:
# Load the checkpoint file
checkpoint_file = "./bert-base-cased/checkpoint-1200"
#model = AutoModelForSequenceClassification.from_pretrained(checkpoint_file, num_labels=NUM_LABELS)
model = BertLexicon.from_pretrained(checkpoint_file, num_labels=NUM_LABELS)
trainer = Trainer(model=model)
# Perform prediction using the loaded checkpoint
predictions = trainer.predict(dev_set)

In [None]:
gold_emotions = label_encoder.decode(predictions.label_ids)
predicted_emotions = predict_emotions(predictions, gold_emotions)

In [None]:
write_EMO_predictions(predicted_emotions, model_config['model_id']+"_predictions_EMO.tsv")
challenge_metrics = compute_EMO_metrics(golds=gold_emotions, predictions=predicted_emotions)
write_dict_to_json(challenge_metrics, model_config['model_id']+"_dev_metrics.json")
challenge_metrics