In [125]:
#!pip install torch==1.13.0+cu116 --extra-index-url https://download.pytorch.org/whl/cu116
!pip install transformers==4.30.0
!pip install datasets==2.13.2
!pip install accelerate -U
!pip install evaluate



In [126]:
# system packages
from pathlib import Path
import shutil
import urllib
import tarfile
import sys
import os
# data and numerical management packages
import pandas as pd
import random
import numpy as np
import matplotlib.pyplot as plt

import torch
from torch.utils.data import DataLoader, Dataset
from transformers import BertTokenizer, BertModel, BertForSequenceClassification, AdamW, Trainer, TrainingArguments
from torch.nn.utils.rnn import pad_sequence

import warnings
warnings.filterwarnings('ignore')
# useful during debugging (progress bars)
from tqdm import tqdm
from transformers import set_seed

seed = 852
torch.manual_seed(seed)
random.seed(seed)
np.random.seed(seed)
set_seed(seed)


# Set seed for PyTorch on GPU (if available)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)

In [127]:
dataset_folder = Path.cwd().joinpath('sample_data/MELD_train_efr.json')
# dataset_folder = '/kaggle/input/MELD_train_efr.json'
#dataset_path = dataset_folder.joinpath('/MELD_train_efr.json')
#dataset_folder = "/kaggle/input/plaplapla/MELD_train_efr.json"
df = pd.read_json(dataset_folder)
#df['triggers'] = df['triggers'].fillna(value=0, inplace=False)#.replace('None', 0.0)

In [128]:
df

Unnamed: 0,episode,speakers,emotions,utterances,triggers
0,utterance_0,"[Chandler, The Interviewer, Chandler, The Inte...","[neutral, neutral, neutral, neutral, surprise]",[also I was the point person on my company's t...,"[0.0, 0.0, 0.0, 1.0, 0.0]"
1,utterance_1,"[Chandler, The Interviewer, Chandler, The Inte...","[neutral, neutral, neutral, neutral, surprise,...",[also I was the point person on my company's t...,"[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]"
2,utterance_2,"[Chandler, The Interviewer, Chandler, The Inte...","[neutral, neutral, neutral, neutral, surprise,...",[also I was the point person on my company's t...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ..."
3,utterance_3,"[Chandler, The Interviewer, Chandler, The Inte...","[neutral, neutral, neutral, neutral, surprise,...",[also I was the point person on my company's t...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,utterance_4,"[Joey, Rachel, Joey, Rachel]","[surprise, sadness, surprise, fear]",[But then who? The waitress I went out with la...,"[0.0, 0.0, 1.0, 0.0]"
...,...,...,...,...,...
3995,utterance_3995,"[Chandler, All, Monica, Chandler, Ross, Chandl...","[neutral, joy, neutral, neutral, surprise, dis...","[Hey., Hey!, So how was Joan?, I broke up with...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3996,utterance_3996,"[Chandler, All, Monica, Chandler, Ross, Chandl...","[neutral, joy, neutral, neutral, surprise, dis...","[Hey., Hey!, So how was Joan?, I broke up with...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3997,utterance_3997,"[Chandler, All, Monica, Chandler, Ross, Chandl...","[neutral, joy, neutral, neutral, surprise, dis...","[Hey., Hey!, So how was Joan?, I broke up with...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3998,utterance_3998,"[Chandler, All, Monica, Chandler, Ross, Chandl...","[neutral, joy, neutral, neutral, surprise, dis...","[Hey., Hey!, So how was Joan?, I broke up with...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [129]:
triggers = df['triggers']
for row in range(triggers.shape[0]):
    for trigger in range(len(triggers[row])):
        if triggers[row][trigger] == None:
            triggers[row][trigger] = 0.0

df['triggers'] = triggers

## Speakers start

In [78]:
speakers = df['speakers'].explode().unique()
speaker_to_idx = {}
idx_to_speaker = {}
for idx, speaker in enumerate(speakers):
  speaker_to_idx[speaker] = idx
  idx_to_speaker[idx] = speaker

In [79]:
encoded_speakers = []
for dialogue in df['speakers']:
  dialogue_speakers = []
  for speaker in dialogue:
    dialogue_speakers.append(speaker_to_idx[speaker])
  encoded_speakers.append(dialogue_speakers)

## Speakers end



In [130]:
emotions = df['emotions'].explode().unique()
emotions

array(['neutral', 'surprise', 'fear', 'sadness', 'joy', 'disgust',
       'anger'], dtype=object)

In [131]:
triggers = df['triggers'].explode().unique()
triggers

array([0.0, 1.0], dtype=object)

In [82]:
dialogues = df['utterances'][:3200]
max_len_utterance = 0
index = 0
utterances_len = []
for dialogue in dialogues:
  for utterance in dialogue:
     utterances_len.append(len(utterance.split()))
np.mean(np.array(utterances_len))

8.077553661956639

In [132]:
from sklearn.preprocessing import LabelBinarizer
sorted_emotions = sorted(emotions)  #sort the array because Binarizer will automatically do that for one hot encoding
label_binarizer = LabelBinarizer()
label_binarizer.fit(sorted_emotions)

dialogues = df['emotions']
one_hot_emotions = []
for dialogue_emotion in dialogues:
  dialogue_emotions_list = []
  for emotion in dialogue_emotion:
    encoded_emotion=label_binarizer.transform([emotion])
    dialogue_emotions_list.append(np.ravel(encoded_emotion).tolist())
  one_hot_emotions.append(dialogue_emotions_list)

In [133]:
df['emotions'] = one_hot_emotions

In [135]:
from sklearn.model_selection import train_test_split
train_data, test_data = train_test_split(df, train_size=0.5, shuffle=False)
#val_data, test_data = train_test_split(temp_data, test_size=0.5, shuffle=False)
#val_data.reset_index(drop=True, inplace=True)
test_data.reset_index(drop=True, inplace=True)

In [136]:
train_data

Unnamed: 0,episode,speakers,emotions,utterances,triggers
0,utterance_0,"[Chandler, The Interviewer, Chandler, The Inte...","[[0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 1, 0, 0],...",[also I was the point person on my company's t...,"[0.0, 0.0, 0.0, 1.0, 0.0]"
1,utterance_1,"[Chandler, The Interviewer, Chandler, The Inte...","[[0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 1, 0, 0],...",[also I was the point person on my company's t...,"[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]"
2,utterance_2,"[Chandler, The Interviewer, Chandler, The Inte...","[[0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 1, 0, 0],...",[also I was the point person on my company's t...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ..."
3,utterance_3,"[Chandler, The Interviewer, Chandler, The Inte...","[[0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 1, 0, 0],...",[also I was the point person on my company's t...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,utterance_4,"[Joey, Rachel, Joey, Rachel]","[[0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 1, 0],...",[But then who? The waitress I went out with la...,"[0.0, 0.0, 1.0, 0.0]"
...,...,...,...,...,...
1995,utterance_1995,"[A Student, Ross, Ross]","[[0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0, 1],...","[What's happening to your accent?, Come again?...","[0.0, 0.0, 0.0]"
1996,utterance_1996,"[A Student, Ross, Ross, Ross]","[[0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0, 1],...","[What's happening to your accent?, Come again?...","[0.0, 0.0, 0.0, 1.0]"
1997,utterance_1997,"[A Student, Ross, Ross, Ross, Ross]","[[0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0, 1],...","[What's happening to your accent?, Come again?...","[0.0, 0.0, 0.0, 0.0, 1.0]"
1998,utterance_1998,"[A Student, Ross, Ross, Ross, Ross, Ross]","[[0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0, 1],...","[What's happening to your accent?, Come again?...","[0.0, 0.0, 0.0, 0.0, 0.0, 1.0]"


In [19]:
from transformers import AutoModelForSequenceClassification, RobertaModel
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer, AutoConfig

#tokenizer = AutoTokenizer.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
tokenizer = AutoTokenizer.from_pretrained('roberta-base')

#tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
MODEL = 'roberta-base'#"cardiffnlp/twitter-roberta-base-sentiment-latest"
#tokenizer = AutoTokenizer.from_pretrained(MODEL)
#config = AutoConfig.from_pretrained(MODEL)
#model = RobertaModel.from_pretrained(MODEL)#AutoModelForSequenceClassification.from_pretrained(MODEL)


def tokenize_padding(speakers_list, utterances_list):
    tokenized_dialogues = []
    max_length = 128  # Adjust the maximum sequence length as needed
    for speakers, utterances in zip(speakers_list, utterances_list):#, total=len(utterances_list):
        dialogue_with_speakers = []
        for speaker, utterance in zip(speakers, utterances):
            dialogue_with_speakers.append("["+speaker+"] " + utterance)
        tokenized_dialogues.append(dialogue_with_speakers)


    tokenization = []
    for dialogue in tqdm(tokenized_dialogues):
      tokenized_dialogue = tokenizer(
          dialogue,
          #max_length=9,
          padding=True,
          truncation=False,
          return_tensors='pt'
      )
      tokenization.append(tokenized_dialogue)

    input_ids = [dialogue['input_ids'] for dialogue in tokenization]
    attention_mask = [dialogue['attention_mask'] for dialogue in tokenization]
    return input_ids,attention_mask

# Example usage with your dataset:
input_ids_train, attention_mask_train = tokenize_padding(train_data['speakers'], train_data['utterances'])
input_ids_val, attention_mask_val = tokenize_padding(val_data['speakers'], val_data['utterances'])
input_ids_test, attention_mask_test = tokenize_padding(test_data['speakers'], test_data['utterances'])

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

100%|██████████| 3200/3200 [00:03<00:00, 872.66it/s] 
100%|██████████| 400/400 [00:00<00:00, 1105.94it/s]
100%|██████████| 400/400 [00:00<00:00, 1102.41it/s]


In [20]:
#print(input_ids_train[0])
print(tokenizer.decode(input_ids_train[0][-1]))
print(train_data['utterances'][0][-1])


<s>[Chandler] My duties?  All right.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
My duties?  All right.


In [21]:
"""tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
def tokenize_padding(dialogues):
  tokenized_dialogues = []

  for dialogue in tqdm(dialogues):
      tokenized_dialogue = tokenizer(
          dialogue,
          #max_length=9,
          padding=True,
          truncation=False,
          return_tensors='pt'
      )
      tokenized_dialogues.append(tokenized_dialogue)

  #padded_input_ids = pad_sequence([dialogue['input_ids'] for dialogue in tokenized_dialogues], batch_first=True)
  input_ids = [dialogue['input_ids'] for dialogue in tokenized_dialogues]
  attention_mask = [dialogue['attention_mask'] for dialogue in tokenized_dialogues]
  return input_ids,attention_mask

#padded_input_ids_train,padded_attention_mask_train = tokenize_padding(train_data['utterances'])
input_ids_train,attention_mask_train = tokenize_padding(train_data['utterances'])
input_ids_val,attention_mask_val = tokenize_padding(val_data['utterances'])
input_ids_test,attention_mask_test = tokenize_padding(test_data['utterances'])"""

"tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')\ndef tokenize_padding(dialogues):\n  tokenized_dialogues = []\n\n  for dialogue in tqdm(dialogues):\n      tokenized_dialogue = tokenizer(\n          dialogue,\n          #max_length=9,\n          padding=True,\n          truncation=False,\n          return_tensors='pt'\n      )\n      tokenized_dialogues.append(tokenized_dialogue)\n\n  #padded_input_ids = pad_sequence([dialogue['input_ids'] for dialogue in tokenized_dialogues], batch_first=True)\n  input_ids = [dialogue['input_ids'] for dialogue in tokenized_dialogues]\n  attention_mask = [dialogue['attention_mask'] for dialogue in tokenized_dialogues]\n  return input_ids,attention_mask\n\n#padded_input_ids_train,padded_attention_mask_train = tokenize_padding(train_data['utterances'])\ninput_ids_train,attention_mask_train = tokenize_padding(train_data['utterances'])\ninput_ids_val,attention_mask_val = tokenize_padding(val_data['utterances'])\ninput_ids_test,attention_mask

In [86]:
from sklearn.metrics import f1_score

def compute_sequence_f1(predictions, labels):
    # predictions and labels should be lists of tensors for each dialogue
    emotion_f1_scores = []
    trigger_f1_scores = []
    for emotion_pred, trigger_pred, emotion_lab, trigger_lab in zip(predictions[0], predictions[1], labels[0], labels[1]):
        emotion_predicted_classes = torch.argmax(emotion_pred, dim=1)
        trigger_predicted_classes = torch.argmax(trigger_pred, dim=1)
        emotion_true_classes = torch.argmax(emotion_lab, dim=1)
        trigger_true_classes = trigger_lab
        emotion_f1 = f1_score(emotion_true_classes.cpu().numpy(), emotion_predicted_classes.cpu().numpy(), average='macro')
        trigger_f1 = f1_score(trigger_true_classes.cpu().numpy(), trigger_predicted_classes.cpu().numpy(), average='macro')
        emotion_f1_scores.append(emotion_f1)
        trigger_f1_scores.append(trigger_f1)
    average_emotion_f1 = torch.tensor(emotion_f1_scores, dtype=torch.float32).mean()
    average_trigger_f1 = torch.tensor(trigger_f1_scores, dtype=torch.float32).mean()
    return average_emotion_f1, average_trigger_f1

def compute_unrolled_sequence_f1(predictions, labels):
    # Flatten all utterances and compute the F1 score
    all_emotion_predicted_classes = torch.argmax(torch.cat(predictions[0], dim=0), dim=1)
    all_trigger_predicted_classes = torch.argmax(torch.cat(predictions[1], dim=0), dim=1)
    all_emotion_true_classes = torch.argmax(torch.cat(labels[0], dim=0), dim=1)
    all_trigger_true_classes = torch.cat(labels[1], dim=0)
    unrolled_emotion_f1 = f1_score(all_emotion_true_classes.cpu().numpy(), all_emotion_predicted_classes.cpu().numpy(), average='macro')
    unrolled_trigger_f1 = f1_score(all_trigger_true_classes.cpu().numpy(), all_trigger_predicted_classes.cpu().numpy(), average='macro')
    unrolled_emotion_f1_tensor = torch.tensor(unrolled_emotion_f1, dtype=torch.float32)
    unrolled_trigger_f1_tensor = torch.tensor(unrolled_trigger_f1, dtype=torch.float32)
    return unrolled_emotion_f1_tensor, unrolled_trigger_f1_tensor

In [116]:
class CustomDataset(Dataset):
    def __init__(self, input_ids, attention_mask, emotions, triggers):
        self.input_ids = input_ids
        self.attention_mask = attention_mask
        self.emotions = emotions
        self.triggers = triggers

    def __len__(self):
        return len(self.emotions)

    def __getitem__(self, idx):
        input_ids = self.input_ids[idx]
        attention_mask = self.attention_mask[idx]
        emotion = self.emotions[idx]
        trigger = self.triggers[idx]
        emotion_labels = torch.tensor(emotion, dtype=torch.float32)
        trigger_label = torch.tensor(trigger, dtype=torch.long)
        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'emotion_labels': emotion_labels,
            'trigger_label': trigger_label
        }

Old CustomBertModel with LSTM Layer

In [24]:
import torch
from transformers import BertModel


class CustomBERTModel(torch.nn.Module):
    def __init__(self, freeze_embeddings=True, hidden_size=128, num_layers=1, bidirectional=True):
        super(CustomBERTModel, self).__init__()
        #self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.bert = RobertaModel.from_pretrained('roberta-base')

        if freeze_embeddings:
            for name, param in self.bert.named_parameters():
                if 'embeddings' in name:
                    param.requires_grad = False
        self.emotion_head = torch.nn.Linear(self.bert.config.hidden_size, len(emotions))

        # Linear layer for trigger classification
        self.trigger_head = torch.nn.Linear(self.bert.config.hidden_size, len(triggers))

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask, return_dict=True)

        pooled_output = outputs['pooler_output']


        emotion_logits = self.emotion_head(pooled_output)
        trigger_logits = self.trigger_head(pooled_output)

        return emotion_logits, trigger_logits

In [25]:
"""class CustomBERTModel(torch.nn.Module):
    def __init__(self, freeze_embeddings=True, hidden_size=128, num_layers=1, bidirectional=True):
        super(CustomBERTModel, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        if freeze_embeddings:
            for name, param in self.bert.named_parameters():
                if 'embeddings' in name:
                    param.requires_grad = False

        # GRU layer
        self.gru = torch.nn.GRU(input_size=self.bert.config.hidden_size,
                                hidden_size=hidden_size,
                                num_layers=num_layers,
                                bidirectional=bidirectional,
                                batch_first=True)

        self.emotion_head = torch.nn.Linear(hidden_size * 2 if bidirectional else hidden_size, len(emotions))

        # Linear layer for trigger classification
        self.trigger_head = torch.nn.Linear(hidden_size * 2 if bidirectional else hidden_size, len(triggers))

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask, return_dict=True)
        pooled_output = outputs['pooler_output']

        # Pass BERT output through GRU
        gru_output, _ = self.gru(pooled_output.unsqueeze(1))  # Adding the unsqueeze for the sequence dimension

        # Take the output from the last time step
        gru_output_last = gru_output[:, -1, :]

        emotion_logits = self.emotion_head(gru_output_last)
        trigger_logits = self.trigger_head(gru_output_last)

        return emotion_logits, trigger_logits"""

"class CustomBERTModel(torch.nn.Module):\n    def __init__(self, freeze_embeddings=True, hidden_size=128, num_layers=1, bidirectional=True):\n        super(CustomBERTModel, self).__init__()\n        self.bert = BertModel.from_pretrained('bert-base-uncased')\n        if freeze_embeddings:\n            for name, param in self.bert.named_parameters():\n                if 'embeddings' in name:\n                    param.requires_grad = False\n\n        # GRU layer\n        self.gru = torch.nn.GRU(input_size=self.bert.config.hidden_size,\n                                hidden_size=hidden_size,\n                                num_layers=num_layers,\n                                bidirectional=bidirectional,\n                                batch_first=True)\n\n        self.emotion_head = torch.nn.Linear(hidden_size * 2 if bidirectional else hidden_size, len(emotions))\n\n        # Linear layer for trigger classification\n        self.trigger_head = torch.nn.Linear(hidden_size * 2 if bidi

In [26]:
train_dataset = CustomDataset(input_ids_train, attention_mask_train, train_data['emotions'],
                              train_data['triggers'])
#validation_dataset = CustomDataset(input_ids_val, attention_mask_val, val_data['emotions'],
#                             val_data['triggers'])
test_dataset = CustomDataset(input_ids_test, attention_mask_test, test_data['emotions'],
                             test_data['triggers'])

In [94]:
from torch.nn.utils.rnn import pad_sequence

def collate_fn(batch):
    input_ids = [item['input_ids'] for item in batch]
    attention_mask = [item['attention_mask'] for item in batch]
    emotion_labels = [item['emotion_labels'] for item in batch]#torch.stack([item['emotion_labels'] for item in batch], dim=0)
    trigger_label = [item['trigger_label'] for item in batch]#torch.stack([item['trigger_label'] for item in batch], dim=0)

    #input_ids = pad_sequence([torch.stack(item['input_ids']) for item in batch], batch_first=True)
    #attention_mask = pad_sequence([torch.stack(item['attention_mask']) for item in batch], batch_first=True)
    return input_ids,attention_mask,emotion_labels,trigger_label
    #return {'input_ids': input_ids, 'attention_mask': attention_mask, 'emotion_labels': emotion_labels, 'trigger_label': trigger_label

In [41]:
def test_bert(mode,model):

# Usage in the eval loop
  sequence_f1_scores_emotion = []
  sequence_f1_scores_trigger = []
  unrolled_predictions_emotion = []
  unrolled_predictions_trigger = []
  unrolled_labels_emotion = []
  unrolled_labels_trigger = []
  sequence_f1_scores = []

  batch_size = 1
  #test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
  if mode == 'validation':
    loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False)
  elif mode == 'test':
    loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
  model.eval()
  with torch.no_grad():
      for batch in tqdm(loader, desc='Evaluation', leave=False):
          input_ids = batch['input_ids'].squeeze().to(device)
          attention_mask = batch['attention_mask'].squeeze().to(device)
          emotion_labels = batch['emotion_labels'].squeeze().to(device)
          trigger_label = batch['trigger_label'].squeeze().to(device)

          emotion_logits, trigger_logits = model(input_ids, attention_mask)

          # Store predictions and labels for later unrolled F1 computation
          unrolled_predictions_emotion.append(emotion_logits)
          unrolled_labels_emotion.append(emotion_labels)
          unrolled_predictions_trigger.append(trigger_logits)
          unrolled_labels_trigger.append(trigger_label)

          # Convert logits to probabilities and then to class predictions
          #predicted_classes = torch.argmax(emotion_logits, dim=1)
          #true_classes = torch.argmax(emotion_labels, dim=1)

          # Compute F1 for the current sequence (dialogue)
          #sequence_f1 = f1_score(true_classes.cpu().numpy(), predicted_classes.cpu().numpy(), average='macro')
          #sequence_f1_scores.append(sequence_f1)

  # Compute the average Sequence F1 for emotions and triggers
  average_sequence_f1_emotion, average_sequence_f1_trigger = compute_sequence_f1(
      [unrolled_predictions_emotion, unrolled_predictions_trigger],
      [unrolled_labels_emotion, unrolled_labels_trigger]
  )

  # Compute the Unrolled Sequence F1 for emotions and triggers
  unrolled_sequence_f1_emotion, unrolled_sequence_f1_trigger = compute_unrolled_sequence_f1(
      [unrolled_predictions_emotion, unrolled_predictions_trigger],
      [unrolled_labels_emotion, unrolled_labels_trigger]
  )
  model.train()
  # Print the F1 scores for emotions and triggers
  print(f"Average Sequence F1 (Emotion):  {average_sequence_f1_emotion:03f}")
  print(f"Average Sequence F1 (Trigger): {average_sequence_f1_trigger:03f}")
  print(f"Unrolled Sequence F1 (Emotion): {unrolled_sequence_f1_emotion.item():03f}")
  print(f"Unrolled Sequence F1 (Trigger): {unrolled_sequence_f1_trigger.item():03f}")
  return average_sequence_f1_emotion, average_sequence_f1_trigger, unrolled_sequence_f1_emotion, unrolled_sequence_f1_trigger

## Bert

In [29]:
criterion = torch.nn.CrossEntropyLoss() # can tinker with the loss function, change to a different one
#criterion_trigger = torch.nn.BCEWithLogitsLoss()


num_epochs = 5
batch_size = 16

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
freezed_embeddings = True

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BERT_baseline = CustomBERTModel().to(device)
optimizer = torch.optim.AdamW(BERT_baseline.parameters(), lr=1e-5)
#optimizer = torch.optim.AdamW([{'params': BERT_baseline.emotion_head.parameters()}, {'params': BERT_baseline.trigger_head.parameters()}], lr=1e-5, weight_decay=1e-4)

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [30]:
for epoch in range(num_epochs):
    BERT_baseline.train()
    total_loss = 0.0

    for batch in tqdm(train_loader, desc=f'Epoch {epoch + 1}', leave=False):
        input_ids,attention_mask,emotion_labels,trigger_label = batch

        # Zero the gradients on the optimizer
        optimizer.zero_grad()

        for el in range(batch_size):
          emotion_loss = 0.0
          trigger_loss = 0.0

          input_ids_el = input_ids[el].squeeze().to(device)
          attention_mask_el = attention_mask[el].squeeze().to(device)
          emotion_labels_el = emotion_labels[el].squeeze().to(device)
          trigger_label_el = trigger_label[el].squeeze().to(device)

          # Forward pass
          emotion_logits, trigger_logits = BERT_baseline(input_ids_el, attention_mask_el)
          # Compute the loss for both emotion and trigger

          emotion_loss += criterion(emotion_logits, torch.argmax(emotion_labels_el, dim=1))
          #trigger_loss += criterion(torch.argmax(trigger_logits, dim=1).float(), trigger_label_el.float())
          trigger_loss += criterion(trigger_logits, trigger_label_el)

        # Combine losses for backpropagation
        loss = (emotion_loss + trigger_loss)/batch_size

        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    # Compute the average loss
    average_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch + 1}, Average Loss: {average_loss}")
    test_bert('validation',BERT_baseline)



KeyboardInterrupt: 

In [None]:
_ = test_bert('test',BERT_baseline)

## GRIDSEARCH

In [None]:
import itertools
#best_loss = float('inf')
# Define the grid search parameters
lr_values = [1e-5, 5e-5]
hidden_size_values = [64, 128]
batch_size_values = [20, 32]

# Iterate through all combinations
for lr, hidden_size, batch_size in itertools.product(lr_values, hidden_size_values, batch_size_values):
    # Define your model, optimizer, and other necessary components with the current parameters
    custom_Bert_Model = CustomBERTModel(hidden_size=hidden_size)
    custom_Bert_Model = custom_Bert_Model.to(device)
    optimizer = torch.optim.AdamW(custom_Bert_Model.parameters(), lr=lr)

    # DataLoader with the current batch size
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

    num_epochs = 7
    # Training loop and validation code here, using the current lr, hidden_size, and batch_size
    for epoch in range(num_epochs):
      custom_Bert_Model.train()
      total_loss = 0.0

      for batch in tqdm(train_loader, desc=f'Epoch {epoch + 1}', leave=False):
          input_ids,attention_mask,emotion_labels,trigger_label = batch

          # Zero the gradients on the optimizer
          optimizer.zero_grad()

          for el in range(batch_size):
            emotion_loss = 0.0
            trigger_loss = 0.0

            input_ids_el = input_ids[el].squeeze().to(device)
            attention_mask_el = attention_mask[el].squeeze().to(device)
            emotion_labels_el = emotion_labels[el].squeeze().to(device)
            trigger_label_el = trigger_label[el].squeeze().to(device)

            # Forward pass
            emotion_logits, trigger_logits = custom_Bert_Model(input_ids_el, attention_mask_el)
            # Compute the loss for both emotion and trigger

            emotion_loss += criterion_emotion(emotion_logits, torch.argmax(emotion_labels_el, dim=1))
            trigger_loss += criterion_trigger(torch.argmax(trigger_logits, dim=1).float(), trigger_label_el.float())

          # Combine losses for backpropagation
          loss = (emotion_loss + trigger_loss)/batch_size

          loss.backward()
          optimizer.step()

          total_loss += loss.item()
      # Compute the average loss
      average_loss = total_loss / len(train_loader)
      print(f"Epoch {epoch + 1}, Average Loss: {average_loss}")
      validation_loss = test_bert('validation')
      #best_loss = save_best_model(custom_Bert_Model, optimizer, epoch, validation_loss, best_loss)

    # Print or log the results for each combination
    print(f"LR: {lr}, Hidden Size: {hidden_size}, Batch Size: {batch_size}")#, Best Validation Loss: {best_loss}")
    test_loss = test_bert('test')


## Majority classifier

In [None]:
# Reinitialise train, val and test loaders without collate_fn for Majority and Random
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
import torch

def find_majority_class(train_loader):
    # Initialize counters
    emotion_counts = torch.zeros(7) # Assuming there are 7 unique emotions
    trigger_counts = torch.zeros(2) # There are 2 classes for triggers: present or not
    negative_trigger_counts = 0
    positive_trigger_counts = 0
    # Iterate over the training dataset to count the labels
    for batch in train_loader:
        emotion_labels = batch['emotion_labels'].squeeze()
        trigger_labels = batch['trigger_label'].squeeze()
        #print(trigger_labels,torch.sum(trigger_labels, dim=0),(trigger_labels == 0).sum())
        # Sum up the counts for each class
        positive_trigger_counts += torch.sum(trigger_labels, dim=0)
        # Count the zeros for the negative class (absence of a trigger)
        # Since one-hot encoding, the absence is just the inverse of the presence
        negative_trigger_counts += torch.sum(1 - trigger_labels, dim=0)
        emotion_counts += torch.sum(emotion_labels, dim=0)

    trigger_counts[0] = negative_trigger_counts
    trigger_counts[1] = positive_trigger_counts
    print(trigger_counts)
    print(emotion_counts)
    # Find the index with the maximum count for emotions and triggers
    majority_emotion = torch.zeros_like(emotion_counts)
    majority_emotion[torch.argmax(emotion_counts)] = 1
    majority_trigger = torch.zeros_like(trigger_counts)
    majority_trigger[torch.argmax(trigger_counts)] = 1

    return majority_emotion, majority_trigger

# Let's assume that 'train_loader' is a DataLoader for your training dataset
# You need to replace 'train_loader' with the actual DataLoader for your dataset
majority_emotion, majority_trigger = find_majority_class(train_loader)
majority_emotion, majority_trigger

In [None]:
from sklearn.metrics import f1_score
from tqdm.auto import tqdm

def majority_classifier(majority_emotion, majority_trigger, test_loader):
    all_emotion_predictions = []
    all_trigger_predictions = []
    all_emotion_labels = []
    all_trigger_labels = []

    with torch.no_grad():
        for batch in tqdm(test_loader, desc='Evaluation', leave=False):
            emotion_labels = batch['emotion_labels']
            trigger_labels = batch['trigger_label']

            for emotion_lab, trigger_lab in zip(emotion_labels, trigger_labels):
                # Ensure we have at least 1 dimension
                if emotion_lab.ndim == 1 and emotion_lab.size(0) == 1:
                    emotion_lab = emotion_lab.unsqueeze(0)
                if trigger_lab.ndim == 1 and trigger_lab.size(0) == 1:
                    trigger_lab = trigger_lab.unsqueeze(0)

                # Store the labels
                all_emotion_labels.append(emotion_lab)
                all_trigger_labels.append(trigger_lab)

                # Repeat the majority class prediction to match the number of utterances
                emotion_predictions = majority_emotion.repeat(emotion_lab.size(0), 1)
                #print(emotion_predictions)

                trigger_predictions = majority_trigger.repeat(trigger_lab.size(0), 1)

                # Store the predictions
                all_emotion_predictions.append(emotion_predictions)
                all_trigger_predictions.append(trigger_predictions)

    # Use the stored predictions and labels to calculate sequence F1 and unrolled sequence F1
    average_sequence_f1_emotion, average_sequence_f1_trigger = compute_sequence_f1(
        [all_emotion_predictions, all_trigger_predictions],
        [all_emotion_labels, all_trigger_labels]
    )

    unrolled_sequence_f1_emotion, unrolled_sequence_f1_trigger = compute_unrolled_sequence_f1(
        [all_emotion_predictions, all_trigger_predictions],
        [all_emotion_labels, all_trigger_labels]
    )

    return average_sequence_f1_emotion, average_sequence_f1_trigger, unrolled_sequence_f1_emotion, unrolled_sequence_f1_trigger

# Assume majority_emotion and majority_trigger are tensors of the majority class (one-hot encoded)
# and test_loader is your DataLoader instance for the test dataset.
average_f1_emotion, average_f1_trigger, unrolled_f1_emotion, unrolled_f1_trigger = majority_classifier(majority_emotion, majority_trigger, test_loader)

print(f"Average Sequence F1 (Emotion): {average_f1_emotion}")
print(f"Average Sequence F1 (Trigger): {average_f1_trigger}")
print(f"Unrolled Sequence F1 (Emotion): {unrolled_f1_emotion}")
print(f"Unrolled Sequence F1 (Trigger): {unrolled_f1_trigger}")

## Random

In [None]:
import torch

def random_classifier(test_loader, emotion_distribution, trigger_distribution):
    all_emotion_predictions = []
    all_trigger_predictions = []
    all_emotion_labels = []
    all_trigger_labels = []

    with torch.no_grad():
        for batch in tqdm(test_loader, desc='Evaluation', leave=False):
            emotion_labels = batch['emotion_labels']
            trigger_labels = batch['trigger_label']

            for emotion_lab, trigger_lab in zip(emotion_labels, trigger_labels):
                # Ensure we have at least 1 dimension
                if emotion_lab.ndim == 1 and emotion_lab.size(0) == 1:
                    emotion_lab = emotion_lab.unsqueeze(0)
                if trigger_lab.ndim == 1 and trigger_lab.size(0) == 1:
                    trigger_lab = trigger_lab.unsqueeze(0)

                # Store the labels
                all_emotion_labels.append(emotion_lab)
                all_trigger_labels.append(trigger_lab)

                # Generate random predictions for emotions
                random_emotion_predictions = torch.randint(0, 2, (emotion_lab.size(0), 7))  # Randomly 0 or 1 for each emotion
                all_emotion_predictions.append(random_emotion_predictions.float())

                # Generate random predictions for triggers based on the training distribution
                random_trigger_probs = torch.rand((trigger_lab.size(0), 1))
                random_trigger_predictions = (random_trigger_probs < trigger_distribution).long()  # Binary prediction based on distribution
                random_trigger_predictions = torch.cat((random_trigger_predictions, 1 - random_trigger_predictions), dim=1)  # Make it one-hot
                all_trigger_predictions.append(random_trigger_predictions.float())

    # Calculate the F1 scores using your metric functions
    average_sequence_f1_emotion, average_sequence_f1_trigger = compute_sequence_f1(
        [all_emotion_predictions, all_trigger_predictions],
        [all_emotion_labels, all_trigger_labels]
    )

    unrolled_sequence_f1_emotion, unrolled_sequence_f1_trigger = compute_unrolled_sequence_f1(
        [all_emotion_predictions, all_trigger_predictions],
        [all_emotion_labels, all_trigger_labels]
    )

    return average_sequence_f1_emotion, average_sequence_f1_trigger, unrolled_sequence_f1_emotion, unrolled_sequence_f1_trigger

# You need to provide the distribution for the trigger class from your training data
# For example, if 30% of your training samples have a trigger, trigger_distribution should be 0.3
trigger_distribution = 0.5  # Replace with your actual distribution

# Now call your random classifier function
average_f1_emotion, average_f1_trigger, unrolled_f1_emotion, unrolled_f1_trigger = random_classifier(
    test_loader,
    emotion_distribution=None,  # Not used currently as we're assuming a uniform distribution
    trigger_distribution=trigger_distribution
)

print(f"Random Classifier Average Sequence F1 (Emotion): {average_f1_emotion}")
print(f"Random Classifier Average Sequence F1 (Trigger): {average_f1_trigger}")
print(f"Random Classifier Unrolled Sequence F1 (Emotion): {unrolled_f1_emotion}")
print(f"Random Classifier Unrolled Sequence F1 (Trigger): {unrolled_f1_trigger}")

## Bert with context

In [None]:
import torch
from transformers import BertModel
import torch.nn as nn

class BERT_LSTM_5(torch.nn.Module):
    def __init__(self, num_emotions, num_triggers, freeze_embeddings=True):
        super(BERT_LSTM_5, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')

        if freeze_embeddings:
            for name, param in self.bert.named_parameters():
                if 'embeddings' in name:
                    param.requires_grad = False

        self.lstm_hidden_size = self.bert.config.hidden_size

        self.lstm = nn.LSTM(input_size=self.lstm_hidden_size,
                            hidden_size=self.lstm_hidden_size,
                            num_layers=1,
                            batch_first=True,
                            bidirectional=True)

        # Dropout for L1 Regularization
        self.dropout = nn.Dropout(p=0.1)

        # Instead of applying custom attention, we'll use the output from LSTM directly for classification
        self.emotion_head = nn.Linear(self.lstm_hidden_size * 2, num_emotions)  # *2 for bidirectional
        self.trigger_head = nn.Linear(self.lstm_hidden_size * 2, num_triggers)

    def forward(self, input_ids, attention_mask):
        bert_outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask, return_dict=True)
        bert_sequence_output = bert_outputs.last_hidden_state

        lstm_output, (h_n, c_n) = self.lstm(bert_sequence_output)

        # Instead of applying attention, use the last hidden states directly
        # Concatenate the final forward and backward hidden states
        h_n = torch.cat((h_n[-2,:,:], h_n[-1,:,:]), dim=1)

        # Apply dropout to the concatenated LSTM outputs
        h_n = self.dropout(h_n)

        emotion_logits = self.emotion_head(h_n)
        trigger_logits = self.trigger_head(h_n)

        return emotion_logits, trigger_logits


In [None]:
import torch
from torch.optim import AdamW
from torch.nn import CrossEntropyLoss, BCEWithLogitsLoss
from tqdm import tqdm
from torch.optim.lr_scheduler import ReduceLROnPlateau
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

num_epochs = 7
batch_size = 10

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

num_emotions = 7
num_triggers = 2

# Instantiate the model
BERT_lstm = BERT_LSTM_5(num_emotions, num_triggers, freeze_embeddings=True).to(device)
BERT_baseline = CustomBERTModel().to(device)
# Define separate optimizers for shared model parts + emotion head, and trigger head
# you can comment out a specific params dict if you dont want to update it
optimizer_shared = AdamW([
    {'params': BERT_baseline.bert.parameters()},
    #{'params': BERT_lstm.lstm.parameters()},
    {'params': BERT_baseline.emotion_head.parameters()}
], lr=1e-5, weight_decay=1e-5)

optimizer_trigger = AdamW(BERT_baseline.trigger_head.parameters(), lr=1e-4, weight_decay=0)

optimizer_shared = torch.optim.AdamW([{'params': BERT_BASELINEmodel.emotion_head.parameters()}, {'params': BERT_BASELINEmodel.trigger_head.parameters()}], lr=1e-5, weight_decay=1e-4)
criterion_emotion = CrossEntropyLoss()
criterion_trigger = BCEWithLogitsLoss()

# Assuming the learning rate scheduler is applied to the shared optimizer
scheduler = ReduceLROnPlateau(optimizer_shared, mode='min', factor=0.5, patience=1, verbose=True)

In [None]:
# Training loop

for epoch in range(num_epochs):
    BERT_lstm.train()
    total_loss = 0.0
    loss_emotion = 0.0
    loss_trigger = 0.0

    for batch in tqdm(train_loader, desc=f'Epoch {epoch + 1}', leave=False):
        # collate_fn returns lists, so we do not call .to(device) here
        input_ids, attention_mask, emotion_labels, trigger_label = batch

        # Zero the gradients for both optimizers
        optimizer_shared.zero_grad()
        optimizer_trigger.zero_grad()

        emotion_loss = 0.0
        trigger_loss = 0.0
        for el in range(batch_size):  # Process each item in the batch
            # Convert list items to tensors and send them to the device
            input_ids_el = input_ids[el].squeeze().to(device)
            attention_mask_el = attention_mask[el].squeeze().to(device)
            emotion_labels_el = emotion_labels[el].squeeze().to(device)
            trigger_label_el = trigger_label[el].squeeze().to(device)

            # Forward pass
            emotion_logits, trigger_logits = BERT_baseline(input_ids_el, attention_mask_el)

            # Compute the loss for both emotion and trigger
            emotion_loss += criterion_emotion(emotion_logits, torch.argmax(emotion_labels_el, dim=1))
            trigger_loss += criterion_trigger(torch.argmax(trigger_logits, dim=1).float(), trigger_label_el.float())

        # Average the losses
        emotion_loss /= batch_size
        trigger_loss /= batch_size
        trigger_loss.requires_grad_()

        # print("emotion loss is", emotion_loss)
        # print("trigger loss is", trigger_loss)
        # print(trigger_loss.requires_grad)

        # Backpropagate losses and step optimizers
        emotion_loss.backward(retain_graph=True)
        optimizer_shared.step()

        trigger_loss.backward()
        optimizer_trigger.step()

        # Update total and individual losses
        total_loss += (emotion_loss.item() + trigger_loss.item())
        loss_emotion += emotion_loss.item()
        loss_trigger += trigger_loss.item()

    # Compute average loss for the epoch
    average_loss = total_loss / len(train_loader)
    average_emotion_loss = loss_emotion / len(train_loader)
    average_trigger_loss = loss_trigger / len(train_loader)
    print(f"----Evaluation scores on Validation set for Epoch {epoch+1}----")
    validation = test_bert('validation') # print out F1 scores using test_bert
    print("------------------------------------------------------")
    # Adjust learning rate based on the average loss
    scheduler.step(average_loss)

    print(f"Epoch {epoch + 1}, Total Average Loss: {average_loss}")
    print(f"Epoch {epoch + 1}, Average Emotion Loss: {average_emotion_loss}")
    print(f"Epoch {epoch + 1}, Average Trigger Loss: {average_trigger_loss}")

In [None]:
test_bert('test')

Old Initialisation and Training code with One optimizer

In [None]:
from torch.optim.lr_scheduler import ReduceLROnPlateau

num_emotions = 7
num_triggers = 2

# Instantiate the model
BERT_lstm = BERT_LSTM_5(num_emotions,num_triggers).to(device)
#optimizer = AdamW(filter(lambda p: p.requires_grad, custom_Bert_Model.parameters()), lr=5e-5)
# optimizer = torch.optim.SGD(BERT_lstm.parameters(), lr=1e-5)
optimizer = torch.optim.AdamW(BERT_lstm.parameters(), lr=1e-5, weight_decay=1e-4)
# optimizer = torch.optim.AdamW(BERT_lstm.parameters(), lr=1e-3,weight_decay = 5e-4)
criterion_emotion = torch.nn.CrossEntropyLoss()
criterion_trigger = torch.nn.BCEWithLogitsLoss()

# Define the learning rate scheduler
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=1, verbose=True)

In [None]:
num_epochs = 8
batch_size = 16
for epoch in range(num_epochs):
    BERT_lstm.train()
    total_loss = 0.0
    loss_emotion = 0.0
    loss_trigger = 0.0
    for batch in tqdm(train_loader, desc=f'Epoch {epoch + 1}', leave=False):
        input_ids,attention_mask,emotion_labels,trigger_label = batch

        # Zero the gradients on the optimizer
        optimizer.zero_grad()

        emotion_loss = 0.0
        trigger_loss = 0.0
        for el in range(batch_size):
            input_ids_el = input_ids[el].squeeze().to(device)
            attention_mask_el = attention_mask[el].squeeze().to(device)
            emotion_labels_el = emotion_labels[el].squeeze().to(device)
            trigger_label_el = trigger_label[el].squeeze().to(device)

            # Forward pass
            emotion_logits, trigger_logits = BERT_lstm(input_ids_el, attention_mask_el)
            # Compute the loss for both emotion and trigger

            emotion_loss += criterion_emotion(emotion_logits, torch.argmax(emotion_labels_el, dim=1))
            trigger_loss += criterion_trigger(torch.argmax(trigger_logits, dim=1).float(), trigger_label_el.float())

        # Combine losses for backpropagation
        loss = (emotion_loss + trigger_loss)/batch_size

        loss.backward()
        optimizer.step()

        loss_emotion += emotion_loss/batch_size
        loss_trigger += trigger_loss/batch_size
        total_loss += loss.item()

    # Compute the average loss
    average_loss = total_loss / len(train_loader)
    validation_loss = test_bert('validation')

    scheduler.step(average_loss)

    # Compute the average loss
    print(f"Epoch {epoch + 1}, Average Loss: {average_loss}")
    print(f"Epoch {epoch + 1}, Emotion Loss: {loss_emotion/len(train_loader)}")
    print(f"Epoch {epoch + 1}, Trigger Loss: {loss_trigger/len(train_loader)}")

In [None]:
test_bert('test')

Evaluation loop

In [None]:
# Usage in the eval loop
sequence_f1_scores_emotion = []
sequence_f1_scores_trigger = []
unrolled_predictions_emotion = []
unrolled_predictions_trigger = []
unrolled_labels_emotion = []
unrolled_labels_trigger = []
sequence_f1_scores = []

test_loader = # insert test loader code here, todo

with torch.no_grad():
    for batch in tqdm(test_loader, desc='Evaluation', leave=False):
        input_ids = batch['input_ids'].squeeze().to(device)
        attention_mask = batch['attention_mask'].squeeze().to(device)
        emotion_labels = batch['emotion_labels'].squeeze().to(device)
        trigger_label = batch['trigger_label'].squeeze().to(device)

        emotion_logits, trigger_logits = BERT_lstm(input_ids, attention_mask)

        # Store predictions and labels for later unrolled F1 computation
        unrolled_predictions_emotion.append(emotion_logits)
        unrolled_labels_emotion.append(emotion_labels)
        unrolled_predictions_trigger.append(trigger_logits)
        unrolled_labels_trigger.append(trigger_label)

        # Convert logits to probabilities and then to class predictions
        predicted_classes = torch.argmax(emotion_logits, dim=1)
        true_classes = torch.argmax(emotion_labels, dim=1)

        # Compute F1 for the current sequence (dialogue)
        sequence_f1 = f1_score(true_classes.cpu().numpy(), predicted_classes.cpu().numpy(), average='macro')
        sequence_f1_scores.append(sequence_f1)

# Compute the average Sequence F1 for emotions and triggers
average_sequence_f1_emotion, average_sequence_f1_trigger = compute_sequence_f1(
    [unrolled_predictions_emotion, unrolled_predictions_trigger],
    [unrolled_labels_emotion, unrolled_labels_trigger]
)

# Compute the Unrolled Sequence F1 for emotions and triggers
unrolled_sequence_f1_emotion, unrolled_sequence_f1_trigger = compute_unrolled_sequence_f1(
    [unrolled_predictions_emotion, unrolled_predictions_trigger],
    [unrolled_labels_emotion, unrolled_labels_trigger]
)

# Print the F1 scores for emotions and triggers
print(f"Average Sequence F1 (Emotion): {average_sequence_f1_emotion:.4f}")
print(f"Average Sequence F1 (Trigger): {average_sequence_f1_trigger:.4f}")
print(f"Unrolled Sequence F1 (Emotion): {unrolled_sequence_f1_emotion.item():.4f}")
print(f"Unrolled Sequence F1 (Trigger): {unrolled_sequence_f1_trigger.item():.4f}")

# Roberta

In [None]:
"""import torch
from transformers import RobertaModel, RobertaForSequenceClassification
import torch.nn as nn

class RoBERTa_LSTM(nn.Module):
    def __init__(self, num_emotions, num_triggers, freeze_embeddings=True):
        super(RoBERTa_LSTM, self).__init__()
        # Load the pretrained RoBERTa model
        #self.roberta = RobertaModel.from_pretrained('roberta-base')
        #self.roberta = RobertaModel.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
        self.roberta = RobertaModel.from_pretrained('roberta-large')
        #self.roberta = RobertaForSequenceClassification.from_pretrained('roberta-base')
        # Optionally, freeze the embeddings layer to prevent fine-tuning
        if freeze_embeddings:
            for name, param in self.roberta.named_parameters():
                if 'embeddings' in name:
                    param.requires_grad = False
        """
        # LSTM configuration remains the same
        self.lstm_hidden_size = self.roberta.config.hidden_size
        self.lstm = nn.LSTM(input_size=self.lstm_hidden_size,
                            hidden_size=self.lstm_hidden_size,
                            num_layers=1,
                            batch_first=True,
                            bidirectional=True)

        self.dropout = nn.Dropout(p=0)  # Adjust dropout rate as needed

        # Classification heads for emotions and triggers
        """
        #self.emotion_head = nn.Linear(self.lstm_hidden_size * 2, num_emotions)  # *2 for bidirectional LSTM
        #self.trigger_head = nn.Linear(self.lstm_hidden_size * 2, num_triggers)
        self.emotion_head = nn.Linear(self.roberta.config.hidden_size, num_emotions)
        self.trigger_head = nn.Linear(self.roberta.config.hidden_size, num_triggers)

    def forward(self, input_ids, attention_mask):
        # Process input through RoBERTa
        """
        roberta_outputs = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
        roberta_sequence_output = roberta_outputs.last_hidden_state

        # Process the output through LSTM
        lstm_output, (h_n, c_n) = self.lstm(roberta_sequence_output)

        # Use the final hidden states from LSTM for classification
        h_n = torch.cat((h_n[-2,:,:], h_n[-1,:,:]), dim=1)
        h_n = self.dropout(h_n)  # Apply dropout

        # Generate logits for each head
        emotion_logits = self.emotion_head(h_n)
        trigger_logits = self.trigger_head(h_n)"""
        outputs = self.roberta(input_ids=input_ids, attention_mask=attention_mask, return_dict=True)

        pooled_output = outputs['pooler_output']


        emotion_logits = self.emotion_head(pooled_output)
        trigger_logits = self.trigger_head(pooled_output)

        return emotion_logits, trigger_logits

        return emotion_logits, trigger_logits"""

In [None]:
import torch
from transformers import RobertaModel
import torch.nn as nn

class RoBERTa_LSTM(nn.Module):
    def __init__(self, num_emotions, num_triggers, freeze_embeddings=False):  # Fixed __init_ method
        super(RoBERTa_LSTM, self).__init__()  # Fixed __init_ method
        # Load the pretrained RoBERTa model
        self.roberta = RobertaModel.from_pretrained('roberta-base')

        # Optionally, freeze the embeddings layer to prevent fine-tuning
        if freeze_embeddings:
            for name, param in self.roberta.named_parameters():
                if 'embeddings' in name:
                    param.requires_grad = False

        # LSTM configuration remains the same
        self.lstm_hidden_size = self.roberta.config.hidden_size
        self.lstm = nn.LSTM(input_size=self.lstm_hidden_size,
                            hidden_size=self.lstm_hidden_size,
                            num_layers=1,
                            batch_first=True,
                            bidirectional=True)

        self.dropout = nn.Dropout(p=0.1)  # Adjust dropout rate as needed

        # Add batch normalization layers
        self.batchnorm_emotion = nn.BatchNorm1d(self.lstm_hidden_size * 2)
        self.batchnorm_trigger = nn.BatchNorm1d(self.lstm_hidden_size * 2)

        # Classification heads for emotions and triggers
        self.emotion_head = nn.Linear(self.lstm_hidden_size * 2, num_emotions)  # *2 for bidirectional LSTM
        self.trigger_head = nn.Linear(self.lstm_hidden_size * 2, num_triggers)

    def forward(self, input_ids, attention_mask):
        # Process input through RoBERTa
        roberta_outputs = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
        roberta_sequence_output = roberta_outputs.last_hidden_state

        # Process the output through LSTM
        lstm_output, (h_n, c_n) = self.lstm(roberta_sequence_output)

        # Concatenate the final forward and backward hidden states and apply dropout
        h_n = torch.cat((h_n[-2,:,:], h_n[-1,:,:]), dim=1)
        h_n = self.dropout(h_n)

        # Apply batch normalization to the output of the LSTM
        emotion_bn = self.batchnorm_emotion(h_n)
        trigger_bn = self.batchnorm_trigger(h_n)

        # Generate logits for each head
        emotion_logits = self.emotion_head(emotion_bn)
        trigger_logits = self.trigger_head(trigger_bn)

        return emotion_logits, trigger_logits

In [None]:
from torch.optim.lr_scheduler import ReduceLROnPlateau
from pytorch_ranger import Ranger


num_emotions = 7
num_triggers = 2

# Instantiate the model
BERT_lstm = RoBERTa_LSTM(num_emotions,num_triggers).to(device)
model = RoBERTa_LSTM(num_emotions, num_triggers).to(device)
#optimizer = Ranger(model.parameters(), lr=1e-3)
#optimizer = Ranger(RoBERTa_LSTM.parameters(), lr=1e-3)
#optimizer = AdamW(filter(lambda p: p.requires_grad, custom_Bert_Model.parameters()), lr=5e-5)
# optimizer = torch.optim.SGD(BERT_lstm.parameters(), lr=1e-5)
optimizer = torch.optim.AdamW(BERT_lstm.parameters(), lr=1e-5, weight_decay=1e-4)
#optimizer = torch.optim.AdamW(BERT_lstm.parameters(), lr=1e-3,weight_decay = 5e-4)
criterion_emotion = torch.nn.CrossEntropyLoss()
criterion_trigger = torch.nn.BCEWithLogitsLoss()

# Define the learning rate scheduler
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=1, verbose=True)

# Roberta baseline

In [137]:
from transformers import AutoModelForSequenceClassification, RobertaModel, RobertaForSequenceClassification, RobertaForMultipleChoice
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer, AutoConfig


#tokenizer = AutoTokenizer.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
tokenizer = AutoTokenizer.from_pretrained("roberta-base")
#tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
def tokenize_padding(speakers_list, utterances_list):
    tokenized_dialogues = []
    max_length = 128  # Adjust the maximum sequence length as needed
    for speakers, utterances in zip(speakers_list, utterances_list):#, total=len(utterances_list):
        dialogue_with_speakers = []
        for speaker, utterance in zip(speakers, utterances):
            dialogue_with_speakers.append(utterance)
        tokenized_dialogues.append(dialogue_with_speakers)


    tokenization = []
    for dialogue in tqdm(tokenized_dialogues):
      tokenized_dialogue = tokenizer(
          dialogue,
          #max_length=9,
          padding=True,
          truncation=False,
          return_tensors='pt'
      )
      tokenization.append(tokenized_dialogue)

    input_ids = [dialogue['input_ids'] for dialogue in tokenization]
    attention_mask = [dialogue['attention_mask'] for dialogue in tokenization]
    return input_ids,attention_mask

# Example usage with your dataset:
input_ids_train, attention_mask_train = tokenize_padding(train_data['speakers'], train_data['utterances'])
#input_ids_val, attention_mask_val = tokenize_padding(val_data['speakers'], val_data['utterances'])
input_ids_test, attention_mask_test = tokenize_padding(test_data['speakers'], test_data['utterances'])

train_dataset = CustomDataset(input_ids_train, attention_mask_train, train_data['emotions'],
                              train_data['triggers'])
#validation_dataset = CustomDataset(input_ids_val, attention_mask_val, val_data['emotions'],
 #                            val_data['triggers'])
test_dataset = CustomDataset(input_ids_test, attention_mask_test, test_data['emotions'],
                             test_data['triggers'])

100%|██████████| 2000/2000 [00:01<00:00, 1252.18it/s]
100%|██████████| 2000/2000 [00:01<00:00, 1317.88it/s]


In [138]:
import torch
from transformers import BertModel


class Roberta_baseline(torch.nn.Module):
    def __init__(self, freeze_embeddings=True):
        super(Roberta_baseline, self).__init__()
        self.roberta = RobertaModel.from_pretrained('roberta-base')
        #self.roberta = RobertaModel.from_pretrained('j-hartmann/emotion-english-distilroberta-base')

        if freeze_embeddings:
            for name, param in self.roberta.named_parameters():
                if 'embeddings' in name:
                    param.requires_grad = False
        self.emotion_head = torch.nn.Linear(self.roberta.config.hidden_size, len(emotions))
        self.trigger_head = torch.nn.Linear(self.roberta.config.hidden_size, len(triggers))

    def forward(self, input_ids, attention_mask):
        outputs = self.roberta(input_ids=input_ids, attention_mask=attention_mask, return_dict=True)
        pooled_output = outputs['pooler_output']

        emotion_logits = self.emotion_head(pooled_output)
        trigger_logits = self.trigger_head(pooled_output)

        return emotion_logits, trigger_logits


In [141]:
criterion = torch.nn.CrossEntropyLoss() # can tinker with the loss function, change to a different one
#criterion_trigger = torch.nn.BCEWithLogitsLoss()


num_epochs = 5
batch_size = 32

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
freezed_embeddings = False

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
roberta_baseline = Roberta_baseline().to(device)
optimizer = torch.optim.AdamW(roberta_baseline.parameters(), lr=1e-5)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
for epoch in range(num_epochs):
    roberta_baseline.train()
    total_loss = 0.0

    for batch in tqdm(train_loader, desc=f'Epoch {epoch + 1}', leave=False):
        input_ids,attention_mask,emotion_labels,trigger_label = batch
        optimizer.zero_grad()
        emotion_loss = 0.0
        trigger_loss = 0.0
        for el in range(len(input_ids)):

          input_ids_el = input_ids[el].squeeze().to(device)
          attention_mask_el = attention_mask[el].squeeze().to(device)
          emotion_labels_el = emotion_labels[el].squeeze().to(device)
          trigger_label_el = trigger_label[el].squeeze().to(device)
          # Forward pass
          emotion_logits, trigger_logits = roberta_baseline(input_ids_el, attention_mask_el)

          emotion_loss += criterion(emotion_logits, torch.argmax(emotion_labels_el, dim=1))
          #trigger_loss += criterion(torch.argmax(trigger_logits, dim=1).float(), trigger_label_el.float())
          trigger_loss += criterion(trigger_logits, trigger_label_el)

        # Combine losses for backpropagation
        loss = (emotion_loss + trigger_loss)/len(input_ids)#batch_size

        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    # Compute the average loss
    average_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch + 1}, Average Loss: {average_loss}")
    test_bert('test',roberta_baseline)



Epoch 1, Average Loss: 2.0428851010307434




Average Sequence F1 (Emotion):  0.388563
Average Sequence F1 (Trigger): 0.438173
Unrolled Sequence F1 (Emotion): 0.292144
Unrolled Sequence F1 (Trigger): 0.448390




Epoch 2, Average Loss: 1.5501572934408037




Average Sequence F1 (Emotion):  0.440026
Average Sequence F1 (Trigger): 0.438173
Unrolled Sequence F1 (Emotion): 0.364738
Unrolled Sequence F1 (Trigger): 0.448390




Epoch 3, Average Loss: 1.3078779076773024




Average Sequence F1 (Emotion):  0.433626
Average Sequence F1 (Trigger): 0.438173
Unrolled Sequence F1 (Emotion): 0.387289
Unrolled Sequence F1 (Trigger): 0.448390




Epoch 4, Average Loss: 1.104237905570439


Evaluation:  42%|████▏     | 835/2000 [00:13<00:16, 70.50it/s]

In [None]:
_ = test_bert('test',roberta_baseline)