In [None]:
#!pip install torch==1.13.0+cu116 --extra-index-url https://download.pytorch.org/whl/cu116
!pip install transformers==4.30.0
!pip install datasets==2.13.2
!pip install accelerate -U
!pip install evaluate

In [2]:
# system packages
from pathlib import Path
import shutil
import urllib
import tarfile
import sys
import os
# data and numerical management packages
import pandas as pd
import random
import numpy as np
import matplotlib.pyplot as plt

import torch
from torch.utils.data import DataLoader, Dataset
from transformers import BertTokenizer, BertModel, BertForSequenceClassification, AdamW, Trainer, TrainingArguments
from torch.nn.utils.rnn import pad_sequence

import warnings
warnings.filterwarnings('ignore')
# useful during debugging (progress bars)
from tqdm import tqdm
from transformers import set_seed

seed = 852

random.seed(seed)
np.random.seed(seed)
set_seed(seed)

In [82]:
dataset_folder = Path.cwd().joinpath("sample_data/MELD_train_efr.json")
#dataset_path = dataset_folder.joinpath('/MELD_train_efr.json')
#dataset_folder = "/kaggle/input/plaplapla/MELD_train_efr.json"
df = pd.read_json(dataset_folder)
#df['triggers'] = df['triggers'].fillna(value=0, inplace=False)#.replace('None', 0.0)

In [83]:
df

Unnamed: 0,episode,speakers,emotions,utterances,triggers
0,utterance_0,"[Chandler, The Interviewer, Chandler, The Inte...","[neutral, neutral, neutral, neutral, surprise]",[also I was the point person on my company's t...,"[0.0, 0.0, 0.0, 1.0, 0.0]"
1,utterance_1,"[Chandler, The Interviewer, Chandler, The Inte...","[neutral, neutral, neutral, neutral, surprise,...",[also I was the point person on my company's t...,"[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]"
2,utterance_2,"[Chandler, The Interviewer, Chandler, The Inte...","[neutral, neutral, neutral, neutral, surprise,...",[also I was the point person on my company's t...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ..."
3,utterance_3,"[Chandler, The Interviewer, Chandler, The Inte...","[neutral, neutral, neutral, neutral, surprise,...",[also I was the point person on my company's t...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,utterance_4,"[Joey, Rachel, Joey, Rachel]","[surprise, sadness, surprise, fear]",[But then who? The waitress I went out with la...,"[0.0, 0.0, 1.0, 0.0]"
...,...,...,...,...,...
3995,utterance_3995,"[Chandler, All, Monica, Chandler, Ross, Chandl...","[neutral, joy, neutral, neutral, surprise, dis...","[Hey., Hey!, So how was Joan?, I broke up with...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3996,utterance_3996,"[Chandler, All, Monica, Chandler, Ross, Chandl...","[neutral, joy, neutral, neutral, surprise, dis...","[Hey., Hey!, So how was Joan?, I broke up with...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3997,utterance_3997,"[Chandler, All, Monica, Chandler, Ross, Chandl...","[neutral, joy, neutral, neutral, surprise, dis...","[Hey., Hey!, So how was Joan?, I broke up with...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3998,utterance_3998,"[Chandler, All, Monica, Chandler, Ross, Chandl...","[neutral, joy, neutral, neutral, surprise, dis...","[Hey., Hey!, So how was Joan?, I broke up with...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [84]:
triggers = df['triggers']
for row in range(triggers.shape[0]):
    for trigger in range(len(triggers[row])):
        if triggers[row][trigger] == None:
            triggers[row][trigger] = 0.0

df['triggers'] = triggers

In [85]:
emotions = df['emotions'].explode().unique()
emotions

array(['neutral', 'surprise', 'fear', 'sadness', 'joy', 'disgust',
       'anger'], dtype=object)

In [86]:
triggers = df['triggers'].explode().unique()
triggers

array([0.0, 1.0], dtype=object)

In [87]:
dialogues = df['utterances'][:3200]
#print(sentences)
max_len_utterance = 0
index = 0
utterances_len = []
for dialogue in dialogues:
  for utterance in dialogue:
    #print(utterance)
    #if len(utterance.split()) > max_len_utterance:
     # max_len_utterance = len(utterance.split())
     utterances_len.append(len(utterance.split()))
np.mean(np.array(utterances_len))

8.077553661956639

In [88]:
from sklearn.preprocessing import LabelBinarizer
sorted_emotions = sorted(emotions)  #sort the array because Binarizer will automatically do that for one hot encoding
label_binarizer = LabelBinarizer()
label_binarizer.fit(sorted_emotions)

dialogues = df['emotions']
one_hot_emotions = []
for dialogue_emotion in dialogues:
  dialogue_emotions_list = []
  for emotion in dialogue_emotion:
    encoded_emotion=label_binarizer.transform([emotion])
    dialogue_emotions_list.append(np.ravel(encoded_emotion).tolist())
  one_hot_emotions.append(dialogue_emotions_list)

In [89]:
df['emotions'] = one_hot_emotions

In [90]:
from sklearn.model_selection import train_test_split
train_data, temp_data = train_test_split(df, train_size=0.8, shuffle=False)
val_data, test_data = train_test_split(temp_data, test_size=0.5, shuffle=False)
val_data.reset_index(drop=True, inplace=True)
test_data.reset_index(drop=True, inplace=True)

In [91]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
def tokenize_padding(dialogues):
  tokenized_dialogues = []
  max_length = 10

  for dialogue in tqdm(dialogues):
      tokenized_dialogue = tokenizer(
          dialogue,
          max_length=9,
          padding='max_length',
          truncation=True,
          return_tensors='pt'
      )
      tokenized_dialogues.append(tokenized_dialogue)

  #padded_input_ids = pad_sequence([dialogue['input_ids'] for dialogue in tokenized_dialogues], batch_first=True)
  input_ids = [dialogue['input_ids'] for dialogue in tokenized_dialogues]
  attention_mask = [dialogue['attention_mask'] for dialogue in tokenized_dialogues]
  return input_ids,attention_mask

#padded_input_ids_train,padded_attention_mask_train = tokenize_padding(train_data['utterances'])
input_ids_train,attention_mask_train = tokenize_padding(train_data['utterances'])
input_ids_val,attention_mask_val = tokenize_padding(val_data['utterances'])
input_ids_test,attention_mask_test = tokenize_padding(test_data['utterances'])

100%|██████████| 3200/3200 [00:14<00:00, 220.19it/s]
100%|██████████| 400/400 [00:01<00:00, 389.36it/s]
100%|██████████| 400/400 [00:01<00:00, 396.44it/s]


In [142]:
from sklearn.metrics import f1_score

def compute_sequence_f1(predictions, labels):
    # predictions and labels should be lists of tensors for each dialogue
    emotion_f1_scores = []
    trigger_f1_scores = []
    for emotion_pred, trigger_pred, emotion_lab, trigger_lab in zip(predictions[0], predictions[1], labels[0], labels[1]):
        emotion_predicted_classes = torch.argmax(emotion_pred, dim=1)
        trigger_predicted_classes = torch.argmax(trigger_pred, dim=1)
        emotion_true_classes = torch.argmax(emotion_lab, dim=1)
        trigger_true_classes = trigger_lab
        emotion_f1 = f1_score(emotion_true_classes.cpu().numpy(), emotion_predicted_classes.cpu().numpy(), average='macro')
        trigger_f1 = f1_score(trigger_true_classes.cpu().numpy(), trigger_predicted_classes.cpu().numpy(), average='binary')
        emotion_f1_scores.append(emotion_f1)
        trigger_f1_scores.append(trigger_f1)
    average_emotion_f1 = torch.tensor(emotion_f1_scores, dtype=torch.float32).mean()
    average_trigger_f1 = torch.tensor(trigger_f1_scores, dtype=torch.float32).mean()
    return average_emotion_f1, average_trigger_f1

def compute_unrolled_sequence_f1(predictions, labels):
    # Flatten all utterances and compute the F1 score
    all_emotion_predicted_classes = torch.argmax(torch.cat(predictions[0], dim=0), dim=1)
    all_trigger_predicted_classes = torch.argmax(torch.cat(predictions[1], dim=0), dim=1)
    all_emotion_true_classes = torch.argmax(torch.cat(labels[0], dim=0), dim=1)
    all_trigger_true_classes = torch.cat(labels[1], dim=0)
    unrolled_emotion_f1 = f1_score(all_emotion_true_classes.cpu().numpy(), all_emotion_predicted_classes.cpu().numpy(), average='macro')
    unrolled_trigger_f1 = f1_score(all_trigger_true_classes.cpu().numpy(), all_trigger_predicted_classes.cpu().numpy(), average='binary')
    unrolled_emotion_f1_tensor = torch.tensor(unrolled_emotion_f1, dtype=torch.float32)
    unrolled_trigger_f1_tensor = torch.tensor(unrolled_trigger_f1, dtype=torch.float32)
    return unrolled_emotion_f1_tensor, unrolled_trigger_f1_tensor

In [93]:
class CustomDataset(Dataset):
    def __init__(self, input_ids, attention_mask, emotions, triggers):
        self.input_ids = input_ids
        self.attention_mask = attention_mask
        self.emotions = emotions
        self.triggers = triggers

    def __len__(self):
        return len(self.emotions)

    def __getitem__(self, idx):
        input_ids = self.input_ids[idx]
        attention_mask = self.attention_mask[idx]
        emotion = self.emotions[idx]
        trigger = self.triggers[idx]

        emotion_labels = torch.tensor(emotion, dtype=torch.float32)
        trigger_label = torch.tensor(trigger, dtype=torch.long)
        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'emotion_labels': emotion_labels,
            'trigger_label': trigger_label
        }

In [133]:
class CustomBERTModel(torch.nn.Module):
    def __init__(self, freeze_embeddings=True,hidden_size=64, num_layers=1, bidirectional=False):
        super(CustomBERTModel, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        if freeze_embeddings:
            for name,param in self.bert.named_parameters():
                if 'embeddings' in name:
                    param.requires_grad = False


        #self.fc1 = torch.nn.Linear(self.bert.config.hidden_size, hidden_size)

        input_size =  input_size = self.bert.config.hidden_size

        self.lstm = torch.nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            bidirectional=bidirectional,
            batch_first=True
        )

        # Linear layer for emotion classification
        self.emotion_head = torch.nn.Linear(hidden_size * 2 if bidirectional else hidden_size, len(emotions))

        # Linear layer for trigger classification
        self.trigger_head = torch.nn.Linear(hidden_size * 2 if bidirectional else hidden_size, len(triggers))


    def forward(self, input_ids, attention_mask):
        """outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask, return_dict=True)
        pooled_output = outputs['pooler_output']
        # Emotion head
        emotion_logits = self.emotion_head(pooled_output)

        # Trigger head
        trigger_logits = self.trigger_head(pooled_output)
        return emotion_logits, trigger_logits"""
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask, return_dict=True)
        pooled_output = outputs['pooler_output']

        #x = torch.nn.functional.relu(self.fc1(pooled_output))

        lstm_input = pooled_output.unsqueeze(1).expand(-1, 9, -1)
        lstm_output, _ = self.lstm(lstm_input)
        # Extract the output from the last time step
        lstm_output = lstm_output[:, -1, :]
        # Emotion head
        emotion_logits = self.emotion_head(lstm_output)

        # Trigger head
        trigger_logits = self.trigger_head(lstm_output)

        return emotion_logits, trigger_logits

In [95]:
train_dataset = CustomDataset(input_ids_train, attention_mask_train, train_data['emotions'],
                              train_data['triggers'])
validation_dataset = CustomDataset(input_ids_val, attention_mask_val, val_data['emotions'],
                             val_data['triggers'])
test_dataset = CustomDataset(input_ids_test, attention_mask_test, test_data['emotions'],
                             test_data['triggers'])

In [96]:
from torch.nn.utils.rnn import pad_sequence

def collate_fn(batch):
    input_ids = [item['input_ids'] for item in batch]
    attention_mask = [item['attention_mask'] for item in batch]
    emotion_labels = [item['emotion_labels'] for item in batch]#torch.stack([item['emotion_labels'] for item in batch], dim=0)
    trigger_label = [item['trigger_label'] for item in batch]#torch.stack([item['trigger_label'] for item in batch], dim=0)

    #input_ids = pad_sequence([torch.stack(item['input_ids']) for item in batch], batch_first=True)
    #attention_mask = pad_sequence([torch.stack(item['attention_mask']) for item in batch], batch_first=True)
    return input_ids,attention_mask,emotion_labels,trigger_label
    #return {'input_ids': input_ids, 'attention_mask': attention_mask, 'emotion_labels': emotion_labels, 'trigger_label': trigger_label

## Bert

In [138]:
criterion_emotion = torch.nn.CrossEntropyLoss() # can tinker with the loss function, change to a different one
criterion_trigger = torch.nn.BCEWithLogitsLoss()

num_epochs = 20
batch_size = 16

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
val_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
freezed_embeddings = True
custom_Bert_Model = CustomBERTModel()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
custom_Bert_Model = custom_Bert_Model.to(device)
optimizer = torch.optim.Adam(custom_Bert_Model.parameters(), lr=5e-5)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [126]:
def test_bert(mode='validation'):
# Usage in the eval loop
  sequence_f1_scores_emotion = []
  sequence_f1_scores_trigger = []
  unrolled_predictions_emotion = []
  unrolled_predictions_trigger = []
  unrolled_labels_emotion = []
  unrolled_labels_trigger = []
  sequence_f1_scores = []

  batch_size = 1
  #test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
  if mode == 'validation':
    loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False)
  elif mode == 'test':
    loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

  with torch.no_grad():
      for batch in tqdm(loader, desc='Evaluation', leave=False):
          input_ids = batch['input_ids'].squeeze().to(device)
          attention_mask = batch['attention_mask'].squeeze().to(device)
          emotion_labels = batch['emotion_labels'].squeeze().to(device)
          trigger_label = batch['trigger_label'].squeeze().to(device)

          emotion_logits, trigger_logits = custom_Bert_Model(input_ids, attention_mask)

          # Store predictions and labels for later unrolled F1 computation
          unrolled_predictions_emotion.append(emotion_logits)
          unrolled_labels_emotion.append(emotion_labels)
          unrolled_predictions_trigger.append(trigger_logits)
          unrolled_labels_trigger.append(trigger_label)

          # Convert logits to probabilities and then to class predictions
          predicted_classes = torch.argmax(emotion_logits, dim=1)
          true_classes = torch.argmax(emotion_labels, dim=1)

          # Compute F1 for the current sequence (dialogue)
          sequence_f1 = f1_score(true_classes.cpu().numpy(), predicted_classes.cpu().numpy(), average='macro')
          sequence_f1_scores.append(sequence_f1)

  # Compute the average Sequence F1 for emotions and triggers
  average_sequence_f1_emotion, average_sequence_f1_trigger = compute_sequence_f1(
      [unrolled_predictions_emotion, unrolled_predictions_trigger],
      [unrolled_labels_emotion, unrolled_labels_trigger]
  )

  # Compute the Unrolled Sequence F1 for emotions and triggers
  unrolled_sequence_f1_emotion, unrolled_sequence_f1_trigger = compute_unrolled_sequence_f1(
      [unrolled_predictions_emotion, unrolled_predictions_trigger],
      [unrolled_labels_emotion, unrolled_labels_trigger]
  )

  # Print the F1 scores for emotions and triggers
  print(f"Average Sequence F1 (Emotion): {average_sequence_f1_emotion}")
  print(f"Average Sequence F1 (Trigger): {average_sequence_f1_trigger}")
  print(f"Unrolled Sequence F1 (Emotion): {unrolled_sequence_f1_emotion.item()}")
  print(f"Unrolled Sequence F1 (Trigger): {unrolled_sequence_f1_trigger.item()}")

In [140]:
for epoch in range(num_epochs):
    custom_Bert_Model.train()
    total_loss = 0.0

    for batch in tqdm(train_loader, desc=f'Epoch {epoch + 1}', leave=False):
        input_ids,attention_mask,emotion_labels,trigger_label = batch

        # Zero the gradients on the optimizer
        optimizer.zero_grad()

        for el in range(batch_size):
          emotion_loss = 0.0
          trigger_loss = 0.0

          input_ids_el = input_ids[el].squeeze().to(device)
          attention_mask_el = attention_mask[el].squeeze().to(device)
          emotion_labels_el = emotion_labels[el].squeeze().to(device)
          trigger_label_el = trigger_label[el].squeeze().to(device)

          # Forward pass
          emotion_logits, trigger_logits = custom_Bert_Model(input_ids_el, attention_mask_el)
          # Compute the loss for both emotion and trigger

          emotion_loss += criterion_emotion(emotion_logits, torch.argmax(emotion_labels_el, dim=1))
          trigger_loss += criterion_trigger(torch.argmax(trigger_logits, dim=1).float(), trigger_label_el.float())

        # Combine losses for backpropagation
        loss = (emotion_loss + trigger_loss)/batch_size

        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    # Compute the average loss
    average_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch + 1}, Average Loss: {average_loss}")
    test_bert('validation')



Epoch 1, Average Loss: 0.15407862298190594




Average Sequence F1 (Emotion): 0.27070796489715576
Average Sequence F1 (Trigger): 0.449733167886734
Unrolled Sequence F1 (Emotion): 0.17098261415958405
Unrolled Sequence F1 (Trigger): 0.49200451374053955




Epoch 2, Average Loss: 0.13999401323497296




Average Sequence F1 (Emotion): 0.3039402663707733
Average Sequence F1 (Trigger): 0.4729042947292328
Unrolled Sequence F1 (Emotion): 0.21924729645252228
Unrolled Sequence F1 (Trigger): 0.510274350643158




Epoch 3, Average Loss: 0.13262631554156543




Average Sequence F1 (Emotion): 0.3030347526073456
Average Sequence F1 (Trigger): 0.45957833528518677
Unrolled Sequence F1 (Emotion): 0.22836025059223175
Unrolled Sequence F1 (Trigger): 0.4881386160850525




Epoch 4, Average Loss: 0.12252616142854095




Average Sequence F1 (Emotion): 0.3007153272628784
Average Sequence F1 (Trigger): 0.45699068903923035
Unrolled Sequence F1 (Emotion): 0.24919816851615906
Unrolled Sequence F1 (Trigger): 0.4994644820690155




KeyboardInterrupt: 

In [143]:
test_bert('test')



Average Sequence F1 (Emotion): 0.3182724118232727
Average Sequence F1 (Trigger): 0.1578434556722641
Unrolled Sequence F1 (Emotion): 0.25376445055007935
Unrolled Sequence F1 (Trigger): 0.17444218695163727


## Majority classifier

In [None]:
import torch

def find_majority_class(train_loader):
    # Initialize counters
    emotion_counts = torch.zeros(7) # Assuming there are 7 unique emotions
    trigger_counts = torch.zeros(2) # There are 2 classes for triggers: present or not
    negative_trigger_counts = 0
    positive_trigger_counts = 0
    # Iterate over the training dataset to count the labels
    for batch in train_loader:
        emotion_labels = batch['emotion_labels'].squeeze()
        trigger_labels = batch['trigger_label'].squeeze()
        #print(trigger_labels,torch.sum(trigger_labels, dim=0),(trigger_labels == 0).sum())
        # Sum up the counts for each class
        positive_trigger_counts += torch.sum(trigger_labels, dim=0)
        # Count the zeros for the negative class (absence of a trigger)
        # Since one-hot encoding, the absence is just the inverse of the presence
        negative_trigger_counts += torch.sum(1 - trigger_labels, dim=0)
        emotion_counts += torch.sum(emotion_labels, dim=0)

    trigger_counts[0] = negative_trigger_counts
    trigger_counts[1] = positive_trigger_counts
    print(trigger_counts)
    print(emotion_counts)
    # Find the index with the maximum count for emotions and triggers
    majority_emotion = torch.zeros_like(emotion_counts)
    majority_emotion[torch.argmax(emotion_counts)] = 1
    majority_trigger = torch.zeros_like(trigger_counts)
    majority_trigger[torch.argmax(trigger_counts)] = 1

    return majority_emotion, majority_trigger

# Let's assume that 'train_loader' is a DataLoader for your training dataset
# You need to replace 'train_loader' with the actual DataLoader for your dataset
majority_emotion, majority_trigger = find_majority_class(train_loader)
majority_emotion, majority_trigger

In [None]:
from sklearn.metrics import f1_score
from tqdm.auto import tqdm

def majority_classifier(majority_emotion, majority_trigger, test_loader):
    all_emotion_predictions = []
    all_trigger_predictions = []
    all_emotion_labels = []
    all_trigger_labels = []

    with torch.no_grad():
        for batch in tqdm(test_loader, desc='Evaluation', leave=False):
            emotion_labels = batch['emotion_labels']
            trigger_labels = batch['trigger_label']

            for emotion_lab, trigger_lab in zip(emotion_labels, trigger_labels):
                # Ensure we have at least 1 dimension
                if emotion_lab.ndim == 1 and emotion_lab.size(0) == 1:
                    emotion_lab = emotion_lab.unsqueeze(0)
                if trigger_lab.ndim == 1 and trigger_lab.size(0) == 1:
                    trigger_lab = trigger_lab.unsqueeze(0)

                # Store the labels
                all_emotion_labels.append(emotion_lab)
                all_trigger_labels.append(trigger_lab)

                # Repeat the majority class prediction to match the number of utterances
                emotion_predictions = majority_emotion.repeat(emotion_lab.size(0), 1)
                #print(emotion_predictions)

                trigger_predictions = majority_trigger.repeat(trigger_lab.size(0), 1)

                # Store the predictions
                all_emotion_predictions.append(emotion_predictions)
                all_trigger_predictions.append(trigger_predictions)

    # Use the stored predictions and labels to calculate sequence F1 and unrolled sequence F1
    average_sequence_f1_emotion, average_sequence_f1_trigger = compute_sequence_f1(
        [all_emotion_predictions, all_trigger_predictions],
        [all_emotion_labels, all_trigger_labels]
    )

    unrolled_sequence_f1_emotion, unrolled_sequence_f1_trigger = compute_unrolled_sequence_f1(
        [all_emotion_predictions, all_trigger_predictions],
        [all_emotion_labels, all_trigger_labels]
    )

    return average_sequence_f1_emotion, average_sequence_f1_trigger, unrolled_sequence_f1_emotion, unrolled_sequence_f1_trigger

# Assume majority_emotion and majority_trigger are tensors of the majority class (one-hot encoded)
# and test_loader is your DataLoader instance for the test dataset.
average_f1_emotion, average_f1_trigger, unrolled_f1_emotion, unrolled_f1_trigger = majority_classifier(majority_emotion, majority_trigger, test_loader)

print(f"Average Sequence F1 (Emotion): {average_f1_emotion}")
print(f"Average Sequence F1 (Trigger): {average_f1_trigger}")
print(f"Unrolled Sequence F1 (Emotion): {unrolled_f1_emotion}")
print(f"Unrolled Sequence F1 (Trigger): {unrolled_f1_trigger}")

## Random

In [None]:
import torch

def random_classifier(test_loader, emotion_distribution, trigger_distribution):
    all_emotion_predictions = []
    all_trigger_predictions = []
    all_emotion_labels = []
    all_trigger_labels = []

    with torch.no_grad():
        for batch in tqdm(test_loader, desc='Evaluation', leave=False):
            emotion_labels = batch['emotion_labels']
            trigger_labels = batch['trigger_label']

            for emotion_lab, trigger_lab in zip(emotion_labels, trigger_labels):
                # Ensure we have at least 1 dimension
                if emotion_lab.ndim == 1 and emotion_lab.size(0) == 1:
                    emotion_lab = emotion_lab.unsqueeze(0)
                if trigger_lab.ndim == 1 and trigger_lab.size(0) == 1:
                    trigger_lab = trigger_lab.unsqueeze(0)

                # Store the labels
                all_emotion_labels.append(emotion_lab)
                all_trigger_labels.append(trigger_lab)

                # Generate random predictions for emotions
                random_emotion_predictions = torch.randint(0, 2, (emotion_lab.size(0), 7))  # Randomly 0 or 1 for each emotion
                all_emotion_predictions.append(random_emotion_predictions.float())

                # Generate random predictions for triggers based on the training distribution
                random_trigger_probs = torch.rand((trigger_lab.size(0), 1))
                random_trigger_predictions = (random_trigger_probs < trigger_distribution).long()  # Binary prediction based on distribution
                random_trigger_predictions = torch.cat((random_trigger_predictions, 1 - random_trigger_predictions), dim=1)  # Make it one-hot
                all_trigger_predictions.append(random_trigger_predictions.float())

    # Calculate the F1 scores using your metric functions
    average_sequence_f1_emotion, average_sequence_f1_trigger = compute_sequence_f1(
        [all_emotion_predictions, all_trigger_predictions],
        [all_emotion_labels, all_trigger_labels]
    )

    unrolled_sequence_f1_emotion, unrolled_sequence_f1_trigger = compute_unrolled_sequence_f1(
        [all_emotion_predictions, all_trigger_predictions],
        [all_emotion_labels, all_trigger_labels]
    )

    return average_sequence_f1_emotion, average_sequence_f1_trigger, unrolled_sequence_f1_emotion, unrolled_sequence_f1_trigger

# You need to provide the distribution for the trigger class from your training data
# For example, if 30% of your training samples have a trigger, trigger_distribution should be 0.3
trigger_distribution = 0.5  # Replace with your actual distribution

# Now call your random classifier function
average_f1_emotion, average_f1_trigger, unrolled_f1_emotion, unrolled_f1_trigger = random_classifier(
    test_loader,
    emotion_distribution=None,  # Not used currently as we're assuming a uniform distribution
    trigger_distribution=trigger_distribution
)

print(f"Random Classifier Average Sequence F1 (Emotion): {average_f1_emotion}")
print(f"Random Classifier Average Sequence F1 (Trigger): {average_f1_trigger}")
print(f"Random Classifier Unrolled Sequence F1 (Emotion): {unrolled_f1_emotion}")
print(f"Random Classifier Unrolled Sequence F1 (Trigger): {unrolled_f1_trigger}")

## Bert with context?!?

In [None]:
import torch
from transformers import BertModel
import torch.nn as nn
class Attention(nn.Module):
    def __init__(self, hidden_size):
        super(Attention, self).__init__()
        self.hidden_size = hidden_size
        self.attention_weights = nn.Parameter(torch.Tensor(hidden_size, 1))
        nn.init.xavier_uniform_(self.attention_weights.data)

    def forward(self, lstm_output):
        # Apply attention weights
        attention_scores = torch.matmul(lstm_output, self.attention_weights).squeeze(-1)
        attention_scores = torch.softmax(attention_scores, dim=1).unsqueeze(2)

        # Apply the attention scores to the lstm_output
        weighted_sequence = lstm_output * attention_scores
        attended_output = weighted_sequence.sum(dim=1)
        return attended_output

class BERT(torch.nn.Module):
    def __init__(self, num_emotions, num_triggers, freeze_embeddings=True):
        super(BERT, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')#BertForSequenceClassification.from_pretrained
        #LSTM
        if freeze_embeddings:
            for name,param in self.bert.named_parameters():
                if 'embeddings' in name:
                    param.requires_grad = False
        # The size of the hidden layer in the LSTM, which will be the same as the BERT hidden size
        self.lstm_hidden_size = self.bert.config.hidden_size

        # Define a bidirectional LSTM layer that processes the full sequence of BERT outputs
        self.lstm = nn.LSTM(input_size=self.lstm_hidden_size,
                            hidden_size=self.lstm_hidden_size,
                            num_layers=1,
                            batch_first=True,
                            bidirectional=True)

        # Define the emotion and trigger heads with an input size that is twice the BERT hidden size
        # because the LSTM is bidirectional
        self.attention = Attention(self.lstm_hidden_size * 2)  # Because LSTM is bidirectional

        self.emotion_head = nn.Linear(self.lstm_hidden_size * 2, num_emotions)
        self.trigger_head = nn.Linear(self.lstm_hidden_size * 2, num_triggers)

    def forward(self, input_ids, attention_mask):
        bert_outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask, return_dict=True)
        bert_sequence_output = bert_outputs.last_hidden_state

        lstm_output, (h_n, c_n) = self.lstm(bert_sequence_output)

        # Apply attention to the LSTM output
        attended_output = self.attention(lstm_output)

        emotion_logits = self.emotion_head(attended_output)
        trigger_logits = self.trigger_head(attended_output)

        return emotion_logits, trigger_logits

In [None]:
num_emotions = 7
num_triggers = 2

# Instantiate the model
BERT_lstm = BERT(num_emotions,num_triggers).to(device)
#optimizer = AdamW(filter(lambda p: p.requires_grad, custom_Bert_Model.parameters()), lr=5e-5)
optimizer = torch.optim.Adam(BERT_lstm.parameters(), lr=5e-5)

In [None]:
num_epochs = 2
batch_size = 1
for epoch in range(num_epochs):
    BERT_lstm.train()
    total_loss = 0.0
    loss_emotion = 0.0
    loss_trigger = 0.0
    for batch in tqdm(train_loader, desc=f'Epoch {epoch + 1}', leave=False):
        input_ids = batch['input_ids'].squeeze().to(device)
        attention_mask = batch['attention_mask'].squeeze().to(device)
        emotion_labels = batch['emotion_labels'].squeeze().to(device)
        trigger_label = batch['trigger_label'].squeeze().to(device)

        # Zero the gradients on the optimizer
        optimizer.zero_grad()

        # Forward pass
        emotion_logits, trigger_logits = BERT_lstm(input_ids, attention_mask)

        # Compute the loss for both emotion and trigger
        emotion_loss = criterion(emotion_logits, torch.argmax(emotion_labels, dim=1))
        trigger_loss = criterion(trigger_logits, trigger_label)

        # Combine losses for backpropagation
        loss = emotion_loss + trigger_loss
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        loss_emotion += emotion_loss
        loss_trigger += trigger_loss

    # Compute the average loss
    average_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch + 1}, Average Loss: {average_loss}")
    print(f"Epoch {epoch + 1}, Emotion Loss: {loss_emotion/len(train_loader)}")
    print(f"Epoch {epoch + 1}, Trigger Loss: {loss_trigger/len(train_loader)}")

In [None]:
# Usage in the eval loop
sequence_f1_scores_emotion = []
sequence_f1_scores_trigger = []
unrolled_predictions_emotion = []
unrolled_predictions_trigger = []
unrolled_labels_emotion = []
unrolled_labels_trigger = []
sequence_f1_scores = []

test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

with torch.no_grad():
    for batch in tqdm(test_loader, desc='Evaluation', leave=False):
        input_ids = batch['input_ids'].squeeze().to(device)
        attention_mask = batch['attention_mask'].squeeze().to(device)
        emotion_labels = batch['emotion_labels'].squeeze().to(device)
        trigger_label = batch['trigger_label'].squeeze().to(device)

        emotion_logits, trigger_logits = BERT_lstm(input_ids, attention_mask)

        # Store predictions and labels for later unrolled F1 computation
        unrolled_predictions_emotion.append(emotion_logits)
        unrolled_labels_emotion.append(emotion_labels)
        unrolled_predictions_trigger.append(trigger_logits)
        unrolled_labels_trigger.append(trigger_label)

        # Convert logits to probabilities and then to class predictions
        predicted_classes = torch.argmax(emotion_logits, dim=1)
        true_classes = torch.argmax(emotion_labels, dim=1)

        # Compute F1 for the current sequence (dialogue)
        sequence_f1 = f1_score(true_classes.cpu().numpy(), predicted_classes.cpu().numpy(), average='macro')
        sequence_f1_scores.append(sequence_f1)

# Compute the average Sequence F1 for emotions and triggers
average_sequence_f1_emotion, average_sequence_f1_trigger = compute_sequence_f1(
    [unrolled_predictions_emotion, unrolled_predictions_trigger],
    [unrolled_labels_emotion, unrolled_labels_trigger]
)

# Compute the Unrolled Sequence F1 for emotions and triggers
unrolled_sequence_f1_emotion, unrolled_sequence_f1_trigger = compute_unrolled_sequence_f1(
    [unrolled_predictions_emotion, unrolled_predictions_trigger],
    [unrolled_labels_emotion, unrolled_labels_trigger]
)

# Print the F1 scores for emotions and triggers
print(f"Average Sequence F1 (Emotion): {average_sequence_f1_emotion}")
print(f"Average Sequence F1 (Trigger): {average_sequence_f1_trigger}")
print(f"Unrolled Sequence F1 (Emotion): {unrolled_sequence_f1_emotion.item()}")
print(f"Unrolled Sequence F1 (Trigger): {unrolled_sequence_f1_trigger.item()}")