In [None]:
# %pip install scikit-learn
# %pip install matplotlib
# %pip install tqdm torch numpy

In [76]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import json
from sklearn.metrics import f1_score
import numpy as np
import pickle
import matplotlib.pyplot as plt
from tqdm import tqdm
import math
import torch.nn.functional as F

In [77]:
with open('train_dict.pkl', 'rb') as f:
    train_data = pickle.load(f, encoding='latin1')

with open('val_dict.pkl', 'rb') as f:
    val_data = pickle.load(f, encoding='latin1')

emotion_ids = {'neutral' : 0, 'joy' : 1, 'anger' : 2, 'surprise' : 3, 'sadness' : 4, 'fear' : 5, 'disgust' : 6}

In [78]:
def pad_data():
    max_length = 25
    pad_word = 'PAD'
    pad_emotion = 'neutral'
    data = [train_data, val_data]
    dict_ = {}

    for task_data in data:
        remove_keys = []
        for key in task_data:
            if len(task_data[key][0]) not in dict_:
                dict_[len(task_data[key][0])] = 0
            dict_[len(task_data[key][0])] += 1
            # checking_nan
            for step in range(len(task_data[key][3])):
                if task_data[key][3][step] is None:
                    remove_keys.append(key)
                    
              
            for step in range(len(task_data[key][0]), max_length):
                task_data[key][0].append(pad_word)
                task_data[key][1].append(np.zeros(768))
                task_data[key][2].append(pad_emotion)
                task_data[key][3].append(0)
        
        for key in remove_keys:
            if key in task_data:
                del task_data[key] 


pad_data()
train_data = {new_key: train_data[old_key] for new_key, (old_key, _) in enumerate(train_data.items())}
val_data = {new_key: val_data[old_key] for new_key, (old_key, _) in enumerate(val_data.items())}

In [108]:
class GRUModel(nn.Module):
    def __init__(self, embedding_dim, num_classes):
        super(GRUModel, self).__init__()
        self.gru1 = nn.GRU(embedding_dim, 256, num_layers=1, batch_first=True)
        self.gru2 = nn.GRU(256, 64, num_layers=1, batch_first=True)
        # self.gru3 = nn.GRU(128, 64, num_layers=1, batch_first=True)
        self.gru4 = nn.GRU(64, 16, num_layers=1, batch_first=True)
        self.fc1 = nn.Linear(16, num_classes)
        # self.fc2 = nn.Linear(32, num_classes)

    def forward(self, x):
        out, _ = self.gru1(x)
        out, _ = self.gru2(out)
        # out, _ = self.gru3(out)
        out, _ = self.gru4(out)
        out = self.fc1(out)   
        # out = self.fc2(out)
        out = F.softmax(out, dim = -1)
        return out

In [103]:
class ErfDataset(Dataset):
    def __init__(self, data, emo_index):
        self.data = data
        self.length = len(self.data)
        self.emo_index =  emo_index

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        sentence_embeddings = np.array(self.data[index][1] , dtype='float32')
        emotion_sequence = self.data[index][2]
        emotion_labels = [self.emo_index[emotion] for emotion in emotion_sequence]
        return torch.tensor(sentence_embeddings, dtype= torch.float32), torch.tensor(emotion_labels, dtype= torch.float32)

In [104]:
train_dataset = ErfDataset(train_data, emotion_ids)
val_dataset = ErfDataset(val_data, emotion_ids)

In [121]:
def train_model(train_dataset, val_dataset, model, optimizer, criterion, device, num_epochs = 30, bs = 32):

    train_dataloader = DataLoader(train_dataset, batch_size = bs, shuffle = True)
    val_dataloader =  DataLoader(val_dataset, batch_size = bs, shuffle = False)

    train_losses = []
    val_losses = []
    train_f1_scores = []
    val_f1_scores = []

    for epoch in range(num_epochs):
        model.train()  
        total_train_loss = 0
        all_train_predictions = []
        all_train_targets = []

        for batch_idx, (inputs, emotions) in enumerate(train_dataloader):
            inputs, emotions = inputs.to(device), emotions.to(device)
            # print(f"inputs shape - {inputs.shape}")
            # print(f"emotions shape - {emotions.shape}")
            optimizer.zero_grad()
            outputs = model(inputs)
            
            # print(f"outputs shape - {outputs.shape}")
            loss = 0
    
            # Iterate over time steps
            for i in range(outputs.size(1)):
                # print(outputs[:, i, :].shape)
                # print(emotions[:, i].long().shape)
                loss += criterion(outputs[:, i, :], emotions[:, i].long())
            
            loss.backward()
            optimizer.step()

            total_train_loss += loss.item()

            predictions = outputs.argmax(dim=2).view(-1).cpu().numpy()
            targets = emotions.view(-1).cpu().numpy()

            all_train_predictions.extend(predictions)
            all_train_targets.extend(targets)

        avg_train_loss = total_train_loss / len(train_dataloader)

        train_macro_f1 = f1_score(all_train_targets, all_train_predictions, average='weighted')
        print(f"Epoch {epoch + 1}, Training Loss: {avg_train_loss}, Training Macro F1-Score: {train_macro_f1}")

        model.eval()  
        total_val_loss = 0
        all_val_predictions = []
        all_val_targets = []

        with torch.no_grad():
            for val_inputs, val_emotions in val_dataloader:
                val_inputs, val_emotions = val_inputs.to(device), val_emotions.to(device)
                val_outputs = model(val_inputs)

                # Compute loss
                loss = 0
        
                # Iterate over time steps
                for i in range(val_outputs.size(1)):
                    # Compute loss at each time step
                    loss += criterion(val_outputs[:, i, :], val_emotions[:, i].long())

                # loss = criterion(val_outputs.view(-1, val_outputs.size(2)), val_emotions.view(-1))
                total_val_loss += loss.item()

                # Convert predictions and targets to numpy arrays
                predictions = val_outputs.argmax(dim=2).view(-1).cpu().numpy()
                targets = val_emotions.view(-1).cpu().numpy()

                all_val_predictions.extend(predictions)
                all_val_targets.extend(targets)

            avg_val_loss = total_val_loss / len(val_dataloader)
            val_losses.append(avg_val_loss)

            # Calculate F1 score
            val_macro_f1 = f1_score(all_val_targets, all_val_predictions, average='weighted')
            val_f1_scores.append(val_macro_f1)

        print(f"Epoch {epoch + 1},  Validation Loss: {avg_val_loss}, Validation Macro F1-Score: {val_macro_f1}")


    # plot_results(train_losses, val_losses, train_f1_scores, val_f1_scores)
    return train_losses, train_f1_scores, val_losses, val_f1_scores

In [122]:
device = torch.device(
    "mps"
    if torch.backends.mps.is_available()
    else "cuda" if torch.cuda.is_available() else "cpu"
)

# class_counts = np.array([23702, 1466, 911, 1021, 576, 229, 225])
# class_counts = [83634, 6552, 4188, 4844, 2806, 1177, 1049]
# class_counts_tensor = torch.tensor(class_counts, dtype=torch.float32).to(device)

In [123]:
model = GRUModel(768, 7).to(device) 
optimizer = optim.Adam(model.parameters(), lr = 0.00005)
criterion = nn.CrossEntropyLoss()
# criterion = nn.CrossEntropyLoss(weight = class_counts_tensor)

train_losses, train_f1_scores, val_losses, val_f1_scores = train_model(train_dataset, val_dataset, model = model, num_epochs = 20, optimizer=optimizer, criterion=criterion, device=device)

Epoch 1, Training Loss: 46.05013921606632, Training Macro F1-Score: 0.6318498316249621
Epoch 1,  Validation Loss: 44.19022031930777, Validation Macro F1-Score: 0.719545758038355
Epoch 2, Training Loss: 43.58903451002281, Training Macro F1-Score: 0.7142877841065643
Epoch 2,  Validation Loss: 42.9547119140625, Validation Macro F1-Score: 0.7191911530072801
Epoch 3, Training Loss: 42.42246421056849, Training Macro F1-Score: 0.7142697103040407
Epoch 3,  Validation Loss: 41.836326305682846, Validation Macro F1-Score: 0.7191911530072801
Epoch 4, Training Loss: 41.35969485217378, Training Macro F1-Score: 0.7142888920287054
Epoch 4,  Validation Loss: 40.82538296626164, Validation Macro F1-Score: 0.719290573532332
Epoch 5, Training Loss: 40.414768888750146, Training Macro F1-Score: 0.7145377227597478
Epoch 5,  Validation Loss: 39.93742121182955, Validation Macro F1-Score: 0.719290573532332
Epoch 6, Training Loss: 39.58480802929128, Training Macro F1-Score: 0.7145377227597478
Epoch 6,  Validation

In [85]:
# def plot_results(train_losses, val_losses, train_f1_scores, val_f1_scores):
#     epochs = range(1, len(train_losses) + 1)

#     plt.figure(figsize=(12, 5))

#     # Plotting Losses
#     plt.subplot(1, 2, 1)
#     plt.plot(epochs, train_losses, label='Training Loss')
#     plt.plot(epochs, val_losses, label='Validation Loss')
#     plt.title('Training and Validation Losses')
#     plt.xlabel('Epochs')
#     plt.ylabel('Loss')
#     plt.legend()

#     # Plotting Macro F1-Scores
#     plt.subplot(1, 2, 2)
#     plt.plot(epochs, train_f1_scores, label='Training Macro F1-Score')
#     plt.plot(epochs, val_f1_scores, label='Validation Macro F1-Score')
#     plt.title('Training and Validation Macro F1-Scores')
#     plt.xlabel('Epochs')
#     plt.ylabel('Macro F1-Score')
#     plt.legend()

#     plt.tight_layout()
#     plt.show()

# def plot_test_results(test_loss, test_macro_f1):
#     plt.figure(figsize=(6, 6))
#     labels = ['Test Loss', 'Test Macro F1-Score']
#     values = [test_loss, test_macro_f1]

#     plt.bar(labels, values, color=['blue', 'green'])
#     plt.title('Test Results')
#     plt.ylabel('Values')
#     plt.show()

In [86]:
# def test_model(task, embedding_type, model, criterion, device, batch_size = 1):
#     test_dataloader = None
#     if task == 1:
#         test_dataloader =  DataLoader(Task_data(task1_test_data, bio_mapping_task1, embedding_type), batch_size=batch_size, shuffle=False)

#     elif task == 2:
#         test_dataloader =  DataLoader(Task_data(task2_test_data, bio_mapping_task2, embedding_type), batch_size=batch_size, shuffle=False)

#     total_test_loss = 0
#     all_test_predictions = []
#     all_test_targets = []

#     with torch.no_grad():
#         for test_inputs, test_targets in test_dataloader:
#             test_inputs, test_targets = test_inputs.to(device), test_targets.to(device)
#             test_outputs = model(test_inputs)

#             loss = 0
#             for i in range(test_outputs.size(1)):  # Iterate over time steps
#                 loss += criterion(test_outputs[:, i, :], test_targets[:, i])  

#             total_test_loss += loss.item()

#             all_test_predictions.extend(test_outputs.argmax(dim=2).view(-1).cpu().numpy())
#             all_test_targets.extend(test_targets.view(-1).cpu().numpy())

#         avg_test_loss = total_test_loss / len(test_dataloader)
#         test_macro_f1 = f1_score(all_test_targets, all_test_predictions, average='macro')
#     print(f'Test Loss: {avg_test_loss}, Test Macro F1-Score: {test_macro_f1}')