In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import json
from sklearn.metrics import f1_score
import numpy as np
import pickle
import matplotlib.pyplot as plt
from tqdm import tqdm
import math
import torch.nn.functional as F


In [9]:
# Directory that holds the pickled validation splits. Kept in one place so the
# notebook can be pointed at another location by editing a single line.
DATA_DIR = '/Users/nalishjain/Acad Sem 6/NLP-Assignments/A4_16'

# NOTE(security): pickle.load can execute arbitrary code while unpickling, so
# these files must come from a trusted source.
with open(DATA_DIR + '/val_dict.pkl', 'rb') as f:
    val_data = pickle.load(f, encoding='latin1')

with open(DATA_DIR + '/val_dict_speakers.pkl', 'rb') as f:
    val_speaker_data = pickle.load(f, encoding='latin1')

# Emotion name -> integer class id (7 classes).
emotion_ids = {'neutral' : 0, 'joy' : 1, 'anger' : 2, 'surprise' : 3, 'sadness' : 4, 'fear' : 5, 'disgust' : 6}

In [10]:
def _is_missing_label(label):
    """Return True when ``label`` is ``None`` or a floating-point NaN."""
    if label is None:
        return True
    return isinstance(label, (float, np.floating)) and math.isnan(label)


def remove_nan(datasets=None):
    """Delete, in place, every entry whose label sequence has a missing value.

    An entry is dropped when any element of its field at index 3 is ``None``
    or a float NaN. The other fields of an entry are not inspected.

    Args:
        datasets: Iterable of dict-like containers to clean. When omitted,
            the module-level ``val_data`` and ``val_speaker_data`` are
            cleaned, which is what the original zero-argument call did.

    Returns:
        None. The containers are modified in place.
    """
    if datasets is None:
        datasets = [val_data, val_speaker_data]

    for task_data in datasets:
        # Collect the keys first: a dict cannot be resized while iterating it.
        # any() stops at the first missing label, so each key is listed once.
        remove_keys = [
            key
            for key, record in task_data.items()
            if any(_is_missing_label(label) for label in record[3])
        ]
        for key in remove_keys:
            del task_data[key]

# Drop the entries with missing labels, then renumber the survivors 0..n-1
# (in their existing order) so each dict can be indexed by integer position.
remove_nan()
val_data = dict(enumerate(val_data.values()))
val_speaker_data = dict(enumerate(val_speaker_data.values()))


## Model Classes

In [17]:
# M1
class LSTMModel(nn.Module):
    """Three stacked single-layer LSTMs followed by a per-step classifier.

    The hidden size narrows 256 -> 64 -> 16; a linear layer then maps every
    time step to ``num_classes`` scores, which are normalised with softmax.

    Args:
        embedding_dim: Size of the feature dimension of the input.
        num_classes: Number of classes scored at each time step.
    """

    def __init__(self, embedding_dim, num_classes):
        super().__init__()
        # Attribute names are part of the checkpoint format (state_dict keys).
        self.lstm1 = nn.LSTM(input_size=embedding_dim, hidden_size=256, num_layers=1, batch_first=True)
        self.lstm2 = nn.LSTM(input_size=256, hidden_size=64, num_layers=1, batch_first=True)
        self.lstm4 = nn.LSTM(input_size=64, hidden_size=16, num_layers=1, batch_first=True)
        self.fc1 = nn.Linear(in_features=16, out_features=num_classes)

    def forward(self, x):
        """Return class probabilities for every time step of ``x``.

        Args:
            x: Input features; the layers are built with ``batch_first=True``,
                so a batched input is laid out as (batch, seq, embedding_dim).

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of ``num_classes`` that sums to 1.
        """
        hidden, _ = self.lstm1(x)
        hidden, _ = self.lstm2(hidden)
        hidden, _ = self.lstm4(hidden)
        # The classifier is applied to all time steps, not only the last one.
        scores = self.fc1(hidden)
        return F.softmax(scores, dim=-1)

# M2
class GRUModel_m2(nn.Module):
    """Three stacked single-layer GRUs followed by a per-step classifier.

    The hidden size narrows 256 -> 64 -> 16; a linear layer then maps every
    time step to ``num_classes`` scores, which are normalised with softmax.

    Args:
        embedding_dim: Size of the feature dimension of the input.
        num_classes: Number of classes scored at each time step.
    """

    def __init__(self, embedding_dim, num_classes):
        super().__init__()
        # Attribute names are part of the checkpoint format (state_dict keys).
        self.gru1 = nn.GRU(input_size=embedding_dim, hidden_size=256, num_layers=1, batch_first=True)
        self.gru2 = nn.GRU(input_size=256, hidden_size=64, num_layers=1, batch_first=True)
        self.gru4 = nn.GRU(input_size=64, hidden_size=16, num_layers=1, batch_first=True)
        self.fc1 = nn.Linear(in_features=16, out_features=num_classes)

    def forward(self, x):
        """Return class probabilities for every time step of ``x``.

        Args:
            x: Input features; the layers are built with ``batch_first=True``,
                so a batched input is laid out as (batch, seq, embedding_dim).

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of ``num_classes`` that sums to 1.
        """
        hidden, _ = self.gru1(x)
        hidden, _ = self.gru2(hidden)
        hidden, _ = self.gru4(hidden)
        scores = self.fc1(hidden)
        return F.softmax(scores, dim=-1)
    
# M3
class GRUModel(nn.Module):
    """Three stacked single-layer GRUs followed by a two-layer linear head.

    The hidden size narrows 256 -> 64 -> 16; the head maps 16 -> 4 ->
    ``output_size`` at every time step. No activation or softmax is applied,
    so the module returns raw scores.

    Args:
        embedding_dim: Size of the feature dimension of the input.
        output_size: Number of scores produced at each time step.
    """

    def __init__(self, embedding_dim, output_size):
        super().__init__()
        # Attribute names are part of the checkpoint format (state_dict keys).
        self.gru1 = nn.GRU(input_size=embedding_dim, hidden_size=256, num_layers=1, batch_first=True)
        self.gru2 = nn.GRU(input_size=256, hidden_size=64, num_layers=1, batch_first=True)
        self.gru4 = nn.GRU(input_size=64, hidden_size=16, num_layers=1, batch_first=True)
        self.fc1 = nn.Linear(in_features=16, out_features=4)
        self.fc2 = nn.Linear(in_features=4, out_features=output_size)

    def forward(self, x):
        """Return unnormalised scores for every time step of ``x``.

        Args:
            x: Input features; the layers are built with ``batch_first=True``,
                so a batched input is laid out as (batch, seq, embedding_dim).

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of ``output_size``.
        """
        hidden, _ = self.gru1(x)
        hidden, _ = self.gru2(hidden)
        hidden, _ = self.gru4(hidden)
        return self.fc2(self.fc1(hidden))
    
# M4
class GRUModel_emotions(nn.Module):
    """GRU stack whose per-step features are fused with an emotion encoding.

    Three single-layer GRUs narrow the hidden size 256 -> 64 -> 32. A linear
    layer reduces each step to 16 features, a projection of the step's
    emotion vector is added to them, and a final linear layer maps the sum
    to ``output_size`` raw scores (no softmax is applied).

    Args:
        embedding_dim: Size of the feature dimension of the input.
        output_size: Number of scores produced at each time step.
        num_emotions: Width of the emotion vector given to ``forward``.
            Defaults to 7, the value that was previously hard-coded, so
            existing checkpoints and callers are unaffected.
    """

    def __init__(self, embedding_dim, output_size, num_emotions=7):
        super(GRUModel_emotions, self).__init__()
        # Attribute names are part of the checkpoint format (state_dict keys).
        self.gru1 = nn.GRU(embedding_dim, 256, num_layers=1, batch_first=True)
        self.gru2 = nn.GRU(256, 64, num_layers=1, batch_first=True)
        self.gru4 = nn.GRU(64, 32, num_layers=1, batch_first=True)
        self.one_hot_projection = nn.Linear(num_emotions, 16)
        self.fc1 = nn.Linear(32, 16)
        self.fc2 = nn.Linear(16, output_size)

    def forward(self, x, emotions):
        """Return unnormalised scores for every time step of ``x``.

        Args:
            x: Input features; the layers are built with ``batch_first=True``,
                so a batched input is laid out as (batch, seq, embedding_dim).
            emotions: Emotion vectors whose last dimension is
                ``num_emotions`` and whose leading dimensions match ``x``
                (or broadcast against it). Cast to float before projection.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of ``output_size``.
        """
        out, _ = self.gru1(x)
        out, _ = self.gru2(out)
        out, _ = self.gru4(out)
        # Fuse the emotions by adding their projection to the step features.
        fused = self.fc1(out) + self.one_hot_projection(emotions.float())
        return self.fc2(fused)

## Dataset Classes

In [20]:
class ErfDataset(Dataset):
    """Dataset yielding (embeddings, emotion ids, labels) per entry.

    Each entry ``data[index]`` is read positionally: field 1 holds the
    embeddings, field 2 the emotion names and field 3 the labels. Field 0 is
    not used.

    Args:
        data: Container indexable by the integer indices the loader requests.
        emo_index: Mapping from emotion name to integer id.
    """

    def __init__(self, data, emo_index):
        self.data = data
        self.emo_index = emo_index
        # The size is captured once; later changes to ``data`` are not seen.
        self.length = len(data)

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        """Return ``(embeddings, emotion_ids, labels)`` tensors for one entry.

        Embeddings and labels are float32; emotion ids keep the default
        dtype ``torch.tensor`` infers from the mapped values.
        """
        record = self.data[index]
        embeddings = np.array(record[1], dtype='float32')
        emotion_codes = [self.emo_index[name] for name in record[2]]
        labels = np.array(record[3], dtype='float32')
        return (
            torch.tensor(embeddings, dtype=torch.float32),
            torch.tensor(emotion_codes),
            torch.tensor(labels, dtype=torch.float32),
        )
    
class ErcDataset(Dataset):
    """Dataset yielding (embeddings, emotion ids) per entry.

    Each entry ``data[index]`` is read positionally: field 1 holds the
    embeddings and field 2 the emotion names. Other fields are not used.

    Args:
        data: Container indexable by the integer indices the loader requests.
        emo_index: Mapping from emotion name to integer id.
    """

    def __init__(self, data, emo_index):
        self.data = data
        self.emo_index = emo_index
        # The size is captured once; later changes to ``data`` are not seen.
        self.length = len(data)

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        """Return ``(embeddings, emotion_ids)`` for one entry, both float32."""
        record = self.data[index]
        embeddings = np.array(record[1], dtype='float32')
        emotion_codes = [self.emo_index[name] for name in record[2]]
        return (
            torch.tensor(embeddings, dtype=torch.float32),
            torch.tensor(emotion_codes, dtype=torch.float32),
        )

In [21]:
# Two views of the same validation data: ErcDataset yields
# (embeddings, emotion ids); ErfDataset additionally yields the field-3 labels.
val_dataset_erc = ErcDataset(val_data, emotion_ids)
val_dataset_erf = ErfDataset(val_data, emotion_ids)

# Run on the first CUDA device when one is available, otherwise on the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
# Add test dataset here

## Testing Functions

In [27]:
def _unpack_erc_batch(batch):
    """Split an ``(inputs, emotions)`` batch into model arguments and targets."""
    inputs, emotions = batch
    return (inputs,), emotions


def _unpack_erf_batch(batch):
    """Split an ``(inputs, emotions, targets)`` batch; the emotions are unused."""
    inputs, _, targets = batch
    return (inputs,), targets


def _unpack_erf_batch_with_emotions(batch):
    """Split an ``(inputs, emotions, targets)`` batch, one-hot encoding the
    emotions (7 classes) as a second model argument."""
    inputs, emotions, targets = batch
    return (inputs, F.one_hot(emotions, num_classes=7)), targets


def _weighted_f1(test_dataset, model, device, unpack_batch):
    """Run ``model`` over ``test_dataset`` and return the weighted F1 score.

    Args:
        test_dataset: Dataset wrapped in a ``DataLoader`` with batch size 1.
        model: Module whose output has the class scores on dimension 2.
        device: Device the model arguments are moved to before the call.
        unpack_batch: Callable mapping a batch to ``(model_args, targets)``.

    Returns:
        ``f1_score(..., average='weighted')`` over all flattened predictions.

    Raises:
        ValueError: If the dataset yields no targets at all.
    """
    # Order does not affect the score, so there is no reason to shuffle.
    loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
    predictions = []
    targets = []

    # Evaluate in eval mode, then put the model back the way it was found.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for batch in loader:
                model_args, batch_targets = unpack_batch(batch)
                outputs = model(*(arg.to(device) for arg in model_args))
                predictions.extend(outputs.argmax(dim=2).view(-1).cpu().numpy())
                targets.extend(batch_targets.view(-1).cpu().numpy())
    finally:
        model.train(was_training)

    if not targets:
        raise ValueError('test_dataset produced no targets; nothing to evaluate')
    return f1_score(targets, predictions, average='weighted')


# The four public functions below used to print "Test Loss: 0.0" from an
# accumulator that was never updated; no loss is computed here, so only the
# F1 score is reported.

def test_model_1(test_dataset, model, device):
    """Print the weighted F1 of ``model`` on ``(inputs, emotions)`` batches.

    The emotions of each batch are the targets. Returns None.
    """
    score = _weighted_f1(test_dataset, model, device, _unpack_erc_batch)
    print(f'Test Weighted F1-Score: {score}')


def test_model_2(test_dataset, model, device):
    """Print the weighted F1 of ``model`` on ``(inputs, emotions)`` batches.

    Same procedure as ``test_model_1``. Returns None.
    """
    score = _weighted_f1(test_dataset, model, device, _unpack_erc_batch)
    print(f'Test Weighted F1-Score: {score}')


def test_model_3(test_dataset, model, device):
    """Print the weighted F1 of ``model`` on ``(inputs, emotions, targets)``
    batches, feeding only the inputs to the model. Returns None.
    """
    score = _weighted_f1(test_dataset, model, device, _unpack_erf_batch)
    print(f'Test Weighted F1-Score: {score}')


def test_model_4(test_dataset, model, device):
    """Print the weighted F1 of ``model`` on ``(inputs, emotions, targets)``
    batches, feeding the inputs and the one-hot emotions to the model.
    Returns None.
    """
    score = _weighted_f1(test_dataset, model, device, _unpack_erf_batch_with_emotions)
    print(f'Test Weighted F1-Score: {score}')

# Model 1 Results

In [23]:
# Rebuild the M1 architecture and restore its saved weights. map_location
# remaps the checkpoint tensors onto `device`, so a file saved on a GPU still
# loads on a CPU-only machine.
loaded_model = LSTMModel(768, 7).to(device)
loaded_model.load_state_dict(torch.load('model_m1_dict.pt', map_location=device))
test_model_1(val_dataset_erc, loaded_model, device)

Test Loss: 0.0, Test Weighted F1-Score: 0.6203351842352914


# Model 2 Results

In [24]:
# Rebuild the M2 architecture and restore its saved weights. map_location
# remaps the checkpoint tensors onto `device`, so a file saved on a GPU still
# loads on a CPU-only machine.
loaded_model = GRUModel_m2(768, 7).to(device)
loaded_model.load_state_dict(torch.load('model_m2_dict.pt', map_location=device))
test_model_2(val_dataset_erc, loaded_model, device)

Test Loss: 0.0, Test Weighted F1-Score: 0.7412858709165464


# Model 3 Results

In [25]:
# Rebuild the M3 architecture and restore its saved weights. map_location
# remaps the checkpoint tensors onto `device`, so a file saved on a GPU still
# loads on a CPU-only machine.
loaded_model = GRUModel(768, 2).to(device)
loaded_model.load_state_dict(torch.load('model_3_dict.pt', map_location=device))
test_model_3(val_dataset_erf, loaded_model, device)

Test Loss: 0.0, Test Weighted F1-Score: 0.30591673359002275


# Model 4 Results

In [26]:
# Rebuild the M4 architecture and restore its saved weights. map_location
# remaps the checkpoint tensors onto `device`, so a file saved on a GPU still
# loads on a CPU-only machine.
loaded_model = GRUModel_emotions(768, 2).to(device)
loaded_model.load_state_dict(torch.load('model_4_dict.pt', map_location=device))
test_model_4(val_dataset_erf, loaded_model, device)

Test Loss: 0.0, Test Weighted F1-Score: 0.6880068166184801
