In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchmetrics import AUROC
import pandas as pd

from transformers import BertTokenizerFast as BertTokenizer, BertModel, AdamW, RobertaModel, RobertaTokenizer, get_linear_schedule_with_warmup
from transformers import AutoConfig, AutoTokenizer

import pytorch_lightning as pl
from pytorch_lightning.metrics.functional import accuracy, f1, auroc
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger

from tqdm.auto import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc

%matplotlib inline
%config InlineBackend.figure_format='retina'

# Single seed for the whole run; handed to pl.seed_everything() at the end of this cell.
RANDOM_SEED = 42

# Plot appearance. The figure size is assigned after the seaborn theme call — keep this order.
sns.set(style='whitegrid', palette='muted', font_scale=1.2)
# Custom six-colour palette installed as the seaborn palette.
HAPPY_COLORS_PALETTE = ["#01BEFE", "#FFDD00", "#FF7D00", "#FF006D", "#ADFF02", "#8F00FF"]
sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))
rcParams['figure.figsize'] = 12, 8
# Seed the run through Lightning for reproducibility.
pl.seed_everything(RANDOM_SEED)

In [None]:
# Device passed as map_location to torch.load() when the embedding files are read below.
device = torch.device('cpu')

### Text embeddings 

In [None]:
# Locations of the saved text-embedding files for the train / dev / test splits.
# NOTE(review): absolute, machine-specific paths — consider a configurable base directory.
train_PATH = "C:/Data/Sentiment Analysis/MELD/Processed/Processed_final/embeddings_v2/train.pt"
dev_PATH = "C:/Data/Sentiment Analysis/MELD/Processed/Processed_final/embeddings_v2/dev.pt"
test_PATH = "C:/Data/Sentiment Analysis/MELD/Processed/Processed_final/embeddings_v2/test.pt"

In [None]:
# Load the text embeddings for each split onto `device`.
# Later cells read the "fileID", "labels" and "embeddings" entries of these objects.
# NOTE(review): torch.load unpickles the file — only load files from a trusted source.
train_embeddings = torch.load(train_PATH, map_location=device)
dev_embeddings = torch.load(dev_PATH, map_location=device)
test_embeddings = torch.load(test_PATH, map_location=device)

#### Last four layers 

In [None]:
# Locations of the "*_lfl" (last four layers) text-embedding files.
# These assignments replace the train_PATH / dev_PATH / test_PATH values set earlier.
# NOTE(review): absolute, machine-specific paths — consider a configurable base directory.
train_PATH = "C:/Data/Sentiment Analysis/MELD/Processed/Processed_final/embeddings_v2/train_lfl.pt"
dev_PATH = "C:/Data/Sentiment Analysis/MELD/Processed/Processed_final/embeddings_v2/dev_lfl.pt"
test_PATH = "C:/Data/Sentiment Analysis/MELD/Processed/Processed_final/embeddings_v2/test_lfl.pt"

In [None]:
# Load the last-four-layers text embeddings. When the notebook is run top to bottom this
# overwrites the text embeddings loaded in the previous section, so these are the ones
# used by every later cell.
# NOTE(review): torch.load unpickles the file — only load files from a trusted source.
train_embeddings = torch.load(train_PATH, map_location=device)
dev_embeddings = torch.load(dev_PATH, map_location=device)
test_embeddings = torch.load(test_PATH, map_location=device)

### Audio embeddings 

In [None]:
# Locations of the saved audio-embedding files for the train / dev / test splits.
# NOTE(review): absolute, machine-specific paths — consider a configurable base directory.
train_wav_PATH = "C:/Data/Sentiment Analysis/MELD/Processed/wav2vec/embeddings_v2/train_wav.pt"
dev_wav_PATH = "C:/Data/Sentiment Analysis/MELD/Processed/wav2vec/embeddings_v2/dev_wav.pt"
test_wav_PATH = "C:/Data/Sentiment Analysis/MELD/Processed/wav2vec/embeddings_v2/test_wav.pt"

In [None]:
# Load the audio embeddings for each split onto `device`.
# Later cells read the "fileID", "labels" and "embeddings" entries of these objects.
# NOTE(review): torch.load unpickles the file — only load files from a trusted source.
train_wav_embeddings = torch.load(train_wav_PATH, map_location=device)
dev_wav_embeddings = torch.load(dev_wav_PATH, map_location=device)
test_wav_embeddings = torch.load(test_wav_PATH, map_location=device)

In [None]:
# Sanity check: the text and audio files should list the same fileIDs in the same order,
# because the dataset-building cells below pair the two modalities by position.
# If this comparison is False, the entries have to be matched by fileID instead
# (e.g. with a nested loop or a lookup table).
train_embeddings["fileID"] == train_wav_embeddings["fileID"]

In [None]:
# Each stored embedding carries an extra leading dimension of size 1
# (e.g. [1, 768] instead of [768]) because unsqueeze was applied when the
# embeddings were created. Stripping that dimension makes the vectors easier
# to use when training a classifier.
print(train_embeddings["embeddings"][0].shape)
print(train_wav_embeddings["embeddings"][0].shape)

In [None]:
# The dataset-building loops below therefore index with an extra `[0]`
# to drop the unused leading dimension and keep only the embedding vector.
train_embeddings["embeddings"][0][0].shape

## Train 

In [None]:
# Paired training samples: one dict per utterance holding both modalities.
train_dataset = []

# Text and audio entries are paired by position, so walk both by index.
for idx in range(len(train_wav_embeddings["fileID"])):
    wav_file_id = train_wav_embeddings["fileID"][idx]
    text_file_id = train_embeddings["fileID"][idx]
    # Report any position where the two modalities disagree on the fileID.
    if wav_file_id != text_file_id:
        print(idx, wav_file_id, text_file_id)

    wav_label = train_wav_embeddings["labels"][idx]
    text_label = train_embeddings["labels"][idx]
    # Report any position where the two modalities disagree on the label.
    if wav_label != text_label:
        print(idx, wav_label, text_label)

    # The trailing [0] drops the extra leading dimension of each stored embedding.
    sample = {
        "fileID": text_file_id,
        "label": text_label,
        "wav_embeddings": train_wav_embeddings["embeddings"][idx][0],
        "text_embeddings": train_embeddings["embeddings"][idx][0],
    }
    train_dataset.append(sample)

In [None]:
# Inspect the first paired training sample.
train_dataset[0]

In [None]:
# Number of paired training samples.
len(train_dataset)

## Dev

In [None]:
# Paired dev samples: one dict per utterance holding both modalities.
dev_dataset = []

# Text and audio entries are paired by position, so walk both by index.
for idx in range(len(dev_wav_embeddings["fileID"])):
    wav_file_id = dev_wav_embeddings["fileID"][idx]
    text_file_id = dev_embeddings["fileID"][idx]
    # Report any position where the two modalities disagree on the fileID.
    if wav_file_id != text_file_id:
        print(idx, wav_file_id, text_file_id)

    wav_label = dev_wav_embeddings["labels"][idx]
    text_label = dev_embeddings["labels"][idx]
    # Report any position where the two modalities disagree on the label.
    if wav_label != text_label:
        print(idx, wav_label, text_label)

    # The trailing [0] drops the extra leading dimension of each stored embedding.
    sample = {
        "fileID": text_file_id,
        "label": text_label,
        "wav_embeddings": dev_wav_embeddings["embeddings"][idx][0],
        "text_embeddings": dev_embeddings["embeddings"][idx][0],
    }
    dev_dataset.append(sample)

In [None]:
# Number of paired dev samples.
len(dev_dataset)

## Test 

In [None]:
# Paired test samples: one dict per utterance holding both modalities.
test_dataset = []

# Text and audio entries are paired by position, so walk both by index.
for idx in range(len(test_wav_embeddings["fileID"])):
    wav_file_id = test_wav_embeddings["fileID"][idx]
    text_file_id = test_embeddings["fileID"][idx]
    # Report any position where the two modalities disagree on the fileID.
    if wav_file_id != text_file_id:
        print(idx, wav_file_id, text_file_id)

    wav_label = test_wav_embeddings["labels"][idx]
    text_label = test_embeddings["labels"][idx]
    # Report any position where the two modalities disagree on the label.
    if wav_label != text_label:
        print(idx, wav_label, text_label)

    # The trailing [0] drops the extra leading dimension of each stored embedding.
    sample = {
        "fileID": text_file_id,
        "label": text_label,
        "wav_embeddings": test_wav_embeddings["embeddings"][idx][0],
        "text_embeddings": test_embeddings["embeddings"][idx][0],
    }
    test_dataset.append(sample)

In [None]:
# Number of paired test samples.
len(test_dataset)

## Encapsulating all data 

In [None]:
class DataModule(pl.LightningDataModule):
    """Lightning data module that serves the pre-built train / dev / test sample lists.

    Args:
        train_dataset: Samples used by ``train_dataloader`` (shuffled).
        test_dataset: Samples used by ``test_dataloader``.
        dev_dataset: Samples used by ``val_dataloader``.
        batch_size: Batch size shared by all three loaders.

    Note that the positional order of the constructor is (train, test, dev).
    """

    def __init__(self, train_dataset, test_dataset, dev_dataset, batch_size=16):
        super().__init__()
        self.train, self.dev, self.test = train_dataset, dev_dataset, test_dataset
        self.batch_size = batch_size

    def _make_loader(self, samples, **loader_options):
        """Build a single-process DataLoader over ``samples`` with the shared batch size."""
        return DataLoader(
            samples,
            batch_size=self.batch_size,
            num_workers=0,
            **loader_options
        )

    def train_dataloader(self):
        """Return the shuffled loader over the training samples."""
        return self._make_loader(self.train, shuffle=True)

    def val_dataloader(self):
        """Return the loader over the dev samples."""
        return self._make_loader(self.dev)

    def test_dataloader(self):
        """Return the loader over the test samples."""
        return self._make_loader(self.test)

#### Instance for class DataModule

In [None]:
# Training budget and batch size shared by the data module and the step-count cells.
N_EPOCHS = 100
BATCH_SIZE = 16

# Keyword arguments spell out the (train, test, dev) order of DataModule's constructor.
data_module = DataModule(
    train_dataset=train_dataset,
    test_dataset=test_dataset,
    dev_dataset=dev_dataset,
    batch_size=BATCH_SIZE,
)

#### Modelling Prep 

In [None]:
# Size of one text embedding vector (first half of the classifier input).
train_dataset[0]["text_embeddings"].size()

In [None]:
# Size of one audio embedding vector (second half of the classifier input).
train_dataset[0]["wav_embeddings"].size()

In [None]:
# Concatenate one text and one audio embedding to check the combined input size.
testt = torch.cat((train_dataset[0]["text_embeddings"], train_dataset[0]["wav_embeddings"]))

In [None]:
# Combined size; this is what the first linear layer of the model has to accept.
testt.size()

## Modelling 

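This model is a feed-forward classifier rather than a RoBERTa model itself: the pre-computed RoBERTa text embeddings and the audio embeddings are concatenated and passed through a stack of linear layers that ends in a linear classification layer.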

In [None]:
class Tagger(pl.LightningModule):
    """Classifier over concatenated text and audio embeddings.

    The two embedding batches are concatenated along dim 1, passed through four
    linear layers and a final linear classification head.

    Args:
        n_classes: Number of output classes.
        n_training_steps: Total number of optimizer steps for the linear
            learning-rate schedule. May be None (e.g. inference only), in which
            case no scheduler is configured.
        n_warmup_steps: Number of warm-up steps for the schedule. May be None.
    """

    def __init__(self, n_classes: int, n_training_steps=None, n_warmup_steps=None):
        super().__init__()

        # pooler output config
        #self.linear_1 = nn.Linear(1536, 1236)
        #self.linear_2 = nn.Linear(1236, 936)
        #self.linear_3 = nn.Linear(936, 636)
        #self.linear_4 = nn.Linear(636, 256)

        # last four hidden layers config
        self.linear_1 = nn.Linear(3840, 2928)
        self.linear_2 = nn.Linear(2928, 2016)
        self.linear_3 = nn.Linear(2016, 1104)
        self.linear_4 = nn.Linear(1104, 256)
        # NOTE(review): forward() applies these layers back to back with no
        # activation in between, so together they still form a single affine map.
        # Adding a non-linearity would change what existing checkpoints compute,
        # so the architecture is deliberately left untouched here.

        self.classifier = nn.Linear(256, n_classes)
        # Kept so the metrics in training_epoch_end follow the configured class count.
        self.n_classes = n_classes
        self.n_training_steps = n_training_steps
        self.n_warmup_steps = n_warmup_steps
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, text_embeddings, wav_embeddings, labels=None):
        """Classify a batch of paired embeddings.

        Args:
            text_embeddings: Batch of text embeddings, concatenated along dim 1.
            wav_embeddings: Batch of audio embeddings, concatenated along dim 1.
            labels: Optional class indices; flattened before the loss is computed.

        Returns:
            Tuple ``(loss, probabilities)``. ``loss`` is 0 when ``labels`` is None;
            ``probabilities`` is the softmax over the class dimension.
        """
        features = torch.cat((text_embeddings, wav_embeddings), 1)
        hidden = self.linear_1(features)
        hidden = self.linear_2(hidden)
        hidden = self.linear_3(hidden)
        hidden = self.linear_4(hidden)
        logits = self.classifier(hidden)

        loss = 0
        if labels is not None:
            # CrossEntropyLoss applies log-softmax itself, so it must be fed the
            # raw logits; feeding it softmax output squashes the loss and gradients.
            loss = self.criterion(logits, labels.flatten())

        # Callers still receive probabilities, as before.
        return loss, torch.softmax(logits, dim=1)

    def training_step(self, batch, batch_idx):
        """Compute and log the training loss; pass predictions and labels to the epoch-end hook."""
        text_embeddings = batch["text_embeddings"]
        audio_embeddings = batch["wav_embeddings"]
        labels = batch["label"]
        loss, outputs = self(text_embeddings, audio_embeddings, labels)
        self.log("train_loss", loss, prog_bar=True, logger=True)
        # Predictions are only used for metrics, so drop their autograd graph here.
        return {"loss": loss, "predictions": outputs.detach(), "labels": labels}

    def validation_step(self, batch, batch_idx):
        """Compute and log the validation loss (monitored as ``val_loss``)."""
        text_embeddings = batch["text_embeddings"]
        audio_embeddings = batch["wav_embeddings"]
        labels = batch["label"]
        loss, outputs = self(text_embeddings, audio_embeddings, labels)
        self.log("val_loss", loss, prog_bar=True, logger=True)
        return loss

    def test_step(self, batch, batch_idx):
        """Compute and log the test loss."""
        text_embeddings = batch["text_embeddings"]
        audio_embeddings = batch["wav_embeddings"]
        labels = batch["label"]
        loss, outputs = self(text_embeddings, audio_embeddings, labels)
        self.log("test_loss", loss, prog_bar=True, logger=True)
        return loss

    def training_epoch_end(self, outputs):
        """Print training accuracy and AUROC over all batches of the finished epoch."""
        # Flatten the labels once so accuracy and AUROC both see a 1-D target.
        labels = torch.cat(
            [step["labels"].detach().cpu().reshape(-1) for step in outputs]
        ).int()
        predictions = torch.cat(
            [step["predictions"].detach().cpu() for step in outputs]
        )
        pred = torch.argmax(predictions, dim=1)

        train_acc = accuracy(pred, labels, num_classes=self.n_classes)
        print("Training Accuracy:", train_acc)

        auroc_metric = AUROC(num_classes=self.n_classes)
        print("AUROC:", auroc_metric(predictions, labels))

    def configure_optimizers(self):
        """Return AdamW, plus a per-step linear warm-up schedule when step counts are set."""
        optimizer = AdamW(self.parameters(), lr=2e-5)

        # Without step counts (e.g. a model built only for inference) there is
        # nothing to schedule, so use the optimizer on its own.
        if self.n_training_steps is None or self.n_warmup_steps is None:
            return optimizer

        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=self.n_warmup_steps,
            num_training_steps=self.n_training_steps
        )

        return dict(
            optimizer=optimizer,
            lr_scheduler=dict(
                scheduler=scheduler,
                interval='step'
            )
        )

In [None]:
# Optimizer steps per epoch. The training DataLoader is built without drop_last,
# so the final partial batch also produces a step: round up instead of down,
# otherwise the linear schedule reaches zero before training has finished.
steps_per_epoch = (len(train_dataset) + BATCH_SIZE - 1) // BATCH_SIZE
total_training_steps = steps_per_epoch * N_EPOCHS

In [None]:
# Use one fifth of the total training steps as learning-rate warm-up.
warmup_steps = total_training_steps // 5
# Display both step counts.
warmup_steps, total_training_steps

#### Instance for modelling class Tagger 

In [None]:
# Three-class tagger wired to the total / warm-up step counts computed above.
model = Tagger(
    n_classes=3,
    n_training_steps=total_training_steps,
    n_warmup_steps=warmup_steps,
)

## Training 

In [None]:
# Checkpoint callback: keeps only the single best model (save_top_k=1),
# where "best" means the lowest logged val_loss (mode="min").
# NOTE(review): dirpath is an absolute, user-specific path — consider making it configurable.
checkpoint_callback = ModelCheckpoint(
    dirpath="C:/Users/id301281/NLP/NLU/MELD/Fusion",
    filename="best-checkpoint",
    save_top_k=1,
    verbose=True,
    monitor="val_loss",
    mode="min"
    )

In [None]:
# Early stopping: end training once val_loss has not improved for 30 epochs (patience=30).
early_stopping_callback = EarlyStopping(monitor='val_loss', patience=30)

In [None]:
# Build the trainer with the checkpoint and early-stopping callbacks.
# Request a GPU only when one is actually visible; a hard-coded gpus=1 makes
# Trainer construction fail on a CPU-only machine.
trainer = pl.Trainer(
    callbacks=[checkpoint_callback, early_stopping_callback],
    max_epochs=N_EPOCHS,
    gpus=1 if torch.cuda.is_available() else 0,
    progress_bar_refresh_rate=30
)

In [None]:
# Show whether PyTorch can see a CUDA device on this machine.
torch.cuda.is_available()

In [None]:
# Train the model using the loaders provided by the data module.
trainer.fit(model, data_module)

### Storing preds 

In [None]:
# Reload the best checkpoint recorded by the trainer's checkpoint callback.
# n_classes is passed again because Tagger.__init__ requires it; the step counts
# keep their None defaults since no further training happens here.
trained_model = Tagger.load_from_checkpoint(
  trainer.checkpoint_callback.best_model_path,
  n_classes=3
)

# Switch to inference mode and freeze the weights before predicting.
trained_model.eval()
trained_model.freeze()

In [None]:
# Run inference on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
trained_model = trained_model.to(device)

# Per-sample results collected for the evaluation cells below.
test_predictions = {"fileID_roberta": [], "predicted_roberta": [], "actual_roberta": []}

for sample in tqdm(test_dataset):
    # The model expects a batch dimension, so wrap each sample as a batch of one.
    text_batch = sample["text_embeddings"].unsqueeze(dim=0).to(device)
    wav_batch = sample["wav_embeddings"].unsqueeze(dim=0).to(device)
    _, probabilities = trained_model(text_batch, wav_batch)

    # Index of the most probable class, as a plain Python int.
    predicted_class = torch.argmax(probabilities.flatten()).item()
    test_predictions["predicted_roberta"].append(predicted_class)
    test_predictions["actual_roberta"].append(sample["label"])
    test_predictions["fileID_roberta"].append(sample["fileID"])

### Evaluation

In [None]:
# Run the test loop with the trainer used for fitting; no model or dataloader is passed.
# NOTE(review): confirm which weights (last epoch vs. best checkpoint) this Lightning
# version evaluates when test() is called without arguments.
trainer.test()

In [None]:
# Predicted and true class ids collected by the prediction loop, in matching order.
preds = test_predictions["predicted_roberta"]
labels = test_predictions["actual_roberta"]

In [None]:
# Test accuracy. Arguments follow sklearn's (y_true, y_pred) order, matching the
# classification_report and confusion_matrix calls below; the value itself is unchanged.
accuracy_score(labels, preds)

In [None]:
# Sentiment name -> class id. The key order is relied on below for report and axis labels.
encode_map = {'negative': 0,'neutral': 1,'positive': 2}

In [None]:
# Per-class precision / recall / F1. Passing the dict as target_names uses its keys, in order.
# NOTE(review): this assumes every class id occurs in labels or preds — confirm for small test sets.
print(classification_report(labels, preds, target_names=encode_map))

In [None]:
def show_confusion_matrix(confusion_matrix):
    """Draw ``confusion_matrix`` as an annotated heatmap with labelled axes.

    Args:
        confusion_matrix: Table of integer counts (annotations use the "d" format).
            Note that this parameter shadows sklearn's ``confusion_matrix`` inside
            the function body.
    """
    heatmap = sns.heatmap(confusion_matrix, annot=True, fmt="d", cmap="YlGnBu")
    # Re-apply the tick labels: horizontal on the y axis, tilted 30 degrees on the x axis.
    for axis, angle in ((heatmap.yaxis, 0), (heatmap.xaxis, 30)):
        axis.set_ticklabels(axis.get_ticklabels(), rotation=angle, ha='right')
    plt.ylabel('True sentiment')
    plt.xlabel('Predicted sentiment')

# Confusion matrix of true (rows) vs. predicted (columns) classes, labelled with the
# sentiment names from encode_map and rendered as a heatmap.
# NOTE(review): if a class never occurs in labels or preds the matrix is smaller than
# the label list — consider passing the class ids explicitly to confusion_matrix.
cm = confusion_matrix(labels, preds)
df_cm = pd.DataFrame(cm, index=encode_map, columns=encode_map)
show_confusion_matrix(df_cm)