## M2H2 dataset
1. Raw text data - <br>
(a) Train (train.tsv) - gdrive id: 1Z-Mt2kMtA6ZJ5YA704SRqFtCZsNbPMMR <br>
(b) Test (test.tsv) - gdrive id: 1TWO58qHYYEVcUeaEltVv2xVH1HLVsUnK <br>

2. Sentence-BERT features (512 dims per utterance) - <br>
(a) Train (train_utterance_embeddings_sentenceBERT.txt) - gdrive id: 1decZ9lPDjxlKLJZfpvKW8vUynz6UA01n <br>
(b) Test (test_utterance_embeddings_sentenceBERT.txt) - gdrive id: 1--0jt4tgOGRfajVMYBHuoKKDw-aZjgUW

3. FastText features (300 dims per utterance) - <br>
(a) Train (train_utterance_embeddings_FastText.txt) - gdrive id: 1CP9Q83PQ1eD6D3QpTxQQWhdmpZ4Bb70r <br>
(b) Test (test_utterance_embeddings_FastText.txt) - gdrive id: 11-89-yI6uwslPACgqsTNMxjTOV_LzNK1

4. 3D CNN (ResNext101) features (2048 dims per utterance) - <br>
(a) Train (train_utterance_features_resnext101.txt) - gdrive id: 1J0cc2mf2n03zAGwbLZ9TO1SEHtWAs9Rb <br>
(b) Test (test_utterance_features_resnext101.txt) - gdrive id: 191WO9nVckQnjbiAy3NROZXQOId5AX_w9

5. openSmile features (65 dims per utterance) - <br>
(a) Train (train_features_opensmile_avg.txt) - gdrive id: 1-2isFu4OFEpg4ftrcdpOeHNrRCJ9OLPo <br>
(b) Test (test_features_opensmile_avg.txt) - gdrive id: 1-GlUVqGL4oLtYzfz7Ik1HuzMiGLAGUL3

In [None]:
# importing libraries
import copy
import os
from collections import defaultdict
from textwrap import wrap

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
from matplotlib import rc
from pylab import rcParams
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from torch import nn, optim
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
%matplotlib inline
%config InlineBackend.figure_format='retina'

# Plot styling: apply the seaborn theme, then install the custom colour cycle.
HAPPY_COLORS_PALETTE = [
  "#01BEFE", "#FFDD00", "#FF7D00",
  "#FF006D", "#ADFF02", "#8F00FF",
]
sns.set(style='whitegrid', palette='muted', font_scale=1.2)
happy_palette = sns.color_palette(HAPPY_COLORS_PALETTE)
sns.set_palette(happy_palette)

# Default figure size for every plot in the notebook.
rcParams['figure.figsize'] = (12, 8)

# Seed NumPy and PyTorch with the same value for repeatable runs.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
  device = torch.device("cuda:0")
else:
  device = torch.device("cpu")

In [None]:
# Active modalities, written as {modality: [representation name, feature dimension]}.
# The text modality accepts either of:
#   'text':['FastText',300]
#   'text':['sentence-BERT',512]
# Exactly one assignment below should be left uncommented.
#modality_and_repr_type = {'text':['sentence-BERT',512]}
#modality_and_repr_type = {'video':['resnet101',2048]}
modality_and_repr_type = dict(audio=['opensmile', 65])
#modality_and_repr_type = {'text':['sentence-BERT',512], 'video':['resnet101',2048]}
#modality_and_repr_type = {'text':['sentence-BERT',512], 'audio':['opensmile',65]}
#modality_and_repr_type = {'video':['resnet101',2048], 'audio':['opensmile',65]}
#modality_and_repr_type = {'text':['sentence-BERT',512], 'video':['resnet101',2048], 'audio':['opensmile',65]}

In [None]:
# creating dataset
class M2H2_Dataset(Dataset):
  """Torch dataset over per-utterance feature vectors and their labels.

  `df` must provide a 'Label' column plus one column per selected modality
  ('text', 'video', 'audio'), each cell holding that utterance's feature
  vector. The selected modalities are concatenated column-wise in the fixed
  order text, video, audio.
  """

  def __init__(self, df, modality_and_repr_type):
    # Collect one array per selected modality, always in the same order so
    # the concatenated layout does not depend on the dict's key order.
    per_modality = [
      np.array(list(df[name]))
      for name in ('text', 'video', 'audio')
      if name in modality_and_repr_type
    ]

    if not per_modality:
      features = np.array([])
    elif len(per_modality) == 1:
      features = per_modality[0]
    else:
      features = np.hstack(per_modality)

    labels = np.array(list(df['Label']))

    self.len = features.shape[0]  # number of data points in the dataset
    self.x_data = torch.from_numpy(features)
    self.y_data = torch.from_numpy(labels)

  def __len__(self):
    """Number of utterances in the dataset."""
    return self.len

  def __getitem__(self, index):
    """Return the (features, label) pair stored at `index`."""
    sample = self.x_data[index]
    target = self.y_data[index]
    return sample, target

In [None]:
# declaring train and test dataframes
df_train = pd.DataFrame()
df_test = pd.DataFrame()

if 'text' in modality_and_repr_type: 
  !gdown --id 1Z-Mt2kMtA6ZJ5YA704SRqFtCZsNbPMMR -q # train raw
  !gdown --id 1TWO58qHYYEVcUeaEltVv2xVH1HLVsUnK -q # test raw
  df_train['Label'] = np.array(pd.read_csv('train.tsv',sep='\t')['Label'])
  df_test['Label'] = np.array(pd.read_csv('test.tsv',sep='\t')['Label'])
  print("Downloaded the raw text M2H2 data !")

  # download feature representations
  if modality_and_repr_type['text'][0]=='sentence-BERT':
    !gdown --id 1decZ9lPDjxlKLJZfpvKW8vUynz6UA01n -q # train
    !gdown --id 1--0jt4tgOGRfajVMYBHuoKKDw-aZjgUW -q # test
    df_train['text'] = pd.DataFrame({'text':np.loadtxt('train_utterance_embeddings_sentenceBERT.txt').tolist()})
    df_test['text'] = pd.DataFrame({'text':np.loadtxt('test_utterance_embeddings_sentenceBERT.txt').tolist()})
    print("Downloaded the sentence-BERT embeddings !")
  elif modality_and_repr_type['text'][0]=='FastText':
    !gdown --id 1CP9Q83PQ1eD6D3QpTxQQWhdmpZ4Bb70r -q # train
    !gdown --id 11-89-yI6uwslPACgqsTNMxjTOV_LzNK1 -q # test
    df_train['text'] = pd.DataFrame({'text':np.loadtxt('train_utterance_embeddings_FastText.txt').tolist()})
    df_test['text'] = pd.DataFrame({'text':np.loadtxt('test_utterance_embeddings_FastText.txt').tolist()})
    print("Downloaded the FastText embeddings !")

if 'video' in modality_and_repr_type:
  !gdown --id 1Z-Mt2kMtA6ZJ5YA704SRqFtCZsNbPMMR -q # train raw
  !gdown --id 1TWO58qHYYEVcUeaEltVv2xVH1HLVsUnK -q # test raw
  df_train['Label'] = np.array(pd.read_csv('train.tsv',sep='\t')['Label'])
  df_test['Label'] = np.array(pd.read_csv('test.tsv',sep='\t')['Label'])
  print("Downloaded the raw text M2H2 data !")

  !gdown --id 1J0cc2mf2n03zAGwbLZ9TO1SEHtWAs9Rb -q # train
  !gdown --id 191WO9nVckQnjbiAy3NROZXQOId5AX_w9 -q # test
  df_train['video'] = pd.DataFrame({'video':np.loadtxt('train_utterance_features_resnext101.txt').tolist()})
  df_test['video'] = pd.DataFrame({'video':np.loadtxt('test_utterance_features_resnext101.txt').tolist()})
  print("Downloaded the resnect101 features !")

if 'audio' in modality_and_repr_type:
  !gdown --id 1Z-Mt2kMtA6ZJ5YA704SRqFtCZsNbPMMR -q # train raw
  !gdown --id 1TWO58qHYYEVcUeaEltVv2xVH1HLVsUnK -q # test raw
  df_train['Label'] = np.array(pd.read_csv('train.tsv',sep='\t')['Label'])
  df_test['Label'] = np.array(pd.read_csv('test.tsv',sep='\t')['Label'])
  print("Downloaded the raw text M2H2 data !")

  !gdown --id 1-2isFu4OFEpg4ftrcdpOeHNrRCJ9OLPo -q # train
  !gdown --id 1-GlUVqGL4oLtYzfz7Ik1HuzMiGLAGUL3 -q # test
  df_train['audio'] = pd.DataFrame({'audio':np.loadtxt('train_features_opensmile_avg.txt').tolist()})
  df_test['audio'] = pd.DataFrame({'audio':np.loadtxt('test_features_opensmile_avg.txt').tolist()})
  print("Downloaded the opensmile averaged features !")

''' Vanilla data split without class balancing'''
# train-test split
df_train, df_val = train_test_split(
  df_train,
  test_size=0.1,
  random_state=RANDOM_SEED
)
print("Number of utterances in train : ", len(df_train))
print("Number of utterances in val : ", len(df_val))

In [None]:
# creating data loader
def create_data_loader(df, batch_size, shuffle=False, num_workers=2):
  """Wrap a dataframe of utterances in a batched `DataLoader`.

  Args:
    df: dataframe accepted by `M2H2_Dataset` (label column plus one feature
      column per selected modality).
    batch_size: number of utterances per batch.
    shuffle: whether to reshuffle the data every epoch. Defaults to False,
      which keeps the previous fixed iteration order.
    num_workers: number of loader worker processes. Defaults to 2, the value
      that was previously hard-coded.

  Returns:
    A `DataLoader` over an `M2H2_Dataset` built from `df` and the
    module-level `modality_and_repr_type` selection.
  """
  ds = M2H2_Dataset(df, modality_and_repr_type)
  return DataLoader(
    ds,
    batch_size=batch_size,
    shuffle=shuffle,
    num_workers=num_workers,
  )

In [None]:
# One loader per split, all sharing the same batch size.
BATCH_SIZE = 4
train_data_loader, val_data_loader, test_data_loader = (
  create_data_loader(split_df, BATCH_SIZE)
  for split_df in (df_train, df_val, df_test)
)

In [None]:
class HumorClassifier(nn.Module):
  """Single linear layer mapping a feature vector to per-class scores."""

  def __init__(self, n_classes, feature_len):
    super().__init__()
    # Attribute name `out` determines the state-dict keys (out.weight / out.bias).
    self.out = nn.Linear(in_features=feature_len, out_features=n_classes)

  def forward(self, x):
    """Return the raw (unnormalised) class scores for batch `x`."""
    scores = self.out(x)
    return scores

In [None]:
# Classifier input width = sum of the feature dimensions of the selected modalities.
feature_len = sum(repr_spec[1] for repr_spec in modality_and_repr_type.values())

# Build the network with two output classes (humorous / not humorous)
# and place it on the selected device.
model = HumorClassifier(2, feature_len=feature_len).to(device)

In [None]:
# Training hyper-parameters, loss and optimiser.
EPOCHS = 20
loss_fn = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=5e-3)
# Total number of optimisation steps across all epochs.
total_steps = EPOCHS * len(train_data_loader)

In [None]:
def train_epoch(
  model,
  data_loader,
  loss_fn,
  optimizer,
  device,
  n_examples
):
  """Run one optimisation pass over `data_loader`.

  Args:
    model: network to train; switched to training mode.
    data_loader: iterable of (inputs, targets) batches.
    loss_fn: callable taking (outputs, targets) and returning a scalar loss.
    optimizer: optimiser stepping the model's parameters.
    device: device every batch is moved to before the forward pass.
    n_examples: denominator used for the accuracy.

  Returns:
    (accuracy, mean_loss). `accuracy` is a 0-dim float64 CPU tensor equal to
    correct predictions / n_examples. `mean_loss` is the mean of the
    per-batch loss values, or nan when the loader yields no batches.
  """
  model = model.train()
  losses = []
  correct_predictions = 0
  for inputs, targets in data_loader:
    # Move the batch to the model's device first: without this the forward
    # pass fails as soon as the model lives on a GPU.
    inputs = inputs.to(device).float()
    targets = targets.to(device).long()
    outputs = model(inputs)
    _, preds = torch.max(outputs, dim=1)
    loss = loss_fn(outputs, targets)
    # Accumulate as a Python int so an empty loader still yields a valid count.
    correct_predictions += int(torch.sum(preds == targets))
    losses.append(loss.item())
    loss.backward()
    #nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    optimizer.step()
    optimizer.zero_grad()
  # Build the accuracy on the CPU so it can be logged and plotted directly.
  accuracy = torch.tensor(correct_predictions, dtype=torch.float64) / n_examples
  mean_loss = np.mean(losses) if losses else float('nan')
  return accuracy, mean_loss

In [None]:
def eval_model(model, data_loader, loss_fn, device, n_examples):
  """Evaluate `model` on `data_loader` without updating its parameters.

  Args:
    model: network to evaluate; switched to evaluation mode.
    data_loader: iterable of (inputs, targets) batches.
    loss_fn: callable taking (outputs, targets) and returning a scalar loss.
    device: device every batch is moved to before the forward pass.
    n_examples: denominator used for the accuracy.

  Returns:
    (accuracy, mean_loss). `accuracy` is a 0-dim float64 CPU tensor equal to
    correct predictions / n_examples. `mean_loss` is the mean of the
    per-batch loss values, or nan when the loader yields no batches.
  """
  model = model.eval()
  losses = []
  correct_predictions = 0
  with torch.no_grad():
    for inputs, targets in data_loader:
      inputs = inputs.to(device).float()
      targets = targets.to(device).long()
      outputs = model(inputs)
      _, preds = torch.max(outputs, dim=1)
      loss = loss_fn(outputs, targets)
      # Accumulate as a Python int so an empty loader still yields a valid count.
      correct_predictions += int(torch.sum(preds == targets))
      losses.append(loss.item())
  # Build the accuracy on the CPU so it can be logged and plotted directly.
  accuracy = torch.tensor(correct_predictions, dtype=torch.float64) / n_examples
  mean_loss = np.mean(losses) if losses else float('nan')
  return accuracy, mean_loss


In [None]:
# Per-epoch metrics, stored as plain floats so they can be plotted regardless
# of which device the metric tensors were computed on.
history = defaultdict(list)
best_accuracy = 0
# Keep an independent snapshot: a plain `best_model = model` would only alias
# the live model, which keeps training and ends up as the last-epoch weights.
best_model = copy.deepcopy(model)
for epoch in range(EPOCHS):
  print(f'Epoch {epoch + 1}/{EPOCHS}')
  print('-' * 10)
  train_acc, train_loss = train_epoch(
    model,
    train_data_loader,
    loss_fn,
    optimizer,
    device,
    len(df_train)
  )
  print(f'Train loss {train_loss} accuracy {train_acc}')
  val_acc, val_loss = eval_model(
    model,
    val_data_loader,
    loss_fn,
    device,
    len(df_val)
  )

  print(f'Val   loss {val_loss} accuracy {val_acc}')
  print()
  history['train_acc'].append(float(train_acc))
  history['train_loss'].append(train_loss)
  history['val_acc'].append(float(val_acc))
  history['val_loss'].append(val_loss)
  if val_acc > best_accuracy:
    # Freeze the weights of the best epoch seen so far, in memory and on disk.
    best_model = copy.deepcopy(model)
    torch.save(model.state_dict(), 'best_model_state.bin')
    best_accuracy = val_acc

In [None]:
# Accuracy per epoch for the training and validation splits.
for history_key, curve_label in (
  ('train_acc', 'train accuracy'),
  ('val_acc', 'validation accuracy'),
):
  plt.plot(history[history_key], label=curve_label)
plt.title('Training history')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0, 1])
plt.legend();

In [None]:
def get_predictions(model, data_loader, device=None):
  """Collect predictions, class probabilities and targets over a loader.

  Args:
    model: network to run; switched to evaluation mode.
    data_loader: iterable of (inputs, targets) batches.
    device: device the batches are moved to. When None, the device of the
      model's first parameter is used (CPU if the model has no parameters),
      so the function no longer depends on a module-level `device`.

  Returns:
    (review_texts, predictions, prediction_probs, real_values), all on the CPU:
      review_texts: always an empty list (nothing populates it).
      predictions: 1-D tensor of predicted class indices.
      prediction_probs: softmax of the model outputs over dim 1, one row per
        example. Raw outputs were returned here previously, which are scores
        rather than probabilities.
      real_values: 1-D tensor of target class indices.
    With an empty loader the three tensors are empty.
  """
  model = model.eval()
  if device is None:
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

  review_texts = []
  pred_batches = []
  prob_batches = []
  target_batches = []
  with torch.no_grad():
    for inputs, targets in data_loader:
      inputs = inputs.to(device).float()
      targets = targets.to(device).long()
      outputs = model(inputs)
      _, preds = torch.max(outputs, dim=1)
      pred_batches.append(preds.cpu())
      prob_batches.append(torch.softmax(outputs, dim=1).cpu())
      target_batches.append(targets.cpu())

  if not pred_batches:
    # torch.cat / torch.stack reject empty lists, so build empty results by hand.
    predictions = torch.empty(0, dtype=torch.long)
    prediction_probs = torch.empty(0)
    real_values = torch.empty(0, dtype=torch.long)
  else:
    predictions = torch.cat(pred_batches)
    prediction_probs = torch.cat(prob_batches)
    real_values = torch.cat(target_batches)
  return review_texts, predictions, prediction_probs, real_values


# Score the held-out test split with the best model kept during training.
y_review_texts, y_pred, y_pred_probs, y_test = get_predictions(model=best_model, data_loader=test_data_loader)

In [None]:
# Display names for the report rows, passed to sklearn as target_names.
class_names = ['not humorous', 'humorous']
report_text = classification_report(y_test, y_pred, target_names=class_names)
print(report_text)

In [None]:
# Confusion matrix of true test labels against predicted labels.
test_confusion = confusion_matrix(y_test, y_pred)
print(test_confusion)