In [51]:
# Mount Google Drive at /content/gdrive. Later cells read the dataset CSV and
# archive from /content/gdrive/MyDrive and write the results log and model
# checkpoints there.
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [52]:
!pip install allosaurus



In [53]:
#These libraries help to interact with the operating system and the runtime environment respectively
import os
import sys
import pickle

#Model/Training related libraries
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn.utils.rnn import pad_sequence

#Dataloader libraries
from torch.utils.data import DataLoader, Dataset

# Transforms and datasets
import torchvision.transforms as transforms
import torchvision.datasets as dset

import time
import matplotlib.pyplot as plt
from PIL import Image
import pandas as pd
from tqdm import tqdm
import random

# Allosaurus
from allosaurus.audio import read_audio
from allosaurus.app import read_recognizer
from allosaurus.am.utils import *


# Cross validation
from sklearn.model_selection import KFold

In [54]:
# Global Allosaurus recognizer. MyDataset calls `recognizer.pm.compute` on each
# audio file and pad_collate calls `recognizer.am` on each padded batch.
recognizer = read_recognizer()

In [55]:
# Load the IEMOCAP metadata table; the cells below use its `emotion` and `path`
# columns. The bare `df.shape` displays (rows, columns) as the cell output.
df = pd.read_csv("/content/gdrive/MyDrive/iemocap_full_dataset.csv")
df.shape

(10039, 7)

In [56]:
# Keep only the rows labelled 'neu', 'hap', 'sad' or 'ang'; this also removes
# the unlabelled 'xxx' rows.
df = df[df.emotion.isin(['neu', 'hap', 'sad', 'ang'])]

# Copy of the table whose `path` column is reduced to the bare file name.
df_unedit = df.copy()
df_unedit["path"] = df_unedit["path"].apply(lambda full_path: full_path.split('/')[-1])
all_files = list(df_unedit.path)
file_to_emotion = dict(zip(df_unedit.path, df_unedit.emotion))

# Paths exactly as stored in the table, in table order.
all_full_files = list(df.path)
# print(df)
# print(df_unedit)
# print(len(file_to_emotion))
# print(file_to_emotion)
# print(all_full_files)

In [57]:
from collections import Counter

# Integer class id for each emotion that is kept.
# emotion_to_label = {'neu': 0, 'fru': 1, 'sad': 2, 'sur': 3, 'ang': 4, 'hap': 5, 'exc': 6, 'fea': 7, 'dis': 8, 'oth': 9}
emotion_to_label = dict(neu=0, hap=1, sad=2, ang=3)
# Inverse lookup: class id -> emotion name.
label_to_emotion = dict((label, emotion) for emotion, label in emotion_to_label.items())
print(emotion_to_label)
print(label_to_emotion)

# Count how many files carry each emotion.
emotion_instances_list = list(file_to_emotion.values())
counter = Counter(emotion_instances_list)
print(counter)

{'neu': 0, 'hap': 1, 'sad': 2, 'ang': 3}
{0: 'neu', 1: 'hap', 2: 'sad', 3: 'ang'}
Counter({'neu': 1708, 'ang': 1103, 'sad': 1084, 'hap': 595})


In [None]:
# Map each audio file name to its integer class id.
file_to_label = {name: emotion_to_label[emotion] for name, emotion in file_to_emotion.items()}
# Show the size and a small sample rather than dumping every entry of the
# mapping into the cell output.
print(len(file_to_label), "files, e.g.", list(file_to_label.items())[:5])

In [59]:
class MyDataset(Dataset):
    """Audio files paired with their class labels.

    Items are produced lazily: indexing reads the audio file and passes it
    through the global Allosaurus recognizer's `pm.compute`.
    """

    def __init__(self, file_list, target_list):
        """Store the file paths and the parallel sequence of labels."""
        # `x` / `y` are aliases of the same two sequences.
        self.file_list = self.x = file_list
        self.target_list = self.y = target_list
        # Number of distinct label values present in `target_list`.
        self.num_classes = len(set(target_list))

    def __len__(self):
        """Return the number of files."""
        return len(self.file_list)

    def __getitem__(self, index):
        """Return `(features, feature_length, label)` for the file at `index`.

        features       -- tensor built from `recognizer.pm.compute(...)`
        feature_length -- int32 tensor of shape (1,) holding `features.shape[0]`
        label          -- float tensor of shape (1,) holding the class id
        """
        audio = read_audio(self.file_list[index])
        feats = torch.tensor(recognizer.pm.compute(audio)).detach()
        feat_len = torch.tensor(np.array([feats.shape[0]], dtype=np.int32)).detach()
        label = torch.Tensor([self.target_list[index]])
        return feats, feat_len, label

In [None]:
!tar -xvf  "/content/gdrive/MyDrive/IEMOCAP_full_release_withoutVideos.tar.gz" -C "/content/data"

In [60]:
# collate function
def pad_collate(batch):
    """Collate dataset samples and run them through the Allosaurus acoustic model.

    Args:
        batch: list of `(features, feature_length, label)` tuples as returned
            by `MyDataset.__getitem__`; the length and the label are
            1-element tensors.

    Returns:
        `(output_tensor, input_lengths, targets)`. `output_tensor` is the
        result of `recognizer.am(..., return_lstm=True)` permuted with
        `(1, 2, 0)`, `input_lengths` is the second value returned by
        `recognizer.am`, and `targets` holds the labels reordered to match the
        length-sorted batch. Both model outputs are detached.
    """
    # batch looks like [(x0, xlen0, y0), (x4, xlen4, y4), (x2, xlen2, y2)... ]
    feats = [sample[0] for sample in batch]
    # Concatenating the 1-element tensors always yields a 1-D (batch,) tensor.
    # The earlier pad_sequence(...).squeeze() collapsed a batch of one sample
    # to a 0-D tensor, which broke the index-based reordering below.
    feat_lens = torch.cat([sample[1].reshape(-1) for sample in batch])
    target_list = torch.cat([sample[2].reshape(-1) for sample in batch]).float()

    feats = pad_sequence(feats, batch_first=True, padding_value=0)  # batch, len, features
    idx = torch.argsort(feat_lens, descending=True)  # sorting the input in descending order as required by the lstms in AM.

    targets = target_list[idx]
    tensor_batch_feat, tensor_batch_feat_len = move_to_tensor([feats[idx], feat_lens[idx]], device_id=-1)  # converting to the required tensors

    # Features. The acoustic model is only used as a feature extractor, so no
    # autograd graph is needed for it.
    with torch.no_grad():
        output_tensor, input_lengths = recognizer.am(tensor_batch_feat, tensor_batch_feat_len, return_lstm=True)  # output_shape: [len,batch,features]
    output_tensor = output_tensor.permute(1, 2, 0).detach()
    input_lengths = input_lengths.detach()

    return output_tensor, input_lengths, targets

In [61]:
# Prefix every table path with the directory the archive was extracted into.
_iemocap_root = os.path.join("/content", "data", "IEMOCAP_full_release")
all_file_paths = [os.path.join(_iemocap_root, rel_path) for rel_path in all_full_files]
total_instances = len(all_file_paths)

In [62]:
# 80% of the data is used for training; the remainder is split in half into
# validation and test (test receives whatever rounding leaves over).
num_train = round(total_instances * 0.8)
num_test_all = total_instances - num_train
num_val = round(num_test_all * 0.5)
num_test = num_test_all - num_val

for split_name, split_size in (("training", num_train), ("validation", num_val), ("test", num_test)):
    print("number " + split_name + " instances:", str(split_size))
assert num_train + num_val + num_test == total_instances

number training instances: 3592
number validation instances: 449
number test instances: 449


In [63]:
# shuffle data
import random
random.seed(2021)  # fixed seed so the shuffle, and therefore the split, is reproducible

shuffled_data_paths = random.sample(all_file_paths, k=total_instances)
train_list_paths = shuffled_data_paths[:num_train]
testall_list_paths = shuffled_data_paths[num_train:]
val_list_paths = testall_list_paths[:num_val]
# The test split is everything after the validation slice. Slicing from
# `num_test` was only correct when num_val happened to equal num_test.
test_list_paths = testall_list_paths[num_val:]

# Each split must have exactly the planned size, and together they must cover
# the whole data set.
assert len(train_list_paths) == num_train
assert len(val_list_paths) == num_val
assert len(test_list_paths) == num_test
assert len(train_list_paths) + len(val_list_paths) + len(test_list_paths) == total_instances

# # train, val, test variables:
# train_list_paths
# val_list_paths
# test_list_paths

In [64]:
# get corresponding labels for data
def _labels_for(paths):
    """Return the class id of every path, looked up by its bare file name."""
    return [file_to_label[path.split('/')[-1]] for path in paths]


train_list_labels = _labels_for(train_list_paths)
val_list_labels = _labels_for(val_list_paths)
test_list_labels = _labels_for(test_list_paths)

# Every split must have exactly one label per path.
for _paths, _labels in (
    (train_list_paths, train_list_labels),
    (val_list_paths, val_list_labels),
    (test_list_paths, test_list_labels),
):
    assert len(_labels) == len(_paths)

In [65]:
# train dataloader
train_dset = MyDataset(train_list_paths, train_list_labels)
train_args = {
    "shuffle": True,
    "batch_size": 64,
    "num_workers": 4,  # number of loader worker processes; adjust per platform
    "collate_fn": pad_collate,
    "drop_last": True,
}
train_loader = DataLoader(train_dset, **train_args)

In [66]:
# val dataloader
val_dset = MyDataset(val_list_paths, val_list_labels)
val_args = {
    "shuffle": False,
    "batch_size": 64,
    "num_workers": 4,
    "collate_fn": pad_collate,
    # NOTE(review): drop_last=True also discards the final partial batch of
    # validation samples, so reported accuracy does not cover every sample.
    "drop_last": True,
}
val_loader = DataLoader(val_dset, **val_args)

In [67]:
def get_k_folder(k, i):
  """Return the `(train_paths, val_paths)` lists for fold `i` of `k`.

  The pool is the first `num_train + num_val` entries of
  `shuffled_data_paths`. Fold `i` (zero-based) is the contiguous slice of
  `n // k` paths starting at `i * (n // k)`; every other path in the pool
  is training data. When `n` is not divisible by `k`, the trailing
  `n % k` paths are never used for validation.

  Fresh lists are returned on every call. The earlier version appended a
  nested list onto the global `train_list_paths` / `val_list_paths` and left
  out every path that precedes the validation fold.

  Raises:
    ValueError: if `k` is not positive or `i` is outside `range(k)`.
  """
  if k <= 0 or not 0 <= i < k:
    raise ValueError(f"need k > 0 and 0 <= i < k, got k={k}, i={i}")

  n = num_train + num_val
  train_val_list_paths = shuffled_data_paths[:n]
  fold_size = n // k

  start = i * fold_size
  stop = min(start + fold_size, n)

  fold_val_paths = train_val_list_paths[start:stop]
  # Training data is everything on either side of the validation slice.
  fold_train_paths = train_val_list_paths[:start] + train_val_list_paths[stop:]

  return fold_train_paths, fold_val_paths

In [68]:
#shuffled paths

# Pool the train and validation splits; cross-validation re-splits this pool.
complete_paths = [*train_list_paths, *val_list_paths]
complete_labels = [*train_list_labels, *val_list_labels]
complete_dataset = MyDataset(complete_paths, complete_labels)

# Shallow copies that the k-fold helper indexes by position.
complete_paths_lst = list(complete_paths)
complete_labels_lst = list(complete_labels)

In [69]:
def kfoldSKLearn(kValue, currentFold, complete_dataset):
  """Build the train and validation DataLoaders for one cross-validation fold.

  Args:
    kValue: number of folds.
    currentFold: index of the fold whose validation part is used.
    complete_dataset: MyDataset holding the pooled samples. Its `file_list`
      and `target_list` are the data that gets split, so the split always
      matches the dataset that was actually passed in rather than
      module-level lists.

  Returns:
    `(train_loader, val_loader)`; both use batch size 64, `pad_collate`
    and `drop_last=True`, and only the training loader shuffles.
  """
  paths = complete_dataset.file_list
  labels = complete_dataset.target_list

  # shuffle=False makes the fold membership identical on every call.
  kfold = KFold(n_splits=kValue, shuffle=False)
  # Only the index arrays of the requested fold are turned into path/label
  # lists; the other folds are not materialised.
  train_ids, val_ids = list(kfold.split(paths))[currentFold]

  train_dset = MyDataset([paths[j] for j in train_ids], [labels[j] for j in train_ids])
  train_args = dict(shuffle=True, batch_size=64, num_workers=2, collate_fn=pad_collate, drop_last=True)
  train_loader = DataLoader(train_dset, **train_args)

  val_dset = MyDataset([paths[j] for j in val_ids], [labels[j] for j in val_ids])
  val_args = dict(shuffle=False, batch_size=64, num_workers=2, collate_fn=pad_collate, drop_last=True)
  val_loader = DataLoader(val_dset, **val_args)

  return train_loader, val_loader

In [70]:
class CNNModel(nn.Module):
    """Stack of 1 to 4 dilated Conv1d blocks followed by pooling and a linear head.

    Every block is Conv1d (no bias) -> BatchNorm1d -> ReLU -> Dropout. The
    padding of each convolution is `(kernel // 2) * dilation`, which keeps
    the length of the last axis unchanged for the odd kernels used here.
    Blocks 1-3 output `out_channels` channels; block 4 narrows to
    `out_channels // 4`. The result is adaptively average-pooled to 4
    positions, flattened, and mapped to `label_size` logits.

    Args:
        in_channels: channel count of the input (axis 1).
        out_channels: channel count produced by blocks 1-3.
        layers: how many conv blocks to build; values above 4 behave like 4.
        label_size: number of output classes.

    Raises:
        ValueError: if `layers` is below 1. Previously such a model was
            constructed without `layer1` and only failed inside `forward`.
    """

    # (kernel_size, dilation, dropout probability) of conv blocks 1-4.
    _BLOCK_SPECS = ((3, 1, 0.2), (5, 2, 0.4), (7, 3, 0.4), (9, 4, 0.4))

    def __init__(self, in_channels =640, out_channels = 256, layers=4, label_size=4):
        super().__init__()
        if layers < 1:
            raise ValueError(f"layers must be >= 1, got {layers}")

        self.layers = layers
        narrow_channels = out_channels // 4
        # (input channels, output channels) of conv blocks 1-4.
        channel_plan = (
            (in_channels, out_channels),
            (out_channels, out_channels),
            (out_channels, out_channels),
            (out_channels, narrow_channels),
        )

        # Blocks are registered as layer1..layer4, in that order, so parameter
        # names in the state_dict stay the same as before.
        for idx, ((kernel, dilation, dropout), (c_in, c_out)) in enumerate(
                zip(self._BLOCK_SPECS, channel_plan), start=1):
            if layers >= idx:
                setattr(self, f"layer{idx}", self._conv_block(c_in, c_out, kernel, dilation, dropout))

        self.avg_pool = nn.Sequential(nn.AdaptiveAvgPool1d(4))

        self.last = nn.Sequential(nn.Dropout(p=0.5), nn.Flatten())

        # The head sees 4 pooled positions per channel of the last conv block.
        pooled_channels = out_channels if layers <= 3 else narrow_channels
        self.linear = nn.Linear(in_features=pooled_channels * 4, out_features=label_size)

    @staticmethod
    def _conv_block(c_in, c_out, kernel, dilation, dropout):
        """Build one Conv1d -> BatchNorm1d -> ReLU -> Dropout block."""
        return nn.Sequential(
            nn.Conv1d(c_in, c_out, kernel_size=kernel, stride=1,
                      padding=(kernel // 2) * dilation, dilation=dilation, bias=False),
            nn.BatchNorm1d(c_out),
            nn.ReLU(),
            nn.Dropout(p=dropout))

    def forward(self, input, lengths):
        """Return logits of shape (batch, label_size).

        `input` is a (batch, in_channels, length) tensor. `lengths` is
        accepted so that callers can pass it but is not used, so the pooling
        averages over padded positions as well.
        """
        out = input
        for idx in range(1, min(int(self.layers), 4) + 1):
            out = getattr(self, f"layer{idx}")(out)

        out = self.avg_pool(out)
        out = self.last(out)
        return self.linear(out)

## Training

In [71]:
def train_model(train_loader, model, opt, criterion, device):
    """Train `model` for one pass over `train_loader`.

    Args:
        train_loader: iterable yielding `(x, lengths, y)` batches.
        model: module called as `model(x, lengths)` and returning logits of
            shape (batch, classes).
        opt: optimizer over the model's parameters.
        criterion: loss called as `criterion(logits, y)` with `y` cast to long.
        device: device that `x` and `y` are moved to (`lengths` is passed
            through unchanged).

    Returns:
        `(model, training_accuracy, training_loss)`: accuracy is the fraction
        of samples whose arg-max logit equals the target (computed from the
        logits of the forward pass, i.e. before that batch's update), and loss
        is the mean of the per-batch losses. Both are 0.0 when the loader
        yields no batches, instead of raising ZeroDivisionError.
    """
    loss_accum = 0.0
    batch_cnt = 0

    correct_cnt = 0   # count correct predictions
    sample_cnt = 0    # count all predictions

    model.train()
    for x, lengths, y in train_loader:
        x = x.to(device)
        y = y.long().to(device)
        opt.zero_grad()

        logits = model(x, lengths)

        loss = criterion(logits, y)
        loss.backward()
        opt.step()

        loss_accum += loss.item()
        batch_cnt += 1

        # Compare all predictions of the batch in one tensor operation rather
        # than element by element in Python.
        predictions = logits.detach().argmax(dim=1)
        correct_cnt += int((predictions == y).sum().item())
        sample_cnt += predictions.numel()

    training_accuracy = correct_cnt / sample_cnt if sample_cnt else 0.0
    training_loss = loss_accum / batch_cnt if batch_cnt else 0.0

    return model, training_accuracy, training_loss


def test_model(loader, model, opt, criterion, device):
    model.eval()
    acc_cnt = 0
    err_cnt = 0

    for x, lengths, y in loader:
        
        x = x.to(device)
        y = y.long().to(device)
        
        logits = model(x, lengths)

        out_val, out_indices = torch.max(logits, dim=1)
        tar_indices = y

        for i in range(len(out_indices)):
            if out_indices[i] == tar_indices[i]:
                acc_cnt += 1
            else:
                err_cnt += 1

    current_acc = acc_cnt/(err_cnt+acc_cnt)
    
    return current_acc

## Main runner

In [72]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
cuda = torch.cuda.is_available()
device = torch.device("cuda") if cuda else torch.device("cpu")
# Loss used for the 4-way emotion classification.
criterion = nn.CrossEntropyLoss()
# print(device)

In [73]:
# Append-mode results log shared by the training cell below. The handle stays
# open for the rest of the notebook, so it is opened line-buffered
# (buffering=1): every completed line is flushed as soon as it is written and
# is not lost in Python's buffer if the runtime disconnects.
file1 = open("/content/gdrive/MyDrive/projectlog.txt", "a+", buffering=1)

In [74]:
def reset_weights(m):
    """Re-initialise `m` in place when it is a Conv1d, Linear or BatchNorm1d.

    Meant to be used as `model.apply(reset_weights)`; modules of any other
    type are left untouched.
    """
    resettable_types = (nn.Conv1d, nn.Linear, nn.BatchNorm1d)
    if not isinstance(m, resettable_types):
        return
    m.reset_parameters()

In [75]:
# model = CNNModel(640, 256, 2, label_size=4)

In [76]:
# model.apply(reset_weights)

In [None]:
# Grid search over network depth and width, scoring every configuration with
# k-fold cross-validation on the pooled train+validation data.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"

n_epochs = 10
cuda = torch.cuda.is_available()
device = torch.device("cuda" if cuda else "cpu")

#Define Training Grid Search
in_channels = [640]
out_channels = [64, 128, 256]
layers = [2, 3, 4]

# Number of cross-validation folds.
k = 5

for layer in layers:
    for in_channel in in_channels:
        for out_channel in out_channels:

            model = CNNModel(in_channel, out_channel, layer, label_size=4)
            model.to(device)

            print(model)

            avg_val_acc = 0

            for i in range(k):

                print(f'.........Running {i}th cross validation.......')

                ## Reset weights for each fold
                model.apply(reset_weights)

                opt = optim.Adam(model.parameters(), lr = 0.001, weight_decay=1e-6)
                criterion = nn.CrossEntropyLoss()
                # The scheduler is stepped with validation accuracy, which
                # should go UP, so it must run in 'max' mode. The default
                # 'min' mode would cut the learning rate whenever accuracy
                # stopped decreasing.
                scheduler = optim.lr_scheduler.ReduceLROnPlateau(opt, mode='max', patience=2)

                ## To reload saved model
                # key = str(i) + '-' + str(layer) + '-' + str(in_channel) + '-' + str(out_channel)
                # path = '/content/gdrive/MyDrive/model/{i}.pt'.format(i=key)
                # checkpoint = torch.load(path)
                # model.load_state_dict(checkpoint['model_state_dict'])
                # opt.load_state_dict(checkpoint['optimizer_state_dict'])
                # saved_epoch = 15

                # The fold split does not change between epochs, so the loaders
                # are built once per fold. Rebuilding them every epoch spawned
                # and tore down a new set of worker processes each time.
                train_loader, val_loader = kfoldSKLearn(k, i, complete_dataset)

                # Defined up front so the code after the loop still works when
                # n_epochs is 0.
                valid_acc = 0.0

                for n in range(0, n_epochs):

                    model, train_acc, train_loss = train_model(train_loader, model, opt, criterion, device)

                    valid_acc = test_model(val_loader, model, opt, criterion, device)

                    scheduler.step(valid_acc)

                    print("Epoch: "+str(n)+ ", Fold: " + str(i) + ", Training Accuracy: " +str(train_acc)+ ", Training loss:"+str(train_loss)+ ", Validation accuracy:" +str(valid_acc))

                    # Log the result of every epoch as
                    # epoch-fold-layers-in_channels-out_channels-val_acc
                    key = str(n) + '-' + str(i) + '-' + str(layer) + '-' + str(in_channel) + '-' + str(out_channel) + '-' + str(valid_acc) + '\n'

                    file1.write(key)

                # Make sure this fold's log lines are written out before the
                # next (long) fold starts.
                file1.flush()

                # Considering the validation acc of the last epoch for each of the k folds
                avg_val_acc+=valid_acc

                p = str(i) + '-' + str(layer) + '-' + str(in_channel) + '-' + str(out_channel)

                path = '/content/gdrive/MyDrive/model/{i}.pt'.format(i=p)

                # torch.save does not create missing directories.
                os.makedirs(os.path.dirname(path), exist_ok=True)

                torch.save({
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': opt.state_dict(),
                        'scheduler_state_dict' : scheduler.state_dict(),
                        }, path)

            avg_val_acc/=k
            print("Average Val Accuracy: " + str(avg_val_acc))


CNNModel(
  (layer1): Sequential(
    (0): Conv1d(640, 64, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
    (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
  )
  (layer2): Sequential(
    (0): Conv1d(64, 64, kernel_size=(5,), stride=(1,), padding=(4,), dilation=(2,), bias=False)
    (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.4, inplace=False)
  )
  (avg_pool): Sequential(
    (0): AdaptiveAvgPool1d(output_size=4)
  )
  (last): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Flatten(start_dim=1, end_dim=-1)
  )
  (linear): Linear(in_features=256, out_features=4, bias=True)
)
.........Running 0th cross validation.......


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fddcb43e710>
Traceback (most recent call last):
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fddcb43e710>
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1328, in __del__
Traceback (most recent call last):
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1328, in __del__
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
    if w.is_alive():
  File "/usr/lib/pytho