In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
!pip install allosaurus

Collecting allosaurus
  Downloading allosaurus-1.0.2-py3-none-any.whl (52 kB)
[?25l[K     |██████▎                         | 10 kB 29.5 MB/s eta 0:00:01[K     |████████████▋                   | 20 kB 18.4 MB/s eta 0:00:01[K     |██████████████████▉             | 30 kB 15.0 MB/s eta 0:00:01[K     |█████████████████████████▏      | 40 kB 14.0 MB/s eta 0:00:01[K     |███████████████████████████████▌| 51 kB 7.0 MB/s eta 0:00:01[K     |████████████████████████████████| 52 kB 162 kB/s 
Collecting panphon
  Downloading panphon-0.19-py2.py3-none-any.whl (72 kB)
[K     |████████████████████████████████| 72 kB 544 kB/s 
[?25hCollecting unicodecsv
  Downloading unicodecsv-0.14.1.tar.gz (10 kB)
Collecting munkres
  Downloading munkres-1.1.4-py2.py3-none-any.whl (7.0 kB)
Building wheels for collected packages: unicodecsv
  Building wheel for unicodecsv (setup.py) ... [?25l[?25hdone
  Created wheel for unicodecsv: filename=unicodecsv-0.14.1-py3-none-any.whl size=10765 sha256=2b5e47e

In [3]:
#These libraries help to interact with the operating system and the runtime environment respectively
import os
import sys
import pickle

#Model/Training related libraries
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn.utils.rnn import pad_sequence

#Dataloader libraries
from torch.utils.data import DataLoader, Dataset

# Transforms and datasets
import torchvision.transforms as transforms
import torchvision.datasets as dset

import time
import matplotlib.pyplot as plt
from PIL import Image
import pandas as pd
from tqdm import tqdm
import random
from sklearn.model_selection import KFold
import wave
import librosa

# Allosaurus
from allosaurus.audio import read_audio
from allosaurus.app import read_recognizer
from allosaurus.am.utils import *

In [4]:
# pth = '/content/gdrive/MyDrive/18786 IDL/IDL Project/data/IEMOCAP_full_release/Session1/sentences/wav/Ses01F_impro01/Ses01F_impro01_F000.wav'
pth = 'gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD_mod/fear/f02 (1).wav'
wf = wave.open(pth)  # will throw error for AESDD files
# audio, sr = librosa.load(pth, sr=None)

In [4]:
recognizer = read_recognizer()

downloading model  latest
from:  https://github.com/xinjli/allosaurus/releases/download/v1.0/latest.tar.gz
to:    /usr/local/lib/python3.7/dist-packages/allosaurus/pretrained
please wait...


# IEMOCAP

In [None]:
df = pd.read_csv("/content/gdrive/MyDrive/iemocap_full_dataset.csv")
df.shape

(10039, 7)

In [None]:
df = df[df.emotion != 'xxx']  # only keep data that has emotion label
# only keep 'neu', 'hap', 'sad', 'ang' labels
df = df.drop(df[~ ((df.emotion == 'neu') | (df.emotion == 'hap') | (df.emotion == 'sad') | (df.emotion == 'ang'))].index)

df_unedit = df.copy()
df_unedit["path"] = df_unedit["path"].apply(lambda x : x.split('/')[-1])
all_files = list(df_unedit.path)
file_to_emotion = dict(zip(df_unedit.path, df_unedit.emotion))

all_full_files = list(df.path)
print(df)
print(df_unedit)
print(len(file_to_emotion))
print(file_to_emotion)
print(all_full_files)

In [None]:
from collections import Counter

# get unique emotions
# emotion_to_label = {'neu': 0, 'fru': 1, 'sad': 2, 'sur': 3, 'ang': 4, 'hap': 5, 'exc': 6, 'fea': 7, 'dis': 8, 'oth': 9}
emotion_to_label = {'neu': 0, 'hap': 1, 'sad': 2, 'ang': 3}
label_to_emotion = {v: k for k, v in emotion_to_label.items()}
print(emotion_to_label)
print(label_to_emotion)

# counter number of class instances
emotion_instances_list = [v for v in file_to_emotion.values()]
counter = Counter(emotion_instances_list)
print(counter)

{'neu': 0, 'hap': 1, 'sad': 2, 'ang': 3}
{0: 'neu', 1: 'hap', 2: 'sad', 3: 'ang'}
Counter({'neu': 1708, 'ang': 1103, 'sad': 1084, 'hap': 595})


In [None]:
file_to_label = {k: emotion_to_label[v] for k, v in file_to_emotion.items()}
print(file_to_label)

# AESDD PreProcessing

In [5]:
data_dir = os.path.join("gdrive", "MyDrive", "18786 IDL", "IDL Project", "data", "AESDD", "AESDD_mod")
mapping = {'happiness': 0, 'sadness': 1, 'anger': 2, 'disgust': 3, 'fear': 4}
mapping2 = {'h': 0, 's': 1, 'a': 2, 'd': 3, 'f': 4}

In [6]:
def parse_data(data_dir):
    file_paths = []  # full file paths from drive to wav file
    labels = []  # corresponding labels

    for root, directories, filenames in os.walk(data_dir):
            for filename in filenames:
                if filename.endswith('.wav') and filename[0] != '.':
                    abbrev = filename[0]
                    label = mapping2[abbrev]
                    path = os.path.join(root, filename)

                    file_paths.append(path)
                    labels.append(label)

    return file_paths, labels

In [7]:
file_paths, labels = parse_data(data_dir)

In [8]:
print(file_paths[:10])
print(labels[:10])

['gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD_mod/disgust/d07 (3).wav', 'gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD_mod/disgust/d17 (3).wav', 'gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD_mod/disgust/d03 (5)b.wav', 'gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD_mod/disgust/d11 (6).wav', 'gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD_mod/disgust/d06 (2).wav', 'gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD_mod/disgust/d20 (2).wav', 'gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD_mod/disgust/d05 (4).wav', 'gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD_mod/disgust/d04 (5).wav', 'gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD_mod/disgust/d13 (1).wav', 'gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD_mod/disgust/d16 (2).wav']
[3, 3, 3, 3, 3, 3, 3, 3, 3, 3]


In [9]:
combined = list(zip(file_paths, labels))

In [10]:
random.shuffle(combined)

In [11]:
file_paths = [pair[0] for pair in combined]
labels = [pair[1] for pair in combined] 

In [12]:
print(file_paths[:10])
print(labels[:10])

['gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD_mod/anger/a17 (4).wav', 'gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD_mod/happiness/h20 (6).wav', 'gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD_mod/disgust/d07 (1).wav', 'gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD_mod/happiness/h12 (1).wav', 'gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD_mod/disgust/d15 (5).wav', 'gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD_mod/happiness/h13 (5).wav', 'gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD_mod/happiness/h16 (1).wav', 'gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD_mod/disgust/d10 (4).wav', 'gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD_mod/happiness/h19 (3).wav', 'gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD_mod/fear/f15 (6).wav']
[2, 0, 3, 0, 3, 0, 0, 3, 0, 4]


# K fold (testing)

In [39]:
kf = KFold(n_splits=5, shuffle=False)  # already shuffled above

In [40]:
test_data = []  # reserved for test data splits

In [24]:
for i, (train_index, test_index) in enumerate(kf.split(file_paths)):
    # print("TRAIN:", train_index, "TEST:", test_index)
    train_index = list(train_index)
    test_index = list(test_index)
    xtrain, ytrain = [], []
    xtest_all, ytest_all = [], []

    for idx in train_index:
        xtrain.append(file_paths[idx])
        ytrain.append(labels[idx])

    for idx in test_index:
        xtest_all.append(file_paths[idx])
        ytest_all.append(labels[idx])

    mid = len(xtest_all) // 2
    xval = xtest_all[:mid]
    yval = ytest_all[:mid]

    xtest = xtest_all[mid:]
    ytest = ytest_all[mid:]

    test_data.append((xtest, ytest))
    
    print(xval[-10:])
    print(yval[-10:])
    
    assert(len(xtrain) + len(xval) + len(xtest) == len(file_paths))

['gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD/happiness/h09 (5).wav', 'gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD/fear/f07 (4).wav', 'gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD/happiness/h12 (3).wav', 'gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD/disgust/d12 (2).wav', 'gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD/sadness/s11 (5).wav', 'gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD/disgust/d03 (2).wav', 'gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD/sadness/s03 (4).wav', 'gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD/sadness/s02 (3).wav', 'gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD/happiness/h14 (5).wav', 'gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD/happiness/h14 (4).wav']
[0, 4, 0, 3, 1, 3, 1, 1, 0, 0]
['gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD/fear/f15 (2).wav', 'gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD/fear/f05 (1).wav', 'gdrive/MyDrive/18786 IDL/IDL Project/data/A

In [10]:
print(xtrain[400])
print(ytrain[400])

gdrive/MyDrive/18786 IDL/IDL Project/data/AESDD/AESDD/disgust/d19 (3).wav
3


# weight reset

In [13]:
def reset_weights(m):
    '''
        Try resetting model weights to avoid
        weight leakage.
    '''
    for layer in m.children():
        if hasattr(layer, 'reset_parameters'):
            print(f'Reset trainable parameters of layer = {layer}')
            layer.reset_parameters()

# Dataset

In [14]:
class MyDataset(Dataset):
    def __init__(self, file_list, target_list):
        
        self.file_list = file_list
        self.target_list = target_list
        self.num_classes = len(list(set(target_list)))

        self.x = file_list
        self.y = target_list

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, index):
        filepath = self.file_list[index]
        x = torch.tensor(recognizer.pm.compute(read_audio(filepath)))
        x = x.detach()
        x_len = torch.tensor(np.array([x.shape[0]], dtype=np.int32))
        x_len = x_len.detach()
        y = torch.Tensor([self.target_list[index]])
        abbrev = filepath.split('/')[-1][0]
        assert(mapping2[abbrev] == self.target_list[index])
        return x, x_len, y

In [15]:
# collate function
def pad_collate(batch):
    # print("inside collate")
    # batch looks like [(x0, xlen0, y0), (x4, xlen4, y4), (x2, xlen2, y2)... ]
    feats = [sample[0] for sample in batch]
    feat_lens = [sample[1] for sample in batch]
    target_list = torch.Tensor([sample[2] for sample in batch])

    feats = pad_sequence(feats, batch_first=True, padding_value=0) # batch, features, len
    feat_lens = pad_sequence(feat_lens, batch_first=True, padding_value=0).squeeze()
    idx = torch.argsort(feat_lens, descending=True) # sorting the input in descending order as required by the lstms in AM.

    # reorder
    # tensor_batch_feat = feats[idx]
    # tensor_batch_feat_len = feat_lens[idx]
    targets = target_list[idx]
    tensor_batch_feat, tensor_batch_feat_len = move_to_tensor([feats[idx], feat_lens[idx]], device_id=-1) # converting to the required tensors

    # Features
    output_tensor, input_lengths = recognizer.am(tensor_batch_feat, tensor_batch_feat_len, return_lstm=True) # output_shape: [len,batch,features]
    output_tensor = output_tensor.detach()
    input_lengths = input_lengths.detach()
    
    return output_tensor, input_lengths, targets

# IEMOCAP data preprocessing

In [None]:
all_file_paths = [os.path.join("gdrive", "MyDrive", "data", "IEMOCAP_full_release", file_path) for file_path in all_full_files]
total_instances = len(all_file_paths)

In [None]:
!tar -xf archive.tar.gz

In [None]:
num_train = round(0.8 * total_instances)
num_test_all = total_instances - num_train
num_val = round(0.5 * num_test_all)
num_test = num_test_all - num_val

print("number training instances:", str(num_train))
print("number validation instances:", str(num_val))
print("number test instances:", str(num_test))
assert(num_train + num_val + num_test == total_instances)

number training instances: 3592
number validation instances: 449
number test instances: 449


In [None]:
# shuffle data
import random
random.seed(2021)

shuffled_data_paths = random.sample(all_file_paths, k=total_instances)
# train_list_paths = shuffled_data_paths[:num_train]
# testall_list_paths = shuffled_data_paths[num_train:]
# val_list_paths = testall_list_paths[:num_val]
# test_list_paths = testall_list_paths[num_test:]

# assert(len(train_list_paths) + len(val_list_paths) + len(test_list_paths) == total_instances)

# train, val, test variables:
# train_list_paths
# val_list_paths
# test_list_paths

In [None]:
# get corresponding labels for data
train_list_labels = [file_to_label[filepath.split('/')[-1]] for filepath in train_list_paths]
val_list_labels = [file_to_label[filepath.split('/')[-1]] for filepath in val_list_paths]
test_list_labels = [file_to_label[filepath.split('/')[-1]] for filepath in test_list_paths]

assert(len(train_list_labels) == len(train_list_paths))
assert(len(val_list_labels) == len(val_list_paths))
assert(len(test_list_labels) == len(test_list_paths))

In [None]:
# train dataloader
train_dset = MyDataset(train_list_paths, train_list_labels)
train_args = dict(shuffle=True, batch_size=64, num_workers=2, collate_fn=pad_collate, drop_last=True)  # change to num_workers=4 on diff platform
train_loader = DataLoader(train_dset, **train_args)

In [None]:
# val dataloader
val_dset = MyDataset(val_list_paths, val_list_labels)
val_args = dict(shuffle=False, batch_size=64, num_workers=2, collate_fn=pad_collate, drop_last=True)
val_loader = DataLoader(val_dset, **val_args)

In [None]:
test_batch = next(iter(train_loader))

NameError: ignored

In [None]:
x, x_len, y = test_batch
print(x.shape)  # seq_len, batch_size, input_size
print(x_len)
print(y)

torch.Size([473, 64, 640])
tensor([473, 350, 316, 307, 305, 300, 265, 263, 240, 240, 235, 233, 224, 209,
        203, 191, 189, 184, 181, 178, 166, 165, 161, 159, 156, 149, 145, 143,
        142, 140, 129, 128, 125, 125, 122, 121, 117, 109, 108, 108, 106, 105,
        105, 101, 100,  99,  95,  93,  91,  86,  85,  77,  70,  70,  66,  61,
         56,  56,  55,  55,  46,  46,  44,  44], dtype=torch.int32)
tensor([3., 0., 2., 0., 3., 1., 2., 2., 0., 1., 0., 3., 3., 2., 0., 0., 0., 0.,
        2., 3., 2., 1., 0., 3., 2., 2., 1., 0., 1., 2., 3., 3., 1., 0., 2., 0.,
        3., 2., 2., 0., 3., 3., 0., 3., 2., 3., 0., 1., 3., 3., 0., 1., 2., 0.,
        0., 0., 0., 0., 0., 2., 0., 0., 3., 0.])


##Model

In [16]:
class ICASSP3CNN(nn.Module):
    def __init__(self, embed_size=640, hidden_size=512, num_lstm_layers = 2, bidirectional = False, label_size=31):
        super().__init__()
        self.n_layers = num_lstm_layers 
        self.hidden = hidden_size
        self.bidirectional = bidirectional
        
        self.cnn  = nn.Conv1d(embed_size, embed_size, kernel_size=3, padding=1)
        self.cnn2 = nn.Conv1d(embed_size, embed_size, kernel_size=5, padding=2)
        self.cnn3 = nn.Conv1d(embed_size, embed_size, kernel_size=7, padding=3)

        self.batchnorm = nn.BatchNorm1d(3 * embed_size)

        self.lstm = nn.LSTM(input_size = 3 * embed_size, 
                            hidden_size = hidden_size, 
                            num_layers = num_lstm_layers, 
                            bidirectional = bidirectional)

        self.linear = nn.Linear(in_features = 2 * hidden_size if bidirectional else hidden_size, 
                                out_features = label_size)


    def forward(self, x, lengths):
        """
        padded_x: (B,T) padded LongTensor
        """
        
        batch_size = x.shape[0]
        
        x = x.permute(1, 2, 0)    # (seq_len, batch_size, input_size) -> (batch_size, input_size, seq_len)
      
        cnn_output = torch.cat([self.cnn(x), self.cnn2(x), self.cnn3(x)], dim=1)

        input = F.relu(self.batchnorm(cnn_output))

        input = input.transpose(1,2)

        pack_tensor = nn.utils.rnn.pack_padded_sequence(input, lengths, batch_first=True)
        _, (hn, cn) = self.lstm(pack_tensor)

        if self.bidirectional:
            h_n = hn.view(self.n_layers, 2, batch_size, self.hidden)
            h_n = torch.cat([ h_n[-1, 0,:], h_n[-1,1,:] ], dim = 1)
        else:
            h_n = hn[-1]
        
        logits = self.linear(h_n)

        return logits
        
        
class ICASSP2CNN(nn.Module):
    def __init__(self, embed_size=640, hidden_size=512, num_lstm_layers = 2, bidirectional = False, label_size=31):
        super().__init__()
        self.n_layers = num_lstm_layers 
        self.hidden = hidden_size
        self.bidirectional = bidirectional

        self.cnn  = nn.Conv1d(embed_size, embed_size, kernel_size=3, padding=1)
        self.cnn2 = nn.Conv1d(embed_size, embed_size, kernel_size=5, padding=2)

        self.batchnorm = nn.BatchNorm1d(2 * embed_size)

        self.lstm = nn.LSTM(input_size = 2 * embed_size, 
                            hidden_size = hidden_size, 
                            num_layers = num_lstm_layers, 
                            bidirectional = bidirectional)

        self.linear = nn.Linear(in_features = 2 * hidden_size if bidirectional else hidden_size, 
                                out_features = label_size)


    def forward(self, x, lengths):
        """
        padded_x: (B,T) padded LongTensor
        """
        
        batch_size = x.shape[0]
        # torch.Size([468, 64, 640])
        x = x.permute(1, 2, 0)    # (seq_len, batch_size, input_size) -> (batch_size, input_size, seq_len)

        cnn_output = torch.cat([self.cnn(x), self.cnn2(x)], dim=1)

        input = F.relu(self.batchnorm(cnn_output))

        input = input.transpose(1,2)

        pack_tensor = nn.utils.rnn.pack_padded_sequence(input, lengths, batch_first=True)
        _, (hn, cn) = self.lstm(pack_tensor)

        if self.bidirectional:
            h_n = hn.view(self.n_layers, 2, batch_size, self.hidden)
            h_n = torch.cat([ h_n[-1, 0,:], h_n[-1,1,:] ], dim = 1)
        else:
            h_n = hn[-1]
        
        logits = self.linear(h_n)

        return logits
    

class ICASSP1CNN(nn.Module):
    def __init__(self, embed_size=640, hidden_size=512, num_lstm_layers = 2, bidirectional = False, label_size=31):
        super().__init__()
        self.n_layers = num_lstm_layers 
        self.hidden = hidden_size
        self.bidirectional = bidirectional

        self.cnn  = nn.Conv1d(embed_size, embed_size, kernel_size=3, padding=1)

        self.batchnorm = nn.BatchNorm1d(embed_size)

        self.lstm = nn.LSTM(input_size = embed_size, 
                            hidden_size = hidden_size, 
                            num_layers = num_lstm_layers, 
                            bidirectional = bidirectional)

        self.linear = nn.Linear(in_features = 2 * hidden_size if bidirectional else hidden_size, 
                                out_features = label_size)


    def forward(self, x, lengths):
        """
        padded_x: (B,T) padded LongTensor
        """
        batch_size = x.shape[0]
     
        x = x.permute(1, 2, 0)    # (seq_len, batch_size, input_size) -> (batch_size, input_size, seq_len)

        cnn_output = self.cnn(x)

        input = F.relu(self.batchnorm(cnn_output))

        input = input.transpose(1,2)

        pack_tensor = nn.utils.rnn.pack_padded_sequence(input, lengths, batch_first=True)
        _, (hn, cn) = self.lstm(pack_tensor)

        if self.bidirectional:
            h_n = hn.view(self.n_layers, 2, batch_size, self.hidden)
            h_n = torch.cat([ h_n[-1, 0,:], h_n[-1,1,:] ], dim = 1)
        else:
            h_n = hn[-1]
        
        logits = self.linear(h_n)

        return logits
        

In [18]:
x = torch.zeros([468, 64, 640])
print(x.shape)
x = x.permute(2, 0, 1) 
print(x.shape)

torch.Size([468, 64, 640])
torch.Size([640, 468, 64])


## Training

In [17]:
def train_language_model(train_loader_LM, model, opt, criterion, device):

    loss_accum = 0.0
    batch_cnt = 0

    model.train()
    start_time = time.time()
    for batch, (x, lengths, y) in enumerate(train_loader_LM):

        x = x.to(device)
        #lengths = lengths.to(device)
        y = y.long().to(device)
        opt.zero_grad()

        logits = model(x, lengths)
        
        loss = criterion(logits.permute(0,2,1), y)
        loss_score = loss.cpu().item()

        loss_accum += loss_score
        batch_cnt += 1
        loss.backward()
        opt.step()      

    NLL = loss_accum / batch_cnt
        
    return model, NLL


def train_model(train_loader, model, opt, criterion, device):

    loss_accum = 0.0
    batch_cnt = 0

    acc_cnt = 0     #count correct predictions
    err_cnt = 0     #count incorrect predictions

    model.train()
    start_time = time.time()
    for batch, (x, lengths, y) in enumerate(train_loader):
        x = x.to(device)
        #lengths = lengths.to(device)
        y = y.long().to(device)
        opt.zero_grad()

        logits = model(x, lengths)

        loss = criterion(logits, y)
        loss_score = loss.cpu().item()

        loss_accum += loss_score
        batch_cnt += 1
        loss.backward()
        opt.step()

        #model outputs
        out_val, out_indices = torch.max(logits, dim=1)
        tar_indices = y

        for i in range(len(out_indices)):
            if out_indices[i] == tar_indices[i]:
                acc_cnt += 1
            else:
                err_cnt += 1
                     
    training_accuracy =  acc_cnt/(err_cnt+acc_cnt) 
    training_loss = loss_accum / batch_cnt
        
    return model, training_accuracy, training_loss


def test_model(loader, model, opt, criterion, device):
    model.eval()
    acc_cnt = 0
    err_cnt = 0

    for x, lengths, y in loader:
        
        x = x.to(device)
        y = y.long().to(device)
        
        logits = model(x, lengths)

        out_val, out_indices = torch.max(logits, dim=1)
        tar_indices = y

        for i in range(len(out_indices)):
            if out_indices[i] == tar_indices[i]:
                acc_cnt += 1
            else:
                err_cnt += 1

    current_acc = acc_cnt/(err_cnt+acc_cnt)
    
    return current_acc

## Main runner

In [18]:
cuda = torch.cuda.is_available()
device = torch.device("cuda" if cuda else "cpu")
# model = ICASSP1CNN()
# checkpoint = torch.load("/content/gdrive/MyDrive/model/model.pt")
# model.load_state_dict(checkpoint['model_state_dict'])
# opt.load_state_dict(checkpoint['optimizer_state_dict'])
# scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
# criterion = nn.CrossEntropyLoss()
# model.to(device)
# opt = optim.Adam(model.parameters(), lr = 0.001)
# criterion = nn.CrossEntropyLoss()
# device = torch.device("cuda" if cuda else "cpu")
# valid_acc = test_model(val_loader, model, opt, criterion, device)

In [19]:
batch_size = 8
lr = 0.001
weight_decay = 5e-5

In [20]:
kf = KFold(n_splits=5, shuffle=False)

In [None]:
def get_k_folder(k, i):  # deprecated in AESDD
  n = num_train+num_val
  train_val_list_paths = shuffled_data_paths[:n]
  fold_size = (n + k-1) // k

  train_list_paths = train_val_list_paths[:i*fold_size] + train_val_list_paths[i*fold_size+fold_size:n]
  val_list_paths = train_val_list_paths[i*fold_size:min(i*fold_size+fold_size, n)]

  return train_list_paths, val_list_paths

In [21]:
def get_loaders(xtrain, ytrain, xval, yval):
  # train dataloader
  train_dset = MyDataset(xtrain, ytrain)
  train_args = dict(shuffle=True, batch_size=batch_size, num_workers=1, collate_fn=pad_collate, drop_last=True)  # change to num_workers=4 on diff platform
  train_loader = DataLoader(train_dset, **train_args)

  # val dataloader
  val_dset = MyDataset(xval, yval)
  val_args = dict(shuffle=False, batch_size=batch_size, num_workers=1, collate_fn=pad_collate, drop_last=True)
  val_loader = DataLoader(val_dset, **val_args)

  return train_loader, val_loader

In [22]:
# os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
# os.environ["CUDA_VISIBLE_DEVICES"]="0"

run_num = 2
n_epochs = 20
cuda = torch.cuda.is_available()

#Define Training Grid Search
# model_list = ['1CNN', '2CNN', '3CNN']
# emb_size_list = [640, 640, 640, 640]
# hidden_size_list = [128, 256, 384, 512]
# num_lstm_layers_list = [1, 2, 3]
# bidirectional_list = [False, True]

model_list = ['3CNN']
emb_size_list = [640]
hidden_size_list = [128]
num_lstm_layers_list = [3]
bidirectional_list = [False]


for model_name in model_list:
    for bidirectionality in bidirectional_list:
        for emb_size in emb_size_list:
            for num_lstm in num_lstm_layers_list:
                for hidden_size in hidden_size_list:
                    
                        start_time = time.time()
                        
                        #Log Metadata
                        metadata = model_name + ',' + str(bidirectionality) + ',' + str(emb_size) + ',' + str(num_lstm) + ',' + str(hidden_size)
                        print(metadata)

                        #initialize model
                        if model_name == '1CNN':
                            model = ICASSP1CNN(emb_size, hidden_size, num_lstm, bidirectionality)
                        elif model_name == '2CNN':
                            model = ICASSP2CNN(emb_size, hidden_size, num_lstm, bidirectionality)
                        elif model_name == '3CNN':
                            model = ICASSP3CNN(emb_size, hidden_size, num_lstm, bidirectionality)        
                          
                        # opt = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
                        # criterion = nn.CrossEntropyLoss()
                        # scheduler = optim.lr_scheduler.ReduceLROnPlateau(opt, mode="max", factor=0.5, patience=2, threshold=0.04, threshold_mode='abs', verbose=True)
 
                        device = torch.device("cuda" if cuda else "cpu")
                        model.to(device)
                        
                        print(model)
                        
                        test_data = []

                        for i, (train_index, test_index) in enumerate(kf.split(file_paths)):
                            max_valid_acc = 0
                            max_test_acc = 0
                            model.apply(reset_weights)  # reset weights on each fold
                            opt = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
                            criterion = nn.CrossEntropyLoss()
                            scheduler = optim.lr_scheduler.ReduceLROnPlateau(opt, mode="max", factor=0.5, patience=1, threshold=0.04, threshold_mode='abs', verbose=True)

                            print(f'Running {i + 1}th cross validation.......')
                            # print("TRAIN:", train_index, "TEST:", test_index)
                            train_index = list(train_index)
                            test_index = list(test_index)
                            xtrain, ytrain = [], []
                            xtest_all, ytest_all = [], []

                            for idx in train_index:
                                xtrain.append(file_paths[idx])
                                ytrain.append(labels[idx])

                            for idx in test_index:
                                xtest_all.append(file_paths[idx])
                                ytest_all.append(labels[idx])

                            mid = len(xtest_all) // 2
                            xval = xtest_all[:mid]
                            yval = ytest_all[:mid]

                            xtest = xtest_all[mid:]
                            ytest = ytest_all[mid:]

                            test_data.append((xtest, ytest))

                            assert(len(xtrain) + len(xval) + len(xtest) == len(file_paths))
                            
                            train_loader, val_loader = get_loaders(xtrain, ytrain, xval, yval)

                            for epoch in range(n_epochs):
                                model, train_acc, train_loss = train_model(train_loader, model, opt, criterion, device)
                                valid_acc = test_model(val_loader, model, opt, criterion, device)
                                # test_acc = test_model(test_loader, model, opt, criterion, device)

                                if valid_acc >= max_valid_acc:
                                    print("SAVING CHECKPOINT")
                                    max_valid_acc = valid_acc
                                    # max_test_acc = test_acc
                                    save_path = os.path.join("gdrive", "MyDrive", "18786 IDL", "IDL Project", "saved_models", "AESDD", f"run{run_num}", f"epoch{epoch}_fold{i+1}_model{model_name}_batchsize{batch_size}_lr{lr}_embed{emb_size}_hsize{hidden_size}_numlstm{num_lstm}.pth")

                                    torch.save({
                                        'model_state_dict': model.state_dict(),
                                        'optimizer_state_dict': opt.state_dict(),
                                        'scheduler_state_dict' : scheduler.state_dict(),
                                        'train_acc': train_acc,
                                        'val_acc': valid_acc,
                                        }, save_path)

                                
                                scheduler.step(valid_acc)


                                # Print log of accuracy and loss
                                print("Epoch: "+ str(epoch) +", Training Accuracy: "+str(train_acc)+", Training loss:"+str(train_loss)+ ", Validation accuracy:"+str(valid_acc))
                        

                        total_time = (time.time() - start_time) / 60
            

3CNN,False,640,3,128
ICASSP3CNN(
  (cnn): Conv1d(640, 640, kernel_size=(3,), stride=(1,), padding=(1,))
  (cnn2): Conv1d(640, 640, kernel_size=(5,), stride=(1,), padding=(2,))
  (cnn3): Conv1d(640, 640, kernel_size=(7,), stride=(1,), padding=(3,))
  (batchnorm): BatchNorm1d(1920, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (lstm): LSTM(1920, 128, num_layers=3)
  (linear): Linear(in_features=128, out_features=31, bias=True)
)
Reset trainable parameters of layer = Conv1d(640, 640, kernel_size=(3,), stride=(1,), padding=(1,))
Reset trainable parameters of layer = Conv1d(640, 640, kernel_size=(5,), stride=(1,), padding=(2,))
Reset trainable parameters of layer = Conv1d(640, 640, kernel_size=(7,), stride=(1,), padding=(3,))
Reset trainable parameters of layer = BatchNorm1d(1920, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
Reset trainable parameters of layer = LSTM(1920, 128, num_layers=3)
Reset trainable parameters of layer = Linear(in_features=128,

In [23]:
def get_test_acc(model, test_loader):
    # checkpoint = torch.load(model_pth)
    # model.load_state_dict(checkpoint["model_state_dict"])

    model.eval()
    test_num_correct = 0
    total = 0
    for batch_num, (x, lengths, y) in enumerate(test_loader):
        x = x.to(device)
        y = y.long().to(device)

        logits = model(x, lengths)
        test_num_correct += (torch.argmax(logits, axis=1) == y).sum().item()
        total += len(y)

    test_acc = test_num_correct / total
    return test_acc

In [24]:
# test dataloader
test_dset = MyDataset(xtest, ytest)
test_args = dict(shuffle=False, batch_size=batch_size, num_workers=1, collate_fn=pad_collate, drop_last=True)
test_loader = DataLoader(test_dset, **test_args)

test_acc = get_test_acc(model, test_loader)
print(test_acc)

0.42857142857142855
