In [None]:
# Mount Google Drive so that its contents are available under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install torch torchvision torchaudio

Collecting torchaudio
  Downloading torchaudio-0.10.0-cp37-cp37m-manylinux1_x86_64.whl (2.9 MB)
[K     |████████████████████████████████| 2.9 MB 4.2 MB/s 
  Downloading torchaudio-0.9.1-cp37-cp37m-manylinux1_x86_64.whl (1.9 MB)
[K     |████████████████████████████████| 1.9 MB 79.1 MB/s 
[?25h  Downloading torchaudio-0.9.0-cp37-cp37m-manylinux1_x86_64.whl (1.9 MB)
[K     |████████████████████████████████| 1.9 MB 54.3 MB/s 
[?25hInstalling collected packages: torchaudio
Successfully installed torchaudio-0.9.0


In [None]:
!pip install allosaurus

Collecting allosaurus
  Downloading allosaurus-1.0.2-py3-none-any.whl (52 kB)
[?25l[K     |██████▎                         | 10 kB 31.6 MB/s eta 0:00:01[K     |████████████▋                   | 20 kB 19.8 MB/s eta 0:00:01[K     |██████████████████▉             | 30 kB 11.3 MB/s eta 0:00:01[K     |█████████████████████████▏      | 40 kB 9.3 MB/s eta 0:00:01[K     |███████████████████████████████▌| 51 kB 4.2 MB/s eta 0:00:01[K     |████████████████████████████████| 52 kB 805 kB/s 
Collecting panphon
  Downloading panphon-0.19-py2.py3-none-any.whl (72 kB)
[?25l[K     |████▌                           | 10 kB 38.4 MB/s eta 0:00:01[K     |█████████                       | 20 kB 33.3 MB/s eta 0:00:01[K     |█████████████▋                  | 30 kB 13.0 MB/s eta 0:00:01[K     |██████████████████▏             | 40 kB 16.2 MB/s eta 0:00:01[K     |██████████████████████▊         | 51 kB 12.3 MB/s eta 0:00:01[K     |███████████████████████████▎    | 61 kB 14.2 MB/s eta 0:0

In [None]:
#These libraries help to interact with the operating system and the runtime environment respectively
import os
import sys
import pickle

#Model/Training related libraries
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn.utils.rnn import pad_sequence

#Dataloader libraries
from torch.utils.data import DataLoader, Dataset

# Transforms and datasets
import torchvision.transforms as transforms
import torchvision.datasets as dset

import time
import matplotlib.pyplot as plt
from PIL import Image
import pandas as pd
from tqdm import tqdm
import random
from sklearn.model_selection import KFold

# Allosaurus
from allosaurus.audio import read_audio
from allosaurus.app import read_recognizer
from allosaurus.am.utils import *

In [None]:
# Root folder of the IEMOCAP release on the mounted Drive. The path is relative,
# so it resolves against the notebook's current working directory.
data_dir = os.path.join("drive", "MyDrive", "18786 IDL", "IDL Project", "data", "IEMOCAP_full_release")
# data_dir = os.path.join("drive", "MyDrive", "IDL Project", "data", "IEMOCAP_full_release")
print(data_dir)

drive/MyDrive/18786 IDL/IDL Project/data/IEMOCAP_full_release


In [None]:
# Load the allosaurus recognizer with its default arguments. Later cells use
# recognizer.pm.compute(...) on the raw audio and recognizer.am(...) on the batched features.
recognizer = read_recognizer()

downloading model  latest
from:  https://github.com/xinjli/allosaurus/releases/download/v1.0/latest.tar.gz
to:    /usr/local/lib/python3.7/dist-packages/allosaurus/pretrained
please wait...


In [None]:
# Load the IEMOCAP metadata table (read from the current working directory).
# The cells below rely on its `path` and `emotion` columns.
df = pd.read_csv("iemocap_full_dataset.csv")
df.shape

(10039, 7)

In [None]:
# Restrict the metadata to utterances labelled with one of the four target emotions:
# rows marked 'xxx' (no emotion label) go first, then every other non-target label.
target_emotions = ['neu', 'hap', 'sad', 'ang']
df = df[df.emotion != 'xxx']
df = df.drop(df[~df.emotion.isin(target_emotions)].index)

# Copy of the table whose `path` column holds only the file name; that name is
# the key of the file -> emotion lookup.
df_unedit = df.copy()
df_unedit["path"] = df_unedit["path"].map(lambda full_path: full_path.rsplit('/', 1)[-1])
all_files = df_unedit.path.tolist()
file_to_emotion = dict(zip(df_unedit.path, df_unedit.emotion))

# Paths exactly as stored in the CSV (relative to the dataset root).
all_full_files = df.path.tolist()
for item in (df, df_unedit, len(file_to_emotion), file_to_emotion, all_full_files):
    print(item)

In [None]:
from collections import Counter

# Integer encoding of the four emotions kept above, and the reverse lookup.
# emotion_to_label = {'neu': 0, 'fru': 1, 'sad': 2, 'sur': 3, 'ang': 4, 'hap': 5, 'exc': 6, 'fea': 7, 'dis': 8, 'oth': 9}
emotion_to_label = dict(neu=0, hap=1, sad=2, ang=3)
label_to_emotion = dict((label, emotion) for emotion, label in emotion_to_label.items())
print(emotion_to_label)
print(label_to_emotion)

# Class balance: number of files carrying each emotion.
emotion_instances_list = list(file_to_emotion.values())
counter = Counter(emotion_instances_list)
print(counter)

{'neu': 0, 'hap': 1, 'sad': 2, 'ang': 3}
{0: 'neu', 1: 'hap', 2: 'sad', 3: 'ang'}
Counter({'neu': 1708, 'ang': 1103, 'sad': 1084, 'hap': 595})


In [None]:
# Map every file name to its integer class id via emotion_to_label.
file_to_label = {k: emotion_to_label[v] for k, v in file_to_emotion.items()}
print(file_to_label)

# AESDD setup

In [None]:
# NOTE(review): data_dir is re-assigned to the same IEMOCAP path defined earlier, and
# `mapping` (five emotion names -> ids) is not referenced by any other cell visible in
# this notebook -- confirm whether this cell is still needed.
data_dir = os.path.join("drive", "MyDrive", "18786 IDL", "IDL Project", "data", "IEMOCAP_full_release")
mapping = {'happiness': 0, 'sadness': 1, 'anger': 2, 'disgust': 3, 'fear': 4}

# Dataset

In [None]:
class MyDataset(Dataset):
    """Dataset pairing audio file paths with integer class labels.

    Indexing an item reads the audio file, runs it through ``recognizer.pm.compute``
    and returns ``(features, n_frames, label)``:

    * ``features``  -- tensor built from the ``pm.compute`` output,
    * ``n_frames``  -- 1-element int32 tensor holding ``features.shape[0]``,
    * ``label``     -- 1-element float tensor holding the target of that file.

    ``recognizer`` and ``read_audio`` are taken from the notebook namespace.
    """

    def __init__(self, file_list, target_list):
        # `x` / `y` are aliases of the same two lists.
        self.file_list = self.x = file_list
        self.target_list = self.y = target_list
        self.num_classes = len(set(target_list))

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, index):
        audio = read_audio(self.file_list[index])
        feats = torch.tensor(recognizer.pm.compute(audio)).detach()
        n_frames = torch.tensor([feats.shape[0]], dtype=torch.int32).detach()
        label = torch.Tensor([self.target_list[index]])
        return feats, n_frames, label

In [None]:
# Work from /content so the relative "drive/..." paths used in this notebook resolve,
# then list the directory as a sanity check.
%cd /content
!ls

/content
gdrive	sample_data


In [None]:
# collate function
def pad_collate(batch):
    """Collate dataset items into a length-sorted batch of acoustic-model outputs.

    Args:
        batch: list of ``(x, x_len, y)`` tuples as returned by ``MyDataset.__getitem__``
            (``x_len`` and ``y`` are 1-element tensors).

    Returns:
        ``(output_tensor, input_lengths, targets)``: the two values returned by
        ``recognizer.am(..., return_lstm=True)`` (detached), and the targets reordered
        to match the length-sorted batch.
    """
    # batch looks like [(x0, xlen0, y0), (x4, xlen4, y4), (x2, xlen2, y2)... ]
    feats = [sample[0] for sample in batch]
    # Concatenating the 1-element length tensors always yields a 1-D (batch,) tensor.
    # The previous pad_sequence(...).squeeze() collapsed to 0-D for a batch of one,
    # which broke the index-based reordering below.
    feat_lens = torch.cat([sample[1] for sample in batch])
    target_list = torch.Tensor([sample[2] for sample in batch])

    feats = pad_sequence(feats, batch_first=True, padding_value=0)  # (batch, max_len, features)
    # Sort by decreasing length, as required by the LSTMs in the acoustic model.
    idx = torch.argsort(feat_lens, descending=True)

    # reorder
    targets = target_list[idx]
    tensor_batch_feat, tensor_batch_feat_len = move_to_tensor([feats[idx], feat_lens[idx]], device_id=-1) # converting to the required tensors

    # Features. The outputs are detached anyway, so skip building an autograd graph.
    with torch.no_grad():
        output_tensor, input_lengths = recognizer.am(tensor_batch_feat, tensor_batch_feat_len, return_lstm=True) # output_shape: [len,batch,features]
    output_tensor = output_tensor.detach()
    input_lengths = input_lengths.detach()

    return output_tensor, input_lengths, targets

In [None]:
# Full path of every kept utterance, rooted at the shared data_dir instead of
# repeating the Drive folder components in a second place.
all_file_paths = [os.path.join(data_dir, file_path) for file_path in all_full_files]
total_instances = len(all_file_paths)
print(total_instances)

4490


In [None]:
# Extract archive.tar.gz into the current working directory.
# NOTE(review): the archive's origin and contents are not shown in this notebook -- confirm it is still required.
!tar -xf archive.tar.gz

In [None]:
# Split sizes: 80% for training; the remainder is halved into validation and test
# (test receives whatever is left after rounding the validation share).
num_train = round(0.8 * total_instances)
num_test_all = total_instances - num_train
num_val = round(0.5 * num_test_all)
num_test = num_test_all - num_val

for split_name, split_size in (("training", num_train), ("validation", num_val), ("test", num_test)):
    print(f"number {split_name} instances:", str(split_size))
assert sum((num_train, num_val, num_test)) == total_instances

number training instances: 3592
number validation instances: 449
number test instances: 449


In [None]:
# Shuffle once with a fixed seed so the train/val/test split is reproducible.
import random
random.seed(2021)

shuffled_data_paths = random.sample(all_file_paths, k=total_instances)
train_list_paths = shuffled_data_paths[:num_train]
testall_list_paths = shuffled_data_paths[num_train:]
val_list_paths = testall_list_paths[:num_val]
# The test split starts where the validation split ends. Slicing from num_test (as
# before) is only correct when num_val == num_test; otherwise the two splits would
# overlap or skip samples.
test_list_paths = testall_list_paths[num_val:]

assert len(train_list_paths) + len(val_list_paths) + len(test_list_paths) == total_instances
assert len(val_list_paths) == num_val and len(test_list_paths) == num_test

# train, val, test variables:
# train_list_paths
# val_list_paths
# test_list_paths

In [None]:
# Labels for each split, looked up by file name (file_to_label is keyed by the
# last '/'-separated component of the path).
def _labels_for(paths):
    """Return the integer label of every path in `paths`, in order."""
    return [file_to_label[path.rsplit('/', 1)[-1]] for path in paths]

train_list_labels = _labels_for(train_list_paths)
val_list_labels = _labels_for(val_list_paths)
test_list_labels = _labels_for(test_list_paths)

for split_labels, split_paths in (
    (train_list_labels, train_list_paths),
    (val_list_labels, val_list_paths),
    (test_list_labels, test_list_paths),
):
    assert len(split_labels) == len(split_paths)

In [None]:
# train dataloader
# Batches of 32 are reshuffled every epoch and assembled by pad_collate;
# drop_last=True discards the final incomplete batch.
train_dset = MyDataset(train_list_paths, train_list_labels)
train_args = dict(shuffle=True, batch_size=32, num_workers=2, collate_fn=pad_collate, drop_last=True)  # change to num_workers=4 on diff platform
train_loader = DataLoader(train_dset, **train_args)

In [None]:
# val dataloader
# NOTE(review): drop_last=True also discards the final incomplete validation batch, so
# validation accuracy is computed on a multiple of 32 samples rather than the whole split.
val_dset = MyDataset(val_list_paths, val_list_labels)
val_args = dict(shuffle=False, batch_size=32, num_workers=2, collate_fn=pad_collate, drop_last=True)
val_loader = DataLoader(val_dset, **val_args)

In [None]:
# Pull one collated batch as a sanity check (despite the name, it comes from the training loader).
test_batch = next(iter(train_loader))

In [None]:
# Unpack the sample batch: features, per-sequence lengths (sorted in decreasing order
# by pad_collate) and the matching targets.
x, x_len, y = test_batch
print(x.shape)  # seq_len, batch_size, input_size
print(x_len)
print(y)

torch.Size([363, 32, 640])
tensor([363, 335, 305, 274, 229, 163, 158, 143, 140, 129, 127, 121, 120, 115,
        112, 111, 108,  98,  96,  94,  88,  75,  74,  73,  69,  63,  62,  57,
         56,  47,  41,  40], dtype=torch.int32)
tensor([0., 0., 2., 3., 0., 2., 2., 0., 3., 0., 2., 2., 2., 2., 2., 2., 0., 3.,
        0., 0., 0., 0., 0., 2., 2., 0., 0., 3., 0., 0., 0., 0.])


## Model

In [None]:
class ICASSP3CNN(nn.Module):
    """Three parallel Conv1d branches (kernel sizes 3, 5, 7) -> BatchNorm -> LSTM -> linear.

    Args:
        embed_size: feature size of each input frame (channel count of the convolutions).
        hidden_size: hidden size of the LSTM.
        num_lstm_layers: number of stacked LSTM layers.
        bidirectional: whether the LSTM is bidirectional.
        label_size: number of output classes.
    """

    def __init__(self, embed_size=640, hidden_size=512, num_lstm_layers = 2, bidirectional = False, label_size=31):
        super().__init__()
        self.n_layers = num_lstm_layers
        self.hidden = hidden_size
        self.bidirectional = bidirectional

        # Padding keeps the sequence length unchanged for every kernel size.
        self.cnn  = nn.Conv1d(embed_size, embed_size, kernel_size=3, padding=1)
        self.cnn2 = nn.Conv1d(embed_size, embed_size, kernel_size=5, padding=2)
        self.cnn3 = nn.Conv1d(embed_size, embed_size, kernel_size=7, padding=3)

        self.batchnorm = nn.BatchNorm1d(3 * embed_size)

        self.lstm = nn.LSTM(input_size = 3 * embed_size,
                            hidden_size = hidden_size,
                            num_layers = num_lstm_layers,
                            bidirectional = bidirectional)

        self.linear = nn.Linear(in_features = 2 * hidden_size if bidirectional else hidden_size,
                                out_features = label_size)

    def forward(self, x, lengths):
        """Classify a batch of padded sequences.

        Args:
            x: padded features shaped (seq_len, batch_size, embed_size).
            lengths: valid length of each sequence, in decreasing order
                (pack_padded_sequence is used with its default enforce_sorted=True).

        Returns:
            Logits shaped (batch_size, label_size).
        """
        x = x.permute(1, 2, 0)    # (seq_len, batch_size, input_size) -> (batch_size, input_size, seq_len)

        # Concatenate the three branches along the channel axis.
        cnn_output = torch.cat([self.cnn(x), self.cnn2(x), self.cnn3(x)], dim=1)

        # Back to (batch_size, seq_len, channels) for packing.
        features = F.relu(self.batchnorm(cnn_output)).transpose(1, 2)

        packed = nn.utils.rnn.pack_padded_sequence(features, lengths, batch_first=True)
        _, (hn, _) = self.lstm(packed)

        if self.bidirectional:
            # hn is (num_layers * 2, batch, hidden) with the last two entries being the
            # top layer's forward and backward states. Indexing them directly avoids
            # the old view(), which used x.shape[0] (the sequence length) as batch size
            # and therefore failed for bidirectional models.
            h_n = torch.cat([hn[-2], hn[-1]], dim=1)
        else:
            h_n = hn[-1]

        return self.linear(h_n)
        
        
class ICASSP2CNN(nn.Module):
    """Two parallel Conv1d branches (kernel sizes 3, 5) -> BatchNorm -> LSTM -> linear.

    Args:
        embed_size: feature size of each input frame (channel count of the convolutions).
        hidden_size: hidden size of the LSTM.
        num_lstm_layers: number of stacked LSTM layers.
        bidirectional: whether the LSTM is bidirectional.
        label_size: number of output classes.
    """

    def __init__(self, embed_size=640, hidden_size=512, num_lstm_layers = 2, bidirectional = False, label_size=31):
        super().__init__()
        self.n_layers = num_lstm_layers
        self.hidden = hidden_size
        self.bidirectional = bidirectional

        # Padding keeps the sequence length unchanged for every kernel size.
        self.cnn  = nn.Conv1d(embed_size, embed_size, kernel_size=3, padding=1)
        self.cnn2 = nn.Conv1d(embed_size, embed_size, kernel_size=5, padding=2)

        self.batchnorm = nn.BatchNorm1d(2 * embed_size)

        self.lstm = nn.LSTM(input_size = 2 * embed_size,
                            hidden_size = hidden_size,
                            num_layers = num_lstm_layers,
                            bidirectional = bidirectional)

        self.linear = nn.Linear(in_features = 2 * hidden_size if bidirectional else hidden_size,
                                out_features = label_size)

    def forward(self, x, lengths):
        """Classify a batch of padded sequences.

        Args:
            x: padded features shaped (seq_len, batch_size, embed_size).
            lengths: valid length of each sequence, in decreasing order
                (pack_padded_sequence is used with its default enforce_sorted=True).

        Returns:
            Logits shaped (batch_size, label_size).
        """
        x = x.permute(1, 2, 0)    # (seq_len, batch_size, input_size) -> (batch_size, input_size, seq_len)

        # Concatenate the two branches along the channel axis.
        cnn_output = torch.cat([self.cnn(x), self.cnn2(x)], dim=1)

        # Back to (batch_size, seq_len, channels) for packing.
        features = F.relu(self.batchnorm(cnn_output)).transpose(1, 2)

        packed = nn.utils.rnn.pack_padded_sequence(features, lengths, batch_first=True)
        _, (hn, _) = self.lstm(packed)

        if self.bidirectional:
            # hn is (num_layers * 2, batch, hidden) with the last two entries being the
            # top layer's forward and backward states. Indexing them directly avoids
            # the old view(), which used x.shape[0] (the sequence length) as batch size
            # and therefore failed for bidirectional models.
            h_n = torch.cat([hn[-2], hn[-1]], dim=1)
        else:
            h_n = hn[-1]

        return self.linear(h_n)
    

class ICASSP1CNN(nn.Module):
    """Single Conv1d (kernel size 3) -> BatchNorm -> LSTM -> linear classifier.

    Args:
        embed_size: feature size of each input frame (channel count of the convolution).
        hidden_size: hidden size of the LSTM.
        num_lstm_layers: number of stacked LSTM layers.
        bidirectional: whether the LSTM is bidirectional.
        label_size: number of output classes.
    """

    def __init__(self, embed_size=640, hidden_size=512, num_lstm_layers = 2, bidirectional = False, label_size=31):
        super().__init__()
        self.n_layers = num_lstm_layers
        self.hidden = hidden_size
        self.bidirectional = bidirectional

        # Padding keeps the sequence length unchanged.
        self.cnn  = nn.Conv1d(embed_size, embed_size, kernel_size=3, padding=1)

        self.batchnorm = nn.BatchNorm1d(embed_size)

        self.lstm = nn.LSTM(input_size = embed_size,
                            hidden_size = hidden_size,
                            num_layers = num_lstm_layers,
                            bidirectional = bidirectional)

        self.linear = nn.Linear(in_features = 2 * hidden_size if bidirectional else hidden_size,
                                out_features = label_size)

    def forward(self, x, lengths):
        """Classify a batch of padded sequences.

        Args:
            x: padded features shaped (seq_len, batch_size, embed_size).
            lengths: valid length of each sequence, in decreasing order
                (pack_padded_sequence is used with its default enforce_sorted=True).

        Returns:
            Logits shaped (batch_size, label_size).
        """
        x = x.permute(1, 2, 0)    # (seq_len, batch_size, input_size) -> (batch_size, input_size, seq_len)

        cnn_output = self.cnn(x)

        # Back to (batch_size, seq_len, channels) for packing.
        features = F.relu(self.batchnorm(cnn_output)).transpose(1, 2)

        packed = nn.utils.rnn.pack_padded_sequence(features, lengths, batch_first=True)
        _, (hn, _) = self.lstm(packed)

        if self.bidirectional:
            # hn is (num_layers * 2, batch, hidden) with the last two entries being the
            # top layer's forward and backward states. Indexing them directly avoids
            # the old view(), which used x.shape[0] (the sequence length) as batch size
            # and therefore failed for bidirectional models.
            h_n = torch.cat([hn[-2], hn[-1]], dim=1)
        else:
            h_n = hn[-1]

        return self.linear(h_n)
        

In [None]:
# Scratch check of how permute reorders axes on a dummy tensor.
# NOTE(review): this uses permute(2, 0, 1), whereas the models' forward() uses permute(1, 2, 0).
x = torch.zeros([468, 64, 640])
print(x.shape)
x = x.permute(2, 0, 1) 
print(x.shape)

torch.Size([468, 64, 640])
torch.Size([640, 468, 64])


## Training

In [None]:
def train_language_model(train_loader_LM, model, opt, criterion, device):
    """Run one optimisation pass over ``train_loader_LM`` and return the mean batch loss.

    Args:
        train_loader_LM: iterable of ``(x, lengths, y)`` batches.
        model: module called as ``model(x, lengths)``. Its output is permuted with
            ``(0, 2, 1)`` before the loss, so it must be 3-D (presumably
            (batch, seq, classes) -- TODO confirm against the model used).
        opt: optimizer stepped once per batch.
        criterion: loss called as ``criterion(permuted_logits, y)``.
        device: device that ``x`` and ``y`` are moved to.

    Returns:
        ``(model, NLL)`` where ``NLL`` is the sum of the per-batch losses divided by
        the number of batches.
    """
    loss_accum = 0.0
    batch_cnt = 0

    model.train()
    for x, lengths, y in train_loader_LM:
        x = x.to(device)
        # lengths are handed to the model unchanged (they are not moved to `device`).
        y = y.long().to(device)
        opt.zero_grad()

        logits = model(x, lengths)

        loss = criterion(logits.permute(0, 2, 1), y)
        loss_accum += loss.item()
        batch_cnt += 1

        loss.backward()
        opt.step()

    NLL = loss_accum / batch_cnt

    return model, NLL


def train_model(train_loader, model, opt, criterion, device, epoch_num):
    """Train ``model`` for one epoch and report its accuracy and mean loss.

    Args:
        train_loader: iterable of ``(x, lengths, y)`` batches.
        model: module called as ``model(x, lengths)`` returning (batch, classes) logits.
        opt: optimizer stepped once per batch.
        criterion: loss called as ``criterion(logits, y)``.
        device: device that ``x`` and ``y`` are moved to.
        epoch_num: epoch index, used only in the progress log.

    Returns:
        ``(model, training_accuracy, training_loss)``. Accuracy is measured on the
        logits produced before each optimizer step; loss is the mean of the per-batch
        losses.
    """
    loss_accum = 0.0
    batch_cnt = 0

    acc_cnt = 0     #count correct predictions
    err_cnt = 0     #count incorrect predictions

    window_loss = 0.0   # loss summed over the current 10-batch logging window

    model.train()
    for batch, (x, lengths, y) in enumerate(train_loader):
        x = x.to(device)
        # lengths are handed to the model unchanged (they are not moved to `device`).
        y = y.long().to(device)
        opt.zero_grad()

        logits = model(x, lengths)

        loss = criterion(logits, y)
        loss_score = loss.item()

        window_loss += loss_score
        loss_accum += loss_score
        batch_cnt += 1
        loss.backward()
        opt.step()

        if batch % 10 == 9:
            print('Epoch: {}\tBatch: {}\tAvg-Loss: {:.4f}'.format(epoch_num, batch + 1, window_loss / 10))
            window_loss = 0.0

        # Count hits for the whole batch at once instead of comparing sample by sample.
        _, out_indices = torch.max(logits, dim=1)
        n_correct = (out_indices == y).sum().item()
        acc_cnt += n_correct
        err_cnt += len(out_indices) - n_correct

    training_accuracy = acc_cnt / (err_cnt + acc_cnt)
    training_loss = loss_accum / batch_cnt

    return model, training_accuracy, training_loss


def test_model(loader, model, opt, criterion, device):
    model.eval()
    acc_cnt = 0
    err_cnt = 0

    for x, lengths, y in loader:
        
        x = x.to(device)
        y = y.long().to(device)
        
        logits = model(x, lengths)

        out_val, out_indices = torch.max(logits, dim=1)
        tar_indices = y

        for i in range(len(out_indices)):
            if out_indices[i] == tar_indices[i]:
                acc_cnt += 1
            else:
                err_cnt += 1

    current_acc = acc_cnt/(err_cnt+acc_cnt)
    
    return current_acc

## Main runner

In [None]:
# Smoke test: train a freshly initialised 3-CNN model for a single epoch.
cuda = torch.cuda.is_available()
device = torch.device("cuda" if cuda else "cpu")
# Four output classes, matching emotion_to_label (the constructor default is 31).
model = ICASSP3CNN(label_size=4)
model.to(device)
opt = optim.Adam(model.parameters(), lr = 0.001)
criterion = nn.CrossEntropyLoss()
# train_model requires the epoch number (used in its progress log); omitting it raised a TypeError.
model, train_acc, train_loss = train_model(train_loader, model, opt, criterion, device, epoch_num=0)
print("train acc: ", train_acc, " train loss: ", train_loss)

train acc:  0.35128348214285715  train loss:  1.4221620282956533 --time: 1042.4626715183258


In [None]:
# os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
# os.environ["CUDA_VISIBLE_DEVICES"]="0"

run_num = 5
n_epochs = 30
batch_size = 32   # used only in the checkpoint file name; the loaders set their own batch size
lr = 0.001
cuda = torch.cuda.is_available()
device = torch.device("cuda" if cuda else "cpu")

# Training grid. The full search space is kept for reference:
# model_list = ['1CNN', '2CNN', '3CNN']
# emb_size_list = [640, 640, 640, 640]
# hidden_size_list = [128, 256, 384, 512]
# num_lstm_layers_list = [1, 2, 3]
# bidirectional_list = [False, True]
model_list = ['3CNN']
emb_size_list = [640]
hidden_size_list = [256]
num_lstm_layers_list = [3]
bidirectional_list = [False]

model_classes = {'1CNN': ICASSP1CNN, '2CNN': ICASSP2CNN, '3CNN': ICASSP3CNN}

# torch.save does not create missing folders, so make sure the run directory exists.
checkpoint_dir = os.path.join("drive", "MyDrive", "18786 IDL", "IDL Project", "saved_models", f"run{run_num}")
os.makedirs(checkpoint_dir, exist_ok=True)

for model_name in model_list:
    for bidirectionality in bidirectional_list:
        for emb_size in emb_size_list:
            for num_lstm in num_lstm_layers_list:
                for hidden_size in hidden_size_list:

                    start_time = time.time()

                    # Log the configuration being trained.
                    metadata = ','.join(str(value) for value in (model_name, bidirectionality, emb_size, num_lstm, hidden_size))
                    print(metadata)

                    # Fail loudly on an unknown name instead of silently reusing a
                    # `model` left over from an earlier iteration or cell.
                    if model_name not in model_classes:
                        raise ValueError("unknown model name: " + str(model_name))
                    model = model_classes[model_name](emb_size, hidden_size, num_lstm, bidirectionality, label_size=4)
                    model.to(device)

                    # Use the configured learning rate (it is also part of the checkpoint name).
                    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=5e-5)
                    criterion = nn.CrossEntropyLoss()
                    # Halve the learning rate every 4 epochs.
                    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.5, verbose=True)

                    print(model)

                    max_valid_acc = 0
                    for epoch in range(n_epochs):
                        model, train_acc, train_loss = train_model(train_loader, model, optimizer, criterion, device, epoch)
                        valid_acc = test_model(val_loader, model, optimizer, criterion, device)

                        if valid_acc > max_valid_acc:
                            max_valid_acc = valid_acc

                        scheduler.step()

                        # NOTE: the file name does not encode the grid configuration, so
                        # checkpoints of different configurations overwrite each other
                        # when the grid has more than one entry.
                        print("SAVING CHECKPOINT")
                        save_path = os.path.join(checkpoint_dir, f"epoch{epoch}_batchsize{batch_size}_lr{lr}.pth")
                        torch.save({
                                'epoch': epoch,
                                'model_state_dict': model.state_dict(),
                                'optimizer_state_dict': optimizer.state_dict(),
                                'train_loss': train_loss,
                                'train_acc': train_acc,
                                'val_acc': valid_acc,
                                'scheduler_state_dict': scheduler.state_dict()
                            }, save_path)

                        # Print log of accuracy and loss
                        print("Epoch: "+str(epoch)+", Training Accuracy: "+str(train_acc)+", Training loss:"+str(train_loss)+ ", Validation accuracy:"+str(valid_acc))

                    # Report the summary values that were previously computed but never shown.
                    total_time = (time.time() - start_time)/60
                    print("Finished " + metadata + " in {:.1f} min, best validation accuracy: {}".format(total_time, max_valid_acc))
            

3CNN,False,640,3,256
Adjusting learning rate of group 0 to 1.0000e-03.
ICASSP3CNN(
  (cnn): Conv1d(640, 640, kernel_size=(3,), stride=(1,), padding=(1,))
  (cnn2): Conv1d(640, 640, kernel_size=(5,), stride=(1,), padding=(2,))
  (cnn3): Conv1d(640, 640, kernel_size=(7,), stride=(1,), padding=(3,))
  (batchnorm): BatchNorm1d(1920, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (lstm): LSTM(1920, 256, num_layers=3)
  (linear): Linear(in_features=256, out_features=4, bias=True)
)
Epoch: 0	Batch: 10	Avg-Loss: 1.3174
Epoch: 0	Batch: 20	Avg-Loss: 1.3681
Epoch: 0	Batch: 30	Avg-Loss: 1.2849
Epoch: 0	Batch: 40	Avg-Loss: 1.2183
Epoch: 0	Batch: 50	Avg-Loss: 1.2660
Epoch: 0	Batch: 60	Avg-Loss: 1.2727
Epoch: 0	Batch: 70	Avg-Loss: 1.2262
Epoch: 0	Batch: 80	Avg-Loss: 1.2205
Epoch: 0	Batch: 90	Avg-Loss: 1.2692
Epoch: 0	Batch: 100	Avg-Loss: 1.2531
Epoch: 0	Batch: 110	Avg-Loss: 1.2122
Adjusting learning rate of group 0 to 1.0000e-03.
SAVING CHECKPOINT
Epoch: 0, Training Accuracy: 0.393

KeyboardInterrupt: ignored

# Inference on Test Set

## test loader

In [None]:
# test loader
# NOTE(review): drop_last=True discards the final incomplete test batch, so test
# accuracy is computed on a multiple of 32 samples rather than the whole split.
test_dset = MyDataset(test_list_paths, test_list_labels)
test_args = dict(shuffle=False, batch_size=32, num_workers=2, collate_fn=pad_collate, drop_last=True)
test_loader = DataLoader(test_dset, **test_args)

## load model for inference


In [None]:
# Checkpoint to evaluate: run 5, epoch 23 (file name pattern written by the training loop).
load_pth = "/content/drive/MyDrive/18786 IDL/IDL Project/saved_models/run5/epoch23_batchsize32_lr0.001.pth"

In [None]:
# Peek at the stored validation accuracy. map_location="cpu" lets a checkpoint that
# was saved from a GPU run be opened on a CPU-only runtime as well.
checkpoint = torch.load(load_pth, map_location="cpu")
print(checkpoint["val_acc"])

0.5848214285714286


In [None]:
# Rebuild the architecture with the same hyper-parameters as the trained configuration
# (3CNN, 640, 256 hidden, 3 LSTM layers, unidirectional, 4 classes) so the saved
# state_dict can be loaded into it.
model = ICASSP3CNN(embed_size=640, hidden_size=256, num_lstm_layers=3, bidirectional=False, label_size=4)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

ICASSP3CNN(
  (cnn): Conv1d(640, 640, kernel_size=(3,), stride=(1,), padding=(1,))
  (cnn2): Conv1d(640, 640, kernel_size=(5,), stride=(1,), padding=(2,))
  (cnn3): Conv1d(640, 640, kernel_size=(7,), stride=(1,), padding=(3,))
  (batchnorm): BatchNorm1d(1920, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (lstm): LSTM(1920, 256, num_layers=3)
  (linear): Linear(in_features=256, out_features=4, bias=True)
)

## get test accuracy

In [None]:
def get_test_acc(model_pth, test_loader):
    """Load checkpoint weights into the notebook-level ``model`` and return its accuracy.

    Relies on the module-level ``model`` and ``device`` defined in the cell above.

    Args:
        model_pth: path of a checkpoint containing a ``"model_state_dict"`` entry.
        test_loader: iterable of ``(x, lengths, y)`` batches.

    Returns:
        Fraction of samples whose arg-max prediction equals the target.
    """
    # map_location lets a checkpoint saved from a GPU run be restored on a CPU-only runtime.
    checkpoint = torch.load(model_pth, map_location=device)
    model.load_state_dict(checkpoint["model_state_dict"])

    model.eval()
    test_num_correct = 0
    total = 0
    # Inference needs no gradients; disabling them avoids building autograd graphs.
    with torch.no_grad():
        for x, lengths, y in test_loader:
            x = x.to(device)
            y = y.long().to(device)

            logits = model(x, lengths)
            test_num_correct += (torch.argmax(logits, dim=1) == y).sum().item()
            total += len(y)

    test_acc = test_num_correct / total
    return test_acc

In [None]:
# Evaluate the selected checkpoint on the held-out test loader.
test_acc = get_test_acc(load_pth, test_loader)
print(test_acc)

0.5714285714285714
