# ARI 3210

In [1]:
%%html
<style type='text/css'>
.CodeMirror{
font-family: JetBrains Mono;
</style>

In [2]:
import os
import torch
import torchvision
import numpy as np
import pandas as pd
import torch.nn as nn
import matplotlib.pyplot as plt

from torchvision import datasets, transforms
from sklearn.model_selection import train_test_split
from torch.utils.data.dataloader import default_collate

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

In [3]:
#Show an image from the dataloader
def imshow(img):
    img = img/2 +0.5
    plt.imshow(np.transpose(img.cpu().numpy(), (1,2,0)))
    plt.show()

## Defining Model

In [4]:
batch_size = 2
num_workers = 2
train_folder = os.path.join('..', 'data', 'train')
val_folder = os.path.join('..', 'data', 'val')
test_folder = os.path.join('..', 'data', 'test')

speakers = [speaker for speaker in os.listdir(train_folder)]
num_speakers = len(speakers)


transform = transforms.Compose([transforms.Grayscale(),
                                transforms.ToTensor(),
                                transforms.Normalize(0.5, 0.5)])

#Train Loader
train_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder(train_folder, transform=transform),
    batch_size=batch_size,
    shuffle=True, drop_last=True,
    num_workers=num_workers)

#Validate Loader
val_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder(val_folder, transform=transform),
    batch_size=batch_size,
    shuffle=False, num_workers=num_workers)

#Test Loader
test_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder(test_folder, transform=transform),
    batch_size=batch_size,
    shuffle=False, num_workers=num_workers)

In [12]:
class Model(nn.Module):

    def __init__(self, hidden_size: int, num_speakers: int, k:int):
        super(Model, self).__init__()
        
        self.k = k
        self.hidden_size = hidden_size
        
        #Convolutional Layers.
        self.conv1 = nn.Sequential(
                           nn.Conv2d(1, 8, kernel_size=k, stride=1,
                                    padding=1, device=device),
                           nn.ReLU(),
                           nn.MaxPool2d(kernel_size=2, stride=2),
                           nn.Dropout(p=0.2))     
        self.conv2 = nn.Sequential(
                           nn.Conv2d(8, 16, kernel_size=k, stride=1,
                                     padding=1, device=device),
                           nn.ReLU(),
                           nn.MaxPool2d(kernel_size=2, stride=2),
                           nn.Dropout(p=0.5))        
        
        # LSTM Layer.
        self.lstm = nn.LSTM(hidden_size*12*9, hidden_size, device=device)

        # Final Layer.
        self.fc = nn.Linear(hidden_size, num_speakers, device=device)
        
    def forward(self, x):
        
        #Convolute
        x = self.conv1(x)
        x = self.conv2(x)
        
        #Flatten
        x = x.view(-1,self.hidden_size*9*12)
        
        #LSTM
        x,_ = self.lstm(x)
        
        #Output Layer
        return self.fc(x)

In [13]:
#Free up memory
torch.cuda.empty_cache()

#Initialise model
model = Model(hidden_size=128, num_speakers=num_speakers, k=3)
model.to(device)

optimiser = torch.optim.AdamW(model.parameters())

mbt = 0 #The number of Mini-batches Trained (mbt) on.
train_errors = []
val_errors   = []


for step in range(10):
    
    #Train on mini-batches.
    for i, (inputs, labels) in enumerate(train_loader):
        
        print(f'Training: {i}     ', end='\r')
        
        #Pass tesors tensors to GPU
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        #Pass batch.
        optimiser.zero_grad()
        
        print(model(inputs).shape)
        print(labels.shape)
        
        error = torch.nn.functional.cross_entropy(
            model(inputs), labels)
        
        #Optimise.
        error.backward()
        optimiser.step()      
        
        #Track train loss every mini-batch.
        train_errors.append((mbt := mbt+1), error.tolist())

    #Validate on mini-batches.
    with torch.no_grad():
        
        val_error = 0
        
        for j, (val_inputs, val_labels) in enumerate(val_loader):
            
            print(f'Validating: {j}   ', end='\r')
            
            val_error += torch.nn.functional.cross_entropy(
                            model(val_inputs), val_labels).tolist()
        
        #Track validate loss as average of all mini-batches.
        val_errors.append(mbt, (val_error/(j+1)))

Training: 0     torch.Size([1, 284])
torch.Size([2])


ValueError: Expected input batch_size (1) to match target batch_size (2).

In [None]:
x = torch.rand([4,128,60,80])
x = x.view(-1,128*60*80)


In [None]:
images,labels = iter(train_loader).next()
imshow(torchvision.utils.make_grid(images))
[speakers[i] for i in labels.tolist()]

In [None]:
import multiprocessing
multiprocessing.cpu_count()