# Project - Primary Model & Baseline Model <img src="https://upload.wikimedia.org/wikipedia/en/thumb/0/04/Utoronto_coa.svg/1024px-Utoronto_coa.svg.png" width=60px align="right"> 
### APS360 - Applied Fundamentals of Machine Learning

---
**Members**
- Javiera Bao
- Kieran Kasha
- Rishik Kumar
- Abhay Verma

## 0. Imports

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import matplotlib.pyplot as plt
from torch.utils.data import Dataset 
from torch.utils.data.sampler import SubsetRandomSampler
import torchvision.transforms as transforms
import pandas as pd
from skimage import io
from math import floor
from PIL import Image
import seaborn as sn
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score

# Request GPU execution; code below combines this flag with
# torch.cuda.is_available() before moving anything to CUDA.
use_cuda = True

In [None]:
# Mount Google Drive so the shared-drive files are reachable from Colab
from google.colab import drive

GDRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(GDRIVE_MOUNT_POINT)

Mounted at /content/gdrive


## 1. Getting Preprocessed Data

In [None]:
# Image Dataset Creator

class SoundFilesDataset (Dataset):
    """Dataset of spectrogram images described by an annotations CSV.

    Column 1 of the CSV holds the image file name (without the ``.png``
    extension) and column 3 holds the instrument name. Each item is an
    ``(image, label)`` pair where ``label`` is the integer class index from
    ``LABEL_TO_INDEX``.

    Args:
        csv_file: Path to the annotations CSV, read with ``pd.read_csv``.
        transform: Optional callable applied to the loaded image.
        image_dir: Directory containing the ``.png`` files. Defaults to
            ``DEFAULT_IMAGE_DIR``.
    """

    # Location of the spectrogram PNGs used when no image_dir is given
    DEFAULT_IMAGE_DIR = '/content/gdrive/Shareddrives/APS360/spectrograms/'

    # Instrument name -> integer class index
    LABEL_TO_INDEX = {
        'acoustic guitar': 0,
        'cello': 1,
        'clarinet': 2,
        'double bass': 3,
        'drum set': 4,
        'flute': 5,
        'piano': 6,
        'viola': 7,
        'violin': 8,
        'singer': 9,
    }

    def __init__(self, csv_file, transform = None, image_dir = None):
        self.annotations = pd.read_csv(csv_file)
        self.transform = transform
        self.image_dir = self.DEFAULT_IMAGE_DIR if image_dir is None else image_dir

    def __len__(self):
        """Return the number of rows in the annotations CSV."""
        return len(self.annotations)

    @classmethod
    def encode_label(cls, label):
        """Return the integer class index for an instrument name.

        Raises:
            ValueError: If ``label`` is not one of the known instrument names.
                Failing here gives a clearer error than letting a string label
                reach the loss function.
        """
        try:
            return cls.LABEL_TO_INDEX[label]
        except (KeyError, TypeError):
            raise ValueError(
                "Unknown instrument label {!r}; expected one of {}".format(
                    label, sorted(cls.LABEL_TO_INDEX)
                )
            ) from None

    def __getitem__(self, index):
        """Load the image for row ``index`` and return ``(image, label)``."""
        file_stem = str(self.annotations.iloc[index, 1])
        directory = self.image_dir
        if not directory.endswith('/'):
            directory += '/'
        image = io.imread(directory + file_stem + '.png')

        label = self.encode_label(self.annotations.iloc[index, 3])

        if self.transform:
            image = self.transform(image)

        return (image, label)


In [None]:
# One dataset per split; every image goes through ToTensor before batching
dataset_training = SoundFilesDataset("/content/gdrive/Shareddrives/APS360/final_csv/train.csv", transform= transforms.ToTensor())
dataset_validation = SoundFilesDataset("/content/gdrive/Shareddrives/APS360/final_csv/valid.csv", transform= transforms.ToTensor())
dataset_testing = SoundFilesDataset("/content/gdrive/Shareddrives/APS360/final_csv/test.csv", transform= transforms.ToTensor())

num_workers = 0
batch_size = 5

training_loader = torch.utils.data.DataLoader(dataset_training, batch_size=batch_size, num_workers=num_workers, shuffle = True)
validation_loader = torch.utils.data.DataLoader(dataset_validation, batch_size=batch_size, num_workers=num_workers, shuffle = True)
testing_loader = torch.utils.data.DataLoader(dataset_testing, batch_size=batch_size, num_workers=num_workers, shuffle = True)
# NOTE: the former `fitting_loader` and `Kieran_loader` were dropped here.
# They wrapped `dataset_fitting` / `dataset_Kieran`, which are never defined
# in this notebook, so the cell raised NameError; nothing else uses them.

## 2. Baseline Model

In [None]:
torch.manual_seed(50)
class InstrumentClassifierBaseline(nn.Module):
    """Fully connected baseline classifier producing 10 class logits.

    The input image is flattened and passed through six linear layers with
    ReLU between them. The output is raw logits (no softmax).

    Args:
        in_channels: Number of image channels (default 4).
        height: Image height in pixels (default 288).
        width: Image width in pixels (default 432).
    """

    def __init__(self, in_channels=4, height=288, width=432):
        super(InstrumentClassifierBaseline, self).__init__()

        self.name = 'InstrumentClassifierBaseline'

        # Single source of truth for the flattened size, shared by fc1 and
        # forward(); the two previously disagreed (4*432*288 vs 288*432*100).
        self.input_size = in_channels * height * width

        self.fc1 = nn.Linear(self.input_size, 1000) #input, output
        self.fc2 = nn.Linear(1000, 750) #input, output
        self.fc3 = nn.Linear(750, 500) #input, output
        self.fc4 = nn.Linear(500, 200) #input, output
        self.fc5 = nn.Linear(200, 100) #input, output
        self.fc6 = nn.Linear(100, 10) #input, output

    def forward(self, x):
        """Return logits of shape (batch, 10) for a batch of images ``x``."""
        # Keep the batch dimension and flatten the rest; fc1 then rejects
        # inputs whose per-sample size differs from self.input_size.
        x = x.reshape(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = F.relu(self.fc5(x))
        x = self.fc6(x) #Softmax is applied in training and get_accuracy
        return x

## 3. Primary Model: CNN Architecture

In [None]:
torch.manual_seed(50)
class InstrumentClassifierCNN(nn.Module):
    """Six-stage convolutional classifier producing 10 class logits.

    Each stage is an unpadded convolution, a ReLU, and a 2x2 max-pool with
    stride 2. The final feature map is flattened and passed through two
    linear layers. The output is raw logits (no softmax).

    Args:
        in_channels: Number of image channels (default 4).
        height: Image height in pixels (default 288).
        width: Image width in pixels (default 432).

    Raises:
        ValueError: If the input is too small to survive all six stages.
    """

    def __init__(self, in_channels=4, height=288, width=432):
        super(InstrumentClassifierCNN, self).__init__()

        self.name = 'InstrumentClassifierCNN'
        self.kernelsizes = [5, 5, 5, 3, 3, 3]

        self.conv1 = nn.Conv2d(in_channels, 25, self.kernelsizes[0]) #in_channels, out_channels, kernel_size
        self.conv2 = nn.Conv2d(25, 40, self.kernelsizes[1]) #in_channels, out_channels, kernel_size
        self.conv3 = nn.Conv2d(40, 60, self.kernelsizes[2]) #in_channels, out_channels, kernel_size
        self.conv4 = nn.Conv2d(60, 75, self.kernelsizes[3]) #in_channels, out_channels, kernel_size
        self.conv5 = nn.Conv2d(75, 85, self.kernelsizes[4]) #in_channels, out_channels, kernel_size
        self.conv6 = nn.Conv2d(85, 100, self.kernelsizes[5]) #in_channels, out_channels, kernel_size
        self.pool = nn.MaxPool2d(2, 2) #kernel_size, stride 

        # Width and height of the feature map after each conv + pool stage.
        # Integer floor division is applied at every stage, matching what the
        # pooling layer does; the earlier float arithmetic only agreed with
        # the true size by coincidence for the default 288x432 input.
        self.width1 = self._stage_output_size(width, self.kernelsizes[0])
        self.height1 = self._stage_output_size(height, self.kernelsizes[0])
        self.width2 = self._stage_output_size(self.width1, self.kernelsizes[1])
        self.height2 = self._stage_output_size(self.height1, self.kernelsizes[1])
        self.width3 = self._stage_output_size(self.width2, self.kernelsizes[2])
        self.height3 = self._stage_output_size(self.height2, self.kernelsizes[2])
        self.width4 = self._stage_output_size(self.width3, self.kernelsizes[3])
        self.height4 = self._stage_output_size(self.height3, self.kernelsizes[3])
        self.width5 = self._stage_output_size(self.width4, self.kernelsizes[4])
        self.height5 = self._stage_output_size(self.height4, self.kernelsizes[4])
        self.width6 = self._stage_output_size(self.width5, self.kernelsizes[5])
        self.height6 = self._stage_output_size(self.height5, self.kernelsizes[5])

        if self.width6 < 1 or self.height6 < 1:
            raise ValueError(
                "Input of size {}x{} is too small for six conv/pool stages".format(height, width)
            )

        self.fc1 = nn.Linear(self.height6*self.width6*100, 150) #input, output
        self.fc2 = nn.Linear(150, 10) #input, output

    @staticmethod
    def _stage_output_size(size, kernel_size):
        """Return the side length after an unpadded conv and a 2x2/2 max-pool."""
        return (size - kernel_size + 1) // 2

    def forward(self, x):
        """Return logits of shape (batch, 10) for a batch of images ``x``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = self.pool(F.relu(self.conv4(x)))
        x = self.pool(F.relu(self.conv5(x)))
        x = self.pool(F.relu(self.conv6(x)))
        # Keep the batch dimension; fc1 validates the flattened feature size
        x = x.reshape(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x) #softmax is applied in training and get_accuracy
        return x

## 4. Function for Finding Accuracy, F1-score, and Generating a Confusion Matrix

In [None]:
def get_accuracy(model, loader, print_data = False, conf_mtx = False, f1 = False):
    """Evaluate ``model`` on ``loader`` and return the fraction predicted correctly.

    Args:
        model: Module mapping a batch of images to per-class logits. Batches
            are moved to the device of its first parameter.
        loader: Iterable yielding ``(images, labels)`` batches, with labels as
            integer class indices 0-9.
        print_data: If True, print the true and predicted instrument for the
            first sample of each of the first 10 batches.
        conf_mtx: If True, plot a confusion matrix normalized over the true
            labels (rows).
        f1: If True, print the weighted F1 score over the whole loader once.

    Returns:
        ``correct / total`` as a float, or 0.0 when the loader yields no samples.
    """
    # Class index -> name for console output, and display names for the plot
    classes = ['acoustic guitar', 'cello', 'clarinet', 'double bass', 'drum set',
               'flute', 'piano', 'viola', 'violin', 'singer']
    instruments = ['Acoustic Guitar', 'Cello', 'Clarinet', 'Double Bass', 'Drum Set', 'Flute', 'Piano', 'Viola', 'Violin', 'Singer']

    max_printed = 10
    printed = 0
    correct = 0
    total = 0

    # Follow the model's own device so data and weights can never disagree
    first_param = next(model.parameters(), None)
    device = None if first_param is None else first_param.device

    # Labels are collected once and shared by the F1 score and the matrix;
    # collecting them separately duplicated every entry when both were set.
    collect = conf_mtx or f1
    true_val = []
    pred_val = []

    # Evaluation needs no gradients; skipping them saves memory
    with torch.no_grad():
        for img, labels in loader:
            if device is not None:
                img = img.to(device)
                labels = labels.to(device)

            # getting the prediction and true label
            output = F.softmax(model(img), dim=1)
            prediction = output.max(1, keepdim=True)[1]
            correct += prediction.eq(labels.view_as(prediction)).sum().item()
            total += img.shape[0]

            # print a bounded number of sample predictions if requested
            if print_data and printed < max_printed:
                print("True instrument: " + classes[labels[0].item()])
                print("Predicted instrument: " + classes[prediction[0][0].item()] + '\n')
                printed += 1

            if collect:
                true_val.extend(labels.reshape(-1).tolist())
                pred_val.extend(prediction.reshape(-1).tolist())

    if total == 0:
        return 0.0

    if f1:
        # Computed once over all samples, not re-scored per sample
        f1_value = f1_score(true_val, pred_val, average='weighted')
        print("f1 score:", f1_value)

    if conf_mtx:
        # Row-normalized confusion matrix; `labels` pins it to 10x10 even when
        # some class is absent from this loader.
        matrix = confusion_matrix(true_val, pred_val, labels=list(range(len(instruments))), normalize= 'true')
        matrix_pandas = pd.DataFrame(matrix, index = instruments, columns = instruments)
        plt.figure(figsize = (10,7))
        ax = plt.axes()
        ax.set_title('Confusion Matrix')
        sn.heatmap(matrix_pandas, ax = ax, annot=True)
        plt.xlabel("Predicted Label") 
        plt.ylabel("True Label") 
        plt.show()

    return correct/total

## 5. Training Loop

In [None]:
def train(model, trainingdataloader, validationdataloader, batch_size=64, num_epochs=30, lr=0.01):
    """Train ``model`` with Adam and cross-entropy loss, then plot and save it.

    After every epoch the training and validation accuracy are computed with
    ``get_accuracy`` and printed. When training ends, the loss and accuracy
    curves are plotted and the model's ``state_dict`` is saved to a file named
    from the model name, ``batch_size``, ``lr`` and the last epoch index.

    Args:
        model: Module to train; must expose a ``name`` attribute.
        trainingdataloader: Iterable of ``(images, labels)`` training batches.
        validationdataloader: Iterable of ``(images, labels)`` validation batches.
        batch_size: Used only in the saved file name; the real batch size is
            whatever the loaders yield.
        num_epochs: Number of passes over the training data.
        lr: Learning rate for Adam.
    """
    torch.manual_seed(50)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    training_accuracy = np.zeros(num_epochs)
    validation_accuracy = np.zeros(num_epochs)

    # Mean per-sample training loss of each epoch
    losses = []

    # Follow the model's own device so data and weights can never disagree
    device = next(model.parameters()).device

    # Now the training loops can begin.
    for epoch_idx in range(num_epochs):
        running_loss = 0.0
        samples_seen = 0
        for imgs, labels in trainingdataloader:
            imgs = imgs.to(device)
            labels = labels.to(device)

            # Forward pass
            output = model(imgs)
            batch_loss = criterion(output, labels)
            # Backward pass
            batch_loss.backward()
            # Parameter update, then clear gradients for the next batch
            optimizer.step()
            optimizer.zero_grad()

            # CrossEntropyLoss averages over the batch, so weight by the
            # batch size to get a true per-sample mean for the epoch.
            n_samples = imgs.shape[0]
            running_loss += batch_loss.item() * n_samples
            samples_seen += n_samples

        losses.append(running_loss / samples_seen if samples_seen else 0.0)
        # Now all that's left is to calculate the accuracy.
        training_accuracy[epoch_idx] = get_accuracy(model, trainingdataloader)
        validation_accuracy[epoch_idx] = get_accuracy(model, validationdataloader)

        # A print statement allows for better visualization of the results.
        print(("Epoch Number {}: Training Accuracy: {}" + " Validation Accuracy: {}").format(epoch_idx + 1, training_accuracy[epoch_idx], validation_accuracy[epoch_idx]))

    epoch = np.arange(1, num_epochs+1)

    # Plotting Training Loss Curve
    plt.title("Training Curve")
    plt.plot(epoch, losses, label="Train")
    plt.xlabel("Epoch Number")
    plt.ylabel("Loss")
    plt.show()

    # Plotting Training vs Validation Curve
    plt.title("Training vs Validation Curve")
    plt.plot(epoch, training_accuracy, label = 'Train')
    plt.plot(epoch, validation_accuracy, label = 'Validation')
    plt.xlabel("Epoch Number")
    plt.ylabel("Accuracy")
    plt.legend()  # without this the two curves cannot be told apart
    plt.show()

    # The file name keeps the zero-based index of the last epoch
    model_path = "model_{0}_bs{1}_lr{2}_epoch{3}".format(model.name, batch_size, lr, num_epochs - 1)
    torch.save(model.state_dict(), model_path)

    if num_epochs > 0:
        print("Final Training Accuracy: {}".format(training_accuracy[-1]))
        print("Final Validation Accuracy: {}".format(validation_accuracy[-1]))
    

## 6. Training and Testing Models

In [None]:
# Build the primary CNN, move it to the GPU when requested and available, then train it
model1 = InstrumentClassifierCNN()

gpu_ready = use_cuda and torch.cuda.is_available()
if gpu_ready:
    model1.cuda()

train(
    model1,
    training_loader,
    validation_loader,
    batch_size=30,
    num_epochs=40,
    lr=0.65e-4,
)

In [None]:
# Evaluate the primary model on the test split: sample predictions plus confusion matrix
primary_test_accuracy = get_accuracy(
    model1,
    testing_loader,
    print_data=True,
    conf_mtx=True,
)
print('Testing Accuracy: ', primary_test_accuracy)

In [None]:
# Training Baseline Model
# NOTE(review): this instantiates InstrumentClassifierCNN, not
# InstrumentClassifierBaseline, so the "baseline" trained here has the same
# architecture as the primary model — confirm whether that is intended.
baseline = InstrumentClassifierCNN()

# Honor use_cuda here as well, matching how the primary model is placed
if use_cuda and torch.cuda.is_available():
    baseline.cuda()

train(baseline, training_loader, validation_loader, batch_size = 30, lr = 0.8e-4, num_epochs = 24)

In [None]:
# Evaluate the baseline on the test split: F1 score plus confusion matrix
baseline_test_accuracy = get_accuracy(
    baseline,
    testing_loader,
    conf_mtx=True,
    f1=True,
)
print(baseline_test_accuracy)