<a href="https://colab.research.google.com/github/PriyankaTUI/AudioClassificationWithDeepLearningAnalysis/blob/master/alexnet_digit_recognizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Clone the project repository into the current runtime.
!git clone https://github.com/PriyankaTUI/AudioClassificationWithDeepLearningAnalysis.git
# Make the cloned repository the notebook's working directory
# (presumably so the project-local modules imported below resolve — confirm).
%cd AudioClassificationWithDeepLearningAnalysis
# Print the working directory to confirm the change took effect.
!pwd

Cloning into 'AudioClassificationWithDeepLearningAnalysis'...
remote: Enumerating objects: 236, done.[K
remote: Counting objects: 100% (91/91), done.[K
remote: Compressing objects: 100% (63/63), done.[K
remote: Total 236 (delta 61), reused 46 (delta 28), pack-reused 145[K
Receiving objects: 100% (236/236), 44.53 MiB | 26.11 MiB/s, done.
Resolving deltas: 100% (117/117), done.
/content/AudioClassificationWithDeepLearningAnalysis
/content/AudioClassificationWithDeepLearningAnalysis


In [None]:
import torch
import torch.nn as nn
import os
import copy
import torch.nn.functional as F
import torchaudio
import models
from torch.utils.data import DataLoader,random_split,Dataset
# from torchsummary import summary
# from torch.optim import lr_scheduler
import torchvision
#load tensorboard to monitor training
%load_ext tensorboard
import torch.utils.tensorboard as tb
from torch.utils.tensorboard import SummaryWriter
# import time
from utils import label_to_index, index_to_label, get_average_of_list
from dataset import SubsetSC
# import pickle
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import pandas as pd
# from datetime import datetime

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Default size (width, height in inches) for every figure created after this cell.
plt.rcParams["figure.figsize"] = [10, 10]

**Data preprocessing and loading**

In [None]:
# Class vocabulary: the ten spoken-digit words, ordered so that each word's
# list position equals the digit it names.
digits = [
    "zero",
    "one",
    "two",
    "three",
    "four",
    "five",
    "six",
    "seven",
    "eight",
    "nine",
]

# Training and testing splits built by the project's SubsetSC dataset class.
# NOTE(review): the meaning of the second argument ("old") is defined in
# dataset.py, which is not visible from this notebook — confirm there.
train_dataset = SubsetSC("training", "old")
test_dataset = SubsetSC("testing", "old")

In [None]:
def _parent_dir_name(path):
    """Return the name of the directory that directly contains `path`."""
    containing_dir = os.path.dirname(path)
    return os.path.basename(containing_dir)


# One label per dataset entry, taken from the folder name of each path in the
# dataset's `_walker` list (a private attribute of the dataset object).
train_labels = list(map(_parent_dir_name, train_dataset._walker))
test_labels = list(map(_parent_dir_name, test_dataset._walker))

**Visualize the class distribution to check for data imbalance**

In [None]:
# Histogram of the label strings collected for the training split; bars of
# similar height indicate that the classes are roughly balanced.
plt.hist(train_labels)

In [None]:
# Histogram of the label strings collected for the testing split; bars of
# similar height indicate that the classes are roughly balanced.
plt.hist(test_labels)

In [None]:

def pad_sequence(batch):
    """Zero-pad a list of 2-D tensors to a common length and stack them.

    Each item is transposed so that its last axis becomes the leading
    (padded) axis, the items are padded with zeros to the longest one and
    stacked batch-first, and the two trailing axes are then swapped back.

    Args:
        batch: list of 2-D tensors that agree on their first dimension and
            may differ in their last dimension.

    Returns:
        A tensor of shape (len(batch), first_dim, longest_last_dim).
    """
    transposed = list(map(torch.t, batch))
    padded = torch.nn.utils.rnn.pad_sequence(
        transposed,
        batch_first=True,
        padding_value=0.,
    )
    # (batch, length, first_dim) -> (batch, first_dim, length)
    return padded.permute(0, 2, 1)


def collate_fn(batch):
    """Collate (waveform, label) pairs into one padded batch.

    Args:
        batch: list of (waveform, label) pairs as produced by the dataset.

    Returns:
        A pair (tensors, targets): `tensors` is the zero-padded stack of the
        squeezed waveforms with an extra axis inserted at position 1;
        `targets` stacks the values returned by `label_to_index` for each
        label (these must be tensors for `torch.stack` to accept them).
    """
    # Drop every size-1 axis from each item before padding.
    squeezed = [torch.squeeze(waveform) for waveform, _ in batch]
    # Map each label string to its index within the `digits` vocabulary.
    encoded = [label_to_index(digits, label) for _, label in batch]

    tensors = torch.unsqueeze(pad_sequence(squeezed), 1)
    targets = torch.stack(encoded)
    return tensors, targets


# old_traindata, old_testdata = torch.utils.data.random_split(old_data_set, [round(len(old_data_set)*.8), round(len(old_data_set)*.2)])
# Batch both splits through `collate_fn`, which pads the items of each batch
# to a common length. Training batches are reshuffled every epoch.
train_dataloader = DataLoader(train_dataset, batch_size=64, collate_fn=collate_fn, shuffle=True)
# Evaluation gains nothing from shuffling; a fixed order keeps the batch
# composition, and therefore any per-batch metric, identical from run to run.
test_dataloader = DataLoader(test_dataset, batch_size=64, collate_fn=collate_fn, shuffle=False)

**Training the spoken digit recognizer**

In [None]:
# Select the compute device: the GPU when CUDA is available, otherwise the CPU.
print("Initializing the neural network...")
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f'Device: {device}')

# Instantiate the project's AlexNet, move it to the selected device, and
# optimise all of its parameters with Adam.
model = models.AlexNet()
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# TensorBoard event writer used to log the training curves
# (default log directory, since no path is given).
writer = SummaryWriter()
# Training loop
#
# Trains `model` for `num_epoch` epochs on `train_dataloader`, logging the mean
# training loss of every epoch to TensorBoard. Every `eval_every` epochs, and
# always after the final epoch, the model is evaluated on `test_dataloader`;
# the model and optimizer state dicts are saved whenever accuracy improves.
#
# NOTE(review): the checkpoint is selected on the test split, so the reported
# best accuracy is optimistically biased; a separate validation split would
# avoid that — confirm whether SubsetSC provides one.
num_epoch = 10
eval_every = 5  # evaluate every `eval_every` epochs
best_accuracy = 0.0
print("Started training !")
for epoch in tqdm(range(num_epoch), total=num_epoch, leave=False):
    running_loss = []
    validation_loss = []

    # The evaluation branch below switches the model to eval mode. Switch back
    # to training mode at the start of every epoch; otherwise every epoch after
    # the first evaluation would train with mode-dependent layers (dropout,
    # batch norm, if the model has any) stuck in inference behaviour.
    model.train()

    for inputs, labels in tqdm(train_dataloader, total=len(train_dataloader), leave=False, desc=f'Epoch: {epoch}/{num_epoch}'):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        logits = model(inputs)  # (batch_size, num_classes)
        loss = F.cross_entropy(logits, labels)
        loss.backward()
        optimizer.step()
        running_loss.append(loss.item())

    # Mean of the per-batch training losses for this epoch.
    epoch_train_loss = sum(running_loss)/len(running_loss)
    print(f'Epoch {epoch}/{num_epoch}... Loss: {epoch_train_loss}')
    writer.add_scalars('Loss', {'Train': epoch_train_loss}, epoch)

    # Evaluate on the test set every `eval_every` epochs, and also after the
    # last epoch so the final weights are always scored and can be checkpointed.
    is_last_epoch = epoch == num_epoch - 1
    if epoch % eval_every == 0 or is_last_epoch:
        num_correct = 0
        num_samples = 0
        model.eval()
        with torch.no_grad():
            for inputs, labels in tqdm(test_dataloader, total=len(test_dataloader), leave=False, desc=f'Epoch: {epoch}/{num_epoch}'):
                inputs, labels = inputs.to(device), labels.to(device)
                logits = model(inputs)
                loss = F.cross_entropy(logits, labels)
                validation_loss.append(loss.item())
                pred = logits.argmax(dim=1)  # predicted class index per sample
                # Count correct predictions over all samples instead of
                # averaging per-batch accuracies, which would over-weight a
                # short final batch.
                num_correct += (pred == labels).sum().item()
                num_samples += labels.size(0)

        batch_validation_loss = sum(validation_loss)/len(validation_loss)
        print(f'Epoch {epoch}/{num_epoch}... Validation loss: {batch_validation_loss}')
        writer.add_scalars('Loss', {'Test': batch_validation_loss}, epoch)

        batch_accuracy = num_correct / num_samples
        print(f'Epoch {epoch}/{num_epoch}... Accuracy: {batch_accuracy}')
        writer.add_scalars('Accuracy', {'Test': batch_accuracy}, epoch)

        # Save the model if accuracy on the test set beats the previous best.
        if batch_accuracy > best_accuracy:
            best_accuracy = batch_accuracy
            torch.save(model.state_dict(), 'checkpoint_alexnet.pth')
            torch.save(optimizer.state_dict(), 'optimizer_checkpoint_alexnet.pth')
            print(f'Best model saved at epoch {epoch}/{num_epoch}')

# Push any buffered events to disk so TensorBoard shows the complete run;
# the writer stays open in case later cells keep logging to it.
writer.flush()