# Neural network on the full stuff

## Preparation

### Package installation

In [1]:
!pip3 install psutil
!pip3 install librosa
!pip3 install -U -q PyDrive

Collecting librosa
[?25l  Downloading https://files.pythonhosted.org/packages/09/b4/5b411f19de48f8fc1a0ff615555aa9124952e4156e94d4803377e50cfa4c/librosa-0.6.2.tar.gz (1.6MB)
[K    100% |████████████████████████████████| 1.6MB 5.2MB/s 
[?25hCollecting audioread>=2.0.0 (from librosa)
  Downloading https://files.pythonhosted.org/packages/f0/41/8cd160c6b2046b997d571a744a7f398f39e954a62dd747b2aae1ad7f07d4/audioread-2.1.6.tar.gz
Collecting resampy>=0.2.0 (from librosa)
[?25l  Downloading https://files.pythonhosted.org/packages/14/b6/66a06d85474190b50aee1a6c09cdc95bb405ac47338b27e9b21409da1760/resampy-0.2.1.tar.gz (322kB)
[K    100% |████████████████████████████████| 327kB 25.7MB/s 
[?25hCollecting numba>=0.38.0 (from librosa)
[?25l  Downloading https://files.pythonhosted.org/packages/42/45/8d5fc45e5f760ac65906ba48dec98e99e7920c96783ac7248c5e31c9464e/numba-0.40.1-cp36-cp36m-manylinux1_x86_64.whl (3.2MB)
[K    100% |████████████████████████████████| 3.2MB 11.7MB/s 
[?25hCollecting llv

In [2]:
# to install pytorch on colab
from os import path
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())

accelerator = 'cu80' if path.exists('/opt/bin/nvidia-smi') else 'cpu'

!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.1-{platform}-linux_x86_64.whl torchvision

tcmalloc: large alloc 1073750016 bytes == 0x5794e000 @  0x7ff9103f52a4 0x594e17 0x626104 0x51190a 0x4f5277 0x510c78 0x5119bd 0x4f5277 0x4f3338 0x510fb0 0x5119bd 0x4f5277 0x4f3338 0x510fb0 0x5119bd 0x4f5277 0x4f3338 0x510fb0 0x5119bd 0x4f6070 0x510c78 0x5119bd 0x4f5277 0x4f3338 0x510fb0 0x5119bd 0x4f6070 0x4f3338 0x510fb0 0x5119bd 0x4f6070


In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import time

import psutil
import pickle

import librosa
import torch
import torch.nn as nn
import torchvision
from torchvision import models,transforms,datasets

%matplotlib inline

In [0]:
# Install the PyDrive wrapper & import libraries.
# This only needs to be done once per notebook.

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
from google.colab import drive

### Data download

In [5]:
use_gpu = torch.cuda.is_available()
print('Using gpu: %s ' % use_gpu)

dtype = torch.FloatTensor
if use_gpu:
    dtype = torch.cuda.FloatTensor

Using gpu: True 


In [0]:
google_drive_download = True
google_drive_storage = True

In [7]:
# We can use Google Drive to both download our data and store the proper model.

if google_drive_download:
  
  # Authenticate and create the PyDrive client.
  # This only needs to be done once per notebook.
  auth.authenticate_user()
  gauth = GoogleAuth()
  gauth.credentials = GoogleCredentials.get_application_default()
  gdrive = GoogleDrive(gauth)

  test_id = "1v76c_UdqCkF3do0DQjCKApwUrODBM9ay"
  downloaded = gdrive.CreateFile({'id': test_id})
  downloaded.GetContentFile('test_dataset')

  train_id_0 = "1v-xA4pTanJHcH6eK1yecsGxDJRY6TS1D"
  downloaded = gdrive.CreateFile({'id': train_id_0})
  downloaded.GetContentFile('train_dataset_0')

  train_id_1 = "1uxgvUUZmVRtlfDvfU9wvbZQvECd1PJ31"
  downloaded = gdrive.CreateFile({'id': train_id_1})
  downloaded.GetContentFile('train_dataset_1')

  validation_id = "1uwrddUzD3HGb3ygVOiexpzJ7Y6wHZkk4"
  downloaded = gdrive.CreateFile({'id': validation_id})
  downloaded.GetContentFile('validation_dataset')
  
  
if google_drive_storage:
  
  drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


### Unloading the .npz files

In [0]:
filehandle = open('test_dataset', 'rb')
test = pickle.load(filehandle)

In [0]:
filehandle = open('validation_dataset', 'rb')
validation = pickle.load(filehandle)

In [0]:
filehandle = open('train_dataset_0', 'rb')
train = pickle.load(filehandle)

filehandle = open('train_dataset_1', 'rb')
train = train + pickle.load(filehandle)

## CNN Classifier

### Data loader

In [0]:
bs=100
train_loader = torch.utils.data.DataLoader(train,
    batch_size=bs, shuffle=True)
validation_loader = torch.utils.data.DataLoader(validation,
    batch_size=bs, shuffle=True)
test_loader = torch.utils.data.DataLoader(test,
    batch_size=bs, shuffle=False)

In [12]:
print(train[0][0].shape)
print(train[0][1].shape)

torch.Size([1, 64, 256])
torch.Size([])


### Define a convolutional neural network

In [0]:
#we use 1-channel image 

import torch.nn as nn 
import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        #64 3*3 filters with stride 1 
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1)
        #max pooling 2*4
        self.pool = nn.MaxPool2d(2, 4)
        #64 3*5 filters with stride 1
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding = 1)
        self.conv3 = nn.Conv2d(in_channels = 64, out_channels = 64, kernel_size = (3,5), stride = 1)
        #max pooling 2*4
        #dropout 0.2
        self.dropout1 = nn.Dropout2d(p=0.2, inplace=False)
        #fully connected layer 32 nerous to pervious ones 
        self.fc1 = nn.Linear(64 * 3 * 1, 32)
        #dropout 0.2
        self.dropout2 = nn.Dropout(p=0.2, inplace = False)
        #output layers 8 nerous fully connected 
        self.fc2 = nn.Linear(32, 8)

    def forward(self, x):
        #print(x.shape)
        x = self.pool(F.relu(self.conv1(x)))
        #print(x.shape)
        x = self.pool(F.relu(self.conv2(x)))
        #print(x.shape)
        x = self.pool(F.relu(self.conv3(x)))
        
        x = self.dropout1(x)
        #print(x.shape)
        x = x.view(-1, 64 * 3 * 1)
        #print(x.shape)
        x = F.relu(self.fc1(x))
        x = self.dropout2(x)
        #print(x.shape)
        x = F.softmax(self.fc2(x), 1)
        return x



In [14]:
# Model definition
net = Net()


model_filename = 'gdrive/My Drive/Colab Notebooks/data/NEW_MODEL_0'

try:
  train_info_file = open(model_filename, 'rb')
  train_info = pickle.load(train_info_file)
  net.load_state_dict(train_info['parameters'])
  
  #net.load_state_dict(torch.load(model_filename))
  print("Parameters loaded from %s" % model_filename)
  
except:
  print("Pre-trained parameters not found")

 

Parameters loaded from gdrive/My Drive/Colab Notebooks/data/NEW_MODEL_0


### Training function

In [0]:
# Loss criterion: cross entropy
criterion = nn.CrossEntropyLoss()

In [0]:
def train_model(net = net, epochs = 2, train_info = None, bs = bs,
                validation = True, lr = 0.01, model_save = None):
  
  net.train()
  optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9)
  start_time = time.time()
  
  if train_info == None:
    train_info = {'epochs': 0,
                  'lr': [],
                 'loss_train': [],
                 'acc_train': [],
                 'loss_val': [],
                 'acc_val': [],
                 'parameters': []}

  loss_train = []
  acc_train = []
  
  loss_val = []
  acc_val = []
  
  for epoch in range(epochs):  # loop over the dataset multiple times

    print('Epoch %s – Time lapsed: %s' % (str(epoch), str(time.time() - start_time)))    
    
    running_loss = 0.0
    running_corrects = 0.0
    size = 0
    
    for i, data in enumerate(train_loader, 0):
      # get the inputs
      inputs, labels = data
        
      if use_gpu:
        inputs.cuda()

      # zero the parameter gradients
      optimizer.zero_grad()

      # forward + backward + optimize

      outputs = net(inputs)
      loss = criterion(outputs, labels)
      loss.backward()
      optimizer.step()
        
      pred = outputs.max(1, keepdim=True)[1]
        
      size += bs
      running_loss += loss.item()
      running_corrects += pred.eq(labels.view_as(pred)).sum()
      
        
    epoch_loss = running_loss / size
    epoch_acc = running_corrects.item() / size
    loss_train += [epoch_loss]
    acc_train += [epoch_acc]
    
    if validation == True:
      correct = 0
      total = 0

      net.eval()

      with torch.no_grad():
        for val_i, val_data in enumerate(validation_loader):
          val_images, val_labels = val_data
          val_outputs = net(val_images)
          _, predicted = torch.max(val_outputs.data, 1)
          total += val_labels.size(0)
          correct += (predicted == val_labels).sum().item()

      print('Accuracy of the network on the validation tracks: %f' % (
          correct / total))
      
      train_info['epochs'] += 1
      train_info['lr'] += [lr]
      train_info['loss_train'] += [epoch_loss]
      train_info['acc_train'] += [epoch_acc]
      train_info['acc_val'] += [correct / total]
      
      if max(train_info['acc_val']) == correct / total:
        train_info['parameters'] = net.state_dict()
        
      if model_save is not None:
        filehandle = open(str(model_save), 'wb')
        pickle.dump(train_info, filehandle)
        filehandle.close()
      
      net.train()
    
    print('Train - Loss: {:.4f} Acc: {:.4f}\n'.format(epoch_loss, epoch_acc))

  print('Finished Training')
  
  return train_info

### Training the model

In [0]:
def trainer(learning = [(10, 0.01)], net = net, train_info = None, model_save = None):
  
  """
  `learning_rates` should be a list where each element is a tuple of size two.
    - The first element should be the learning rate,
    - The second element should be the number of epochs the user wants for this
      particular learning rate.
  """
  
  loss_train, acc_train, loss_val, acc_val = [], [], [], []
  
  for learning_tuple in learning:
    
    train_info = train_model(net,
                             learning_tuple[0],
                             train_info,
                             validation = True,
                             lr = learning_tuple[1],
                             model_save = model_save)
    
    #loss_train += list(new_loss_train)
    #acc_train += list(new_acc_train)
    #loss_val += list(new_loss_val)
    #acc_val += list(new_acc_val)
    

  if model_save is not None:
    filehandler = open(str(model_save), 'wb')
    pickle.dump(train_info, filehandler)
    #torch.save(net.state_dict(), str(model_save) + '.pt')
  
  return train_info
  

In [0]:
#train_info = trainer([(10, 0.1)], model_save = 'gdrive/My Drive/Colab Notebooks/data/NEW_MODEL_0')

In [0]:
#train_info = trainer([(15, 0.05)], train_info, model_save = 'gdrive/My Drive/Colab Notebooks/data/NEW_MODEL_0')

In [0]:
#train_info = trainer([(29, 0.01)], train_info, model_save = 'gdrive/My Drive/Colab Notebooks/data/NEW_MODEL_0')

In [26]:
train_info = trainer([(26, 0.001)], train_info, model_save = 'gdrive/My Drive/Colab Notebooks/data/NEW_MODEL_0')

Epoch 0 – Time lapsed: 5.0067901611328125e-06
Accuracy of the network on the validation tracks: 0.418375
Train - Loss: 0.0187 Acc: 0.4045

Epoch 1 – Time lapsed: 1383.235746383667
Accuracy of the network on the validation tracks: 0.419750
Train - Loss: 0.0187 Acc: 0.4047

Epoch 2 – Time lapsed: 2713.98912024498
Accuracy of the network on the validation tracks: 0.418500
Train - Loss: 0.0187 Acc: 0.4040

Epoch 3 – Time lapsed: 4036.203604698181
Accuracy of the network on the validation tracks: 0.419750
Train - Loss: 0.0186 Acc: 0.4048

Epoch 4 – Time lapsed: 5337.857761621475
Accuracy of the network on the validation tracks: 0.418500
Train - Loss: 0.0186 Acc: 0.4041

Epoch 5 – Time lapsed: 6625.1471474170685
Accuracy of the network on the validation tracks: 0.416750
Train - Loss: 0.0187 Acc: 0.4044

Epoch 6 – Time lapsed: 7936.3164830207825
Accuracy of the network on the validation tracks: 0.420625
Train - Loss: 0.0187 Acc: 0.4041

Epoch 7 – Time lapsed: 9283.227795600891
Accuracy of the

In [33]:
train_info = trainer([(10, 0.1)], train_info, model_save = None)

Epoch 0 – Time lapsed: 3.337860107421875e-06
Accuracy of the network on the validation tracks: 0.131250
Train - Loss: 0.0202 Acc: 0.2517

Epoch 1 – Time lapsed: 1346.694977760315


KeyboardInterrupt: ignored

### Testing the model

In [0]:
def test_model(data_loader = test_loader, net = net):
  """
  Runs the model on the test dataset (individual tracks)
  """
  correct = 0
  total = 0
  predictions = []

  net.eval()

  with torch.no_grad():
    for i, data in enumerate(data_loader):
      images, labels = data
      outputs = net(images)
      _, predicted = torch.max(outputs.data, 1)
      total += labels.size(0)
      predictions += [predicted]
      correct += (predicted == labels).sum().item()
  
  score = 100 * (correct / total)
  print('Accuracy of the network on the test tracks (3 second windows): %f %%' % score)
  return correct / total


def test_model_full(data_loader = test_loader, net = net):
  
  """
  Runs the model on the test dataset, where the songs are recombined into their
  original length (30 second).
  
  For each track, the final prediction is the genre that is predicted the most
  often for our 3-second windows: for example, if among our 10 samples, we have
  5 predictions for Rock, 3 for Instrumental and 2 for Pop, then the final
  prediction will be "Rock". In case of a tie, this algorithm will return the
  genre whose code is the lowest: for instance, Pop is represented by the label
  '6' and Rock by the label '7', so if there is a tie between those two, the
  function will return the prediction 'Pop'.
  """
  
  correct = 0
  total = 0
  
  net.eval()
  
  with torch.no_grad():
    
    for i, data in enumerate(data_loader):
      
      images, labels = data
      outputs = net(images)
      _, predicted = torch.max(outputs.data, 1)
      
      full_song_labels = [label for song_index, label in enumerate(labels)
                          if song_index % 10 == 0]
      
      total += len(full_song_labels)
      prediction = []
      
      for window in range(0,100,10):
        
        distribution = torch.bincount(predicted[window:(window+10)])

        # Note: when two or more genres have the same number of votes,
        # it automatically picks the first in the list
        song_voted = torch.argmax(distribution)
        prediction += [song_voted]
      
      correct += len([label for label, pred in zip(full_song_labels,prediction) if label == pred])
    
    score = 100 * (correct / total)
    print('Accuracy of the network on the test tracks (full tracks): %f %%' % score)
    return correct / total
      
      

In [28]:
single_window_accuracy = test_model(test_loader, net)
track_accuracy = test_model_full()

Accuracy of the network on the test tracks (3 second windows): 43.725000 %
Accuracy of the network on the test tracks (full tracks): 49.750000 %
