## Dataset - UrbanSound
https://drive.google.com/drive/folders/0By0bAi7hOBAFUHVXd1JCN3MwTEU

In [16]:
%matplotlib inline
from memory_profiler import memory_usage
import os
import pandas as pd
from glob import glob
import numpy as np

In [17]:
import librosa
import librosa.display
import pylab
import matplotlib.pyplot as plt
from matplotlib import figure
import gc
from path import Path


In [3]:
def create_spectrogram(filename,name):
    plt.interactive(False)
    clip, sample_rate = librosa.load(filename, sr=None)
    fig = plt.figure(figsize=[0.72,0.72])
    ax = fig.add_subplot(111)
    ax.axes.get_xaxis().set_visible(False)
    ax.axes.get_yaxis().set_visible(False)
    ax.set_frame_on(False)
    S = librosa.feature.melspectrogram(y=clip, sr=sample_rate)
    librosa.display.specshow(librosa.power_to_db(S, ref=np.max))
    filename  = 'data/train/img/' + name + '.jpg'
    plt.savefig(filename, dpi=400, bbox_inches='tight',pad_inches=0)
    plt.close()    
    fig.clf()
    plt.close(fig)
    plt.close('all')
    del filename,name,clip,sample_rate,fig,ax,S

In [45]:
def create_spectrogram_test(filename,name):
    plt.interactive(False)
    clip, sample_rate = librosa.load(filename, sr=None)
    fig = plt.figure(figsize=[0.72,0.72])
    ax = fig.add_subplot(111)
    ax.axes.get_xaxis().set_visible(False)
    ax.axes.get_yaxis().set_visible(False)
    ax.set_frame_on(False)
    S = librosa.feature.melspectrogram(y=clip, sr=sample_rate)
    librosa.display.specshow(librosa.power_to_db(S, ref=np.max))
    filename  = Path('data/test/img/' + name + '.jpg')
    fig.savefig(filename, dpi=400, bbox_inches='tight',pad_inches=0)
    plt.close()    
    fig.clf()
    plt.close(fig)
    plt.close('all')
    del filename,name,clip,sample_rate,fig,ax,S

In [5]:
Data_dir=np.array(glob("data/train/Train/4*"))
i=0
for file in Data_dir[i:i+2000]:
    #Define the filename as is, "name" refers to the JPG, and is split off into the number itself. 
    filename,name = file,file.split('/')[-1].split('.')[0]
    create_spectrogram(filename,name)
gc.collect()

i=2000
for file in Data_dir[i:i+2000]:
    filename,name = file,file.split('/')[-1].split('.')[0]
    create_spectrogram(filename,name)
gc.collect()

i=4000
for file in Data_dir[i:]:
    filename,name = file,file.split('/')[-1].split('.')[0]
    create_spectrogram(filename,name)
gc.collect()

0

In [46]:
Test_dir=np.array(glob("data/test/Test/4*"))
i=0
for file in Test_dir[i:i+1500]:
    filename,name = file,file.split('/')[-1].split('.')[0]
    create_spectrogram_test(filename,name)
gc.collect()
i=1500
for file in Test_dir[i:]:
    filename,name = file,file.split('/')[-1].split('.')[0]
    create_spectrogram_test(filename,name)
gc.collect()



0

In [87]:
data=pd.read_csv('data/train/train.csv',dtype=str)
data['filter'] = data['ID'].str[0]
data = data[data['filter'] == '4'][['ID', 'Class']]
data['ClassID'] = pd.factorize(data.Class)[0]
data.head()

Unnamed: 0,ID,Class,ClassID
4,4,dog_bark,0
24,40,jackhammer,1
25,42,engine_idling,2
26,43,siren,3
27,44,children_playing,4


In [88]:
from sklearn.model_selection import train_test_split

data_train, data_test = train_test_split(data, random_state = 1)


In [143]:
import torch
import torchvision
from PIL import Image
from torch.utils.data import DataLoader


class MyDataset(torch.utils.data.Dataset):
    def __init__(self, dataframe, path):
        self.dataframe = dataframe
        self.path = path

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, index):
        row = self.dataframe.iloc[index]
        im = Image.open(f'{self.path}/{row["ID"]}.jpg')
        im.thumbnail((64,64))
        return (
            torchvision.transforms.functional.to_tensor(im),
            row["ClassID"],
        )

train_dataset = MyDataset(data_train, path='data/train/img/')
test_dataset = MyDataset(data_test, path='data/train/img/')

In [144]:
i = Image.open(f'data/train/img/4.jpg')
i.thumbnail((64,64))
i.size

(64, 62)

In [145]:
dataloader_train = DataLoader(train_dataset, batch_size=64, shuffle=True)
dataloader_test = DataLoader(test_dataset, batch_size=64, shuffle=False)


## Feature Extraction and Database Building


In [146]:
from torchvision import datasets, transforms, models
import torch
from torch import nn


In [147]:
model = models.resnet18(pretrained=True)

In [148]:
# Freeze model weights
for param in model.parameters():
    param.requires_grad = False

In [149]:
vgg16 = models.vgg16(pretrained=True)
# Freeze VGG weights
for param in vgg16.parameters():
    param.requires_grad = False

In [163]:
classifier = nn.Sequential(
        nn.Linear(512, 64),
        nn.ReLU(),
        nn.Linear(64, len(data.ClassID.unique())),
        nn.Softmax(dim=1)
    )

In [164]:
model.fc = classifier

In [165]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [166]:
model.to(device)


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [171]:
from datetime import datetime
from sklearn.utils import shuffle

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

def batch_gd(model, criterion, optimizer, dataloader_train, dataloader_test, epochs):

        train_losses = np.zeros(epochs)
        test_losses = np.zeros(epochs)

        for it in range(epochs):
            t0 = datetime.now()
            train_loss = []

            # Shuffle each epoch
            # X_train, Y_train = shuffle(*train_loader)
            for inputs, targets in dataloader_train:
                # targets = targets.view(-1, 1).float()

                # print(inputs.shape, targets.shape)

                # to device
                inputs, targets = inputs.to(device), targets.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # Forward pass
                outputs = model(inputs)
                loss = criterion(outputs, targets)

                # Backward and optimize
                loss.backward()
                optimizer.step()

                train_loss.append(loss.item())

            # Get train loss and test loss
            train_loss = np.mean(train_loss)

            test_loss = []
            for inputs, targets in dataloader_test:

                # to device
                inputs, targets = inputs.to(device), targets.to(device)
                
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                test_loss.append(loss.item())
            test_loss = np.mean(test_loss)

            # Save losses
            train_losses[it] = train_loss
            test_losses[it] = test_loss

            dt = datetime.now() - t0
            print(f'Epoch {it+1}/{epochs}, Train Loss: {train_loss:.4f}, '
                    f'Test Loss: {test_loss:.4f}, Duration: {dt}')

        return train_losses, test_losses

In [172]:
train_losses, test_losses = batch_gd(model, criterion, optimizer, 
                                    dataloader_train=dataloader_train, dataloader_test=dataloader_test, epochs=1)

Epoch 1/1, Train Loss: 2.1447, Test Loss: 2.1389, Duration: 0:00:03.322276
