In [1]:
import sys
sys.path.insert(0, '../')

import pandas as pd
from sklearn import metrics
#from keras.wrappers.scikit_learn import KerasClassifier
from classification_plots import plot_confusion_matrix
import matplotlib.pyplot as plt
import os
from collections import Counter
%matplotlib notebook

In [4]:
os.environ["CUDA_VISIBLE_DEVICES"]="0"
import warnings
warnings.filterwarnings('ignore')

In [5]:
audio_dir = '../../ESC-50/audio/'
path_to_db='../../ESC-50/'

In [6]:
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self,):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, 9)
        self.pool = nn.MaxPool2d(30, 30)
        self.fc1 = nn.Linear(896, 128)
        self.dropout = nn.Dropout2d(p=0.2)
#         self.fc2 = nn.Linear(256, 128)
#         self.fc3 = nn.Linear(128,56)
        self.fc4 = nn.Linear(128,50)
        self.softmax = nn.Softmax()
        self.batchnorm = nn.BatchNorm1d(128)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.pool(x)
        x = x.view(x.shape[0], -1)
        x = self.fc1(x)
        x = self.batchnorm(x)
        x = F.relu(x)
#         x = self.fc2(x)
#         x = F.relu(x)
#         x = self.dropout(x)
#         x = self.fc3(x)
#         x = F.relu(x)
#         x = self.dropout(x)
        x = self.fc4(x)
        x = self.softmax(x)
        return x
    
net = Net()
print(net)
net.cuda()

Net(
  (conv1): Conv2d(1, 16, kernel_size=(9, 9), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=30, stride=30, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=896, out_features=128, bias=True)
  (dropout): Dropout2d(p=0.2, inplace=False)
  (fc4): Linear(in_features=128, out_features=50, bias=True)
  (softmax): Softmax(dim=None)
  (batchnorm): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)


Net(
  (conv1): Conv2d(1, 16, kernel_size=(9, 9), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=30, stride=30, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=896, out_features=128, bias=True)
  (dropout): Dropout2d(p=0.2, inplace=False)
  (fc4): Linear(in_features=128, out_features=50, bias=True)
  (softmax): Softmax(dim=None)
  (batchnorm): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

In [7]:
# load data:
from PIL import Image
import os 
import numpy as np
from torch.utils.data import Dataset, DataLoader

class trainset(Dataset):
    def __init__(self):
        
        root = "./data2/"
        self.data_list = []
        self.label_list = []
        for root, dir, files in os.walk("./data2/"):
            for file in files:
                if file.find('.npy')!= -1:
#                     print(os.path.join(root, file))
#                     print(int(root[13:]))
                    self.data_list.append(os.path.join(root, file))
                    self.label_list.append(int(root[13:]))
        print(len(self.data_list), len(self.label_list))
        
    def __len__(self):
        return len(self.data_list)

    def __getitem__(self, index):

        path = self.data_list[index]
        data = np.load(path,allow_pickle = True)
        data = np.expand_dims(data, axis=0)
        label= self.label_list[index] 

        label = np.asarray(label)
        
        return data, label
dataloader = trainset()
data,label = dataloader.__getitem__(5)
print(data.shape)

2000 2000
(1, 128, 431)


In [10]:
import torch.optim as optim
import torch
from torch.utils.data.sampler import SubsetRandomSampler

device = 0
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

validation_split = .2
random_seed= 42
shuffle_dataset = True
dataset_size = 2000
indices = list(range(dataset_size))
split = int(np.floor(validation_split * dataset_size))
if shuffle_dataset :
    np.random.seed(random_seed)
    np.random.shuffle(indices)
train_indices, val_indices = indices[split:], indices[:split]

# Creating PT data samplers and loaders:
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

dataset  = trainset()
trainloader = torch.utils.data.DataLoader(dataset, batch_size=4, 
                                           sampler=train_sampler)
valloader = torch.utils.data.DataLoader(dataset, batch_size=4,
                                                sampler=valid_sampler)


for epoch in range(400):  # loop over the dataset multiple times
    
    count = 0
    running_accuracy = 0
    
        
    running_loss = 0.0
    count = 0
    training_accuracy = 0
    ######## training Stage ###########
    for i, data in enumerate(trainloader, 0):
        count += 1
        inputs, labels = data
        labels = labels.long().to(device)
        inputs = inputs.float().to(device)
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, preds = torch.max(outputs, 1)
        acc_train = torch.eq(preds, labels).float().mean()
        running_accuracy += acc_train.item()
        if i % 100 == 99:
            print(epoch, i, running_loss/100)
            running_loss = 0
        
    epoch_loss = running_loss / count
    print("epoch loss:", epoch, epoch_loss)
    print("train_accuracy:", running_accuracy/count)
    
# #             torch.save(net, 'toy_model.pt')
        
    
    print('Finished Training')

###### Validation Stage ######

    for i, data in enumerate(valloader, 0):
        count += 1
        inputs, labels = data
        inputs = inputs.float().to(device)
        labels = labels.long().to(device)
        outputs = net(inputs)
        _, preds = torch.max(outputs, 1)
        acc_val = torch.eq(preds, labels).float().mean()
        running_accuracy += acc_val.item()
        
    print("val_accuracy:", running_accuracy/count)
    



2000 2000
0 99 3.9126836681365966
0 199 3.911608214378357
0 299 3.9114722442626952
0 399 3.911874539852142
epoch loss: 0 0.0
train_accuracy: 0.020625
Finished Training
val_accuracy: 0.0215
1 99 3.909906015396118
1 199 3.910085299015045
1 299 3.908716344833374
1 399 3.9092970848083497
epoch loss: 1 0.0
train_accuracy: 0.045
Finished Training
val_accuracy: 0.044
2 99 3.9083939671516417
2 199 3.908027322292328
2 299 3.907524735927582
2 399 3.906241910457611
epoch loss: 2 0.0
train_accuracy: 0.056875
Finished Training
val_accuracy: 0.0555
3 99 3.9069862365722656
3 199 3.9059037733078004
3 299 3.905198118686676
3 399 3.906476275920868
epoch loss: 3 0.0
train_accuracy: 0.045625
Finished Training
val_accuracy: 0.042
4 99 3.9043344116210936
4 199 3.905897152423859
4 299 3.9031568431854247
4 399 3.9019855451583862
epoch loss: 4 0.0
train_accuracy: 0.063125
Finished Training
val_accuracy: 0.0595
5 99 3.9011924982070925
5 199 3.899320373535156
5 299 3.9031123447418214
5 399 3.8982022857666014
epo

45 99 3.829937169551849
45 199 3.828126006126404
45 299 3.818779559135437
45 399 3.810053870677948
epoch loss: 45 0.0
train_accuracy: 0.13125
Finished Training
val_accuracy: 0.125
46 99 3.8296637988090514
46 199 3.812653610706329
46 299 3.8057445001602175
46 399 3.808419418334961
epoch loss: 46 0.0
train_accuracy: 0.13625
Finished Training
val_accuracy: 0.1325
47 99 3.7923438429832457
47 199 3.8016586232185365
47 299 3.818778004646301
47 399 3.8132903933525086
epoch loss: 47 0.0
train_accuracy: 0.1425
Finished Training
val_accuracy: 0.133
48 99 3.8210325813293458
48 199 3.8019776129722596
48 299 3.8196233677864075
48 399 3.8251227951049804
epoch loss: 48 0.0
train_accuracy: 0.1325
Finished Training
val_accuracy: 0.128
49 99 3.8179404020309446
49 199 3.816905198097229
49 299 3.8203495359420776
49 399 3.8046286606788637
epoch loss: 49 0.0
train_accuracy: 0.13375
Finished Training
val_accuracy: 0.1305
50 99 3.8297224187850953
50 199 3.8042153573036193
50 299 3.8049029302597046
50 399 3.80

val_accuracy: 0.1815
90 99 3.7580912637710573
90 199 3.7624981927871706
90 299 3.7332367658615113
90 399 3.7801656913757324
epoch loss: 90 0.0
train_accuracy: 0.19
Finished Training
val_accuracy: 0.1785
91 99 3.759989125728607
91 199 3.7358604764938352
91 299 3.774722454547882
91 399 3.757681484222412
epoch loss: 91 0.0
train_accuracy: 0.191875
Finished Training
val_accuracy: 0.177
92 99 3.7289269161224365
92 199 3.7527594137191773
92 299 3.7909646320343016
92 399 3.7558555793762207
epoch loss: 92 0.0
train_accuracy: 0.188125
Finished Training
val_accuracy: 0.177
93 99 3.747603006362915
93 199 3.77869651556015
93 299 3.7635216164588927
93 399 3.7282878088951112
epoch loss: 93 0.0
train_accuracy: 0.193125
Finished Training
val_accuracy: 0.181
94 99 3.712419381141663
94 199 3.7672717118263246
94 299 3.7609370660781862
94 399 3.7918792510032655
epoch loss: 94 0.0
train_accuracy: 0.191875
Finished Training
val_accuracy: 0.177
95 99 3.7617301297187806
95 199 3.7389398527145388
95 299 3.7449

Finished Training
val_accuracy: 0.2215
134 99 3.700645534992218
134 199 3.7280182099342345
134 299 3.719328558444977
134 399 3.6959839510917663
epoch loss: 134 0.0
train_accuracy: 0.24125
Finished Training
val_accuracy: 0.221
135 99 3.729184422492981
135 199 3.714910140037537
135 299 3.7180245995521544
135 399 3.677668421268463
epoch loss: 135 0.0
train_accuracy: 0.24375
Finished Training
val_accuracy: 0.224
136 99 3.714381637573242
136 199 3.677514293193817
136 299 3.724837672710419
136 399 3.7271235585212708
epoch loss: 136 0.0
train_accuracy: 0.24375
Finished Training
val_accuracy: 0.225
137 99 3.7048048520088197
137 199 3.690313982963562
137 299 3.7353793239593505
137 399 3.7161522197723387
epoch loss: 137 0.0
train_accuracy: 0.244375
Finished Training
val_accuracy: 0.2245
138 99 3.694083592891693
138 199 3.709617955684662
138 299 3.6910609197616577
138 399 3.749066934585571
epoch loss: 138 0.0
train_accuracy: 0.243125
Finished Training
val_accuracy: 0.2255
139 99 3.697627956867218

177 399 3.6446431493759155
epoch loss: 177 0.0
train_accuracy: 0.286875
Finished Training
val_accuracy: 0.265
178 99 3.637444155216217
178 199 3.682978835105896
178 299 3.659292824268341
178 399 3.6578661584854126
epoch loss: 178 0.0
train_accuracy: 0.293125
Finished Training
val_accuracy: 0.266
179 99 3.64982768535614
179 199 3.658708291053772
179 299 3.6640164256095886
179 399 3.662013342380524
epoch loss: 179 0.0
train_accuracy: 0.29625
Finished Training
val_accuracy: 0.2705
180 99 3.665303966999054
180 199 3.632639715671539
180 299 3.6732580208778383
180 399 3.66772136926651
epoch loss: 180 0.0
train_accuracy: 0.29125
Finished Training
val_accuracy: 0.2665
181 99 3.664220275878906
181 199 3.679287734031677
181 299 3.6682836198806763
181 399 3.653530821800232
epoch loss: 181 0.0
train_accuracy: 0.283125
Finished Training
val_accuracy: 0.2605
182 99 3.65573269367218
182 199 3.699197006225586
182 299 3.635156686306
182 399 3.6636692070961
epoch loss: 182 0.0
train_accuracy: 0.289375
F

221 299 3.6259423732757567
221 399 3.6312841463088987
epoch loss: 221 0.0
train_accuracy: 0.32
Finished Training
val_accuracy: 0.2945
222 99 3.6407073831558225
222 199 3.6263163113594055
222 299 3.6513724374771117
222 399 3.605204174518585
epoch loss: 222 0.0
train_accuracy: 0.319375
Finished Training
val_accuracy: 0.291
223 99 3.632208034992218
223 199 3.630387864112854
223 299 3.6422479271888735
223 399 3.6408975458145143
epoch loss: 223 0.0
train_accuracy: 0.31625
Finished Training
val_accuracy: 0.2845
224 99 3.627582240104675
224 199 3.656919186115265
224 299 3.6025136065483094
224 399 3.647266173362732
epoch loss: 224 0.0
train_accuracy: 0.31875
Finished Training
val_accuracy: 0.2865
225 99 3.6314062905311584
225 199 3.6517954874038696
225 299 3.592480890750885
225 399 3.6608757185935974
epoch loss: 225 0.0
train_accuracy: 0.315625
Finished Training
val_accuracy: 0.284
226 99 3.641149892807007
226 199 3.6228583335876463
226 299 3.6283679366111756
226 399 3.629781143665314
epoch lo

val_accuracy: 0.342
265 99 3.5744801115989686
265 199 3.5750310754776002
265 299 3.581964945793152
265 399 3.5794244980812073
epoch loss: 265 0.0
train_accuracy: 0.383125
Finished Training
val_accuracy: 0.343
266 99 3.5866576480865477
266 199 3.5712431120872496
266 299 3.551328547000885
266 399 3.607775390148163
epoch loss: 266 0.0
train_accuracy: 0.38
Finished Training
val_accuracy: 0.344
267 99 3.5738171696662904
267 199 3.5539057445526123
267 299 3.6381610822677612
267 399 3.5773016238212585
epoch loss: 267 0.0
train_accuracy: 0.375
Finished Training
val_accuracy: 0.341
268 99 3.589512038230896
268 199 3.600403439998627
268 299 3.5511984062194824
268 399 3.565739233493805
epoch loss: 268 0.0
train_accuracy: 0.383125
Finished Training
val_accuracy: 0.345
269 99 3.5833947896957397
269 199 3.5596247124671936
269 299 3.597479066848755
269 399 3.5818361473083495
epoch loss: 269 0.0
train_accuracy: 0.378125
Finished Training
val_accuracy: 0.34
270 99 3.569880781173706
270 199 3.5724836015

epoch loss: 308 0.0
train_accuracy: 0.41625
Finished Training
val_accuracy: 0.3715
309 99 3.5315383195877077
309 199 3.5399331498146056
309 299 3.5195218443870546
309 399 3.548793542385101
epoch loss: 309 0.0
train_accuracy: 0.4225
Finished Training
val_accuracy: 0.3745
310 99 3.5372847652435304
310 199 3.52645890712738
310 299 3.5137377619743346
310 399 3.5700231432914733
epoch loss: 310 0.0
train_accuracy: 0.4225
Finished Training
val_accuracy: 0.3765
311 99 3.556287496089935
311 199 3.539217014312744
311 299 3.540986816883087
311 399 3.5173716855049135
epoch loss: 311 0.0
train_accuracy: 0.420625
Finished Training
val_accuracy: 0.375
312 99 3.531867687702179
312 199 3.4844931149482727
312 299 3.5669665789604186
312 399 3.543639919757843
epoch loss: 312 0.0
train_accuracy: 0.42625
Finished Training
val_accuracy: 0.379
313 99 3.4923355865478514
313 199 3.564540705680847
313 299 3.5617724609375
313 399 3.5285778617858887
epoch loss: 313 0.0
train_accuracy: 0.42125
Finished Training
val

352 99 3.465036118030548
352 199 3.547602574825287
352 299 3.487342736721039
352 399 3.5320949792861938
epoch loss: 352 0.0
train_accuracy: 0.449375
Finished Training
val_accuracy: 0.396
353 99 3.5001085090637205
353 199 3.4996259999275208
353 299 3.509457449913025
353 399 3.5050599646568297
epoch loss: 353 0.0
train_accuracy: 0.4525
Finished Training
val_accuracy: 0.4
354 99 3.494653105735779
354 199 3.525382204055786
354 299 3.4999293446540833
354 399 3.514388325214386
epoch loss: 354 0.0
train_accuracy: 0.450625
Finished Training
val_accuracy: 0.398
355 99 3.4999435186386108
355 199 3.5097471189498903
355 299 3.472438061237335
355 399 3.53710853099823
epoch loss: 355 0.0
train_accuracy: 0.454375
Finished Training
val_accuracy: 0.3945
356 99 3.5167026495933533
356 199 3.504045877456665
356 299 3.50087553024292
356 399 3.4910637879371644
epoch loss: 356 0.0
train_accuracy: 0.4575
Finished Training
val_accuracy: 0.3995
357 99 3.501480402946472
357 199 3.511915352344513
357 299 3.481126

train_accuracy: 0.459375
Finished Training
val_accuracy: 0.405
396 99 3.5004845905303954
396 199 3.4784505462646482
396 299 3.50116250038147
396 399 3.509494228363037
epoch loss: 396 0.0
train_accuracy: 0.459375
Finished Training
val_accuracy: 0.404
397 99 3.5206987643241883
397 199 3.5025091934204102
397 299 3.4739237332344057
397 399 3.4911317563056947
epoch loss: 397 0.0
train_accuracy: 0.45875
Finished Training
val_accuracy: 0.4045
398 99 3.4804033064842224
398 199 3.507090208530426
398 299 3.5471406841278075
398 399 3.4571877980232237
epoch loss: 398 0.0
train_accuracy: 0.46
Finished Training
val_accuracy: 0.4035
399 99 3.477029824256897
399 199 3.4819984817504883
399 299 3.529888319969177
399 399 3.471824758052826
epoch loss: 399 0.0
train_accuracy: 0.46875
Finished Training
val_accuracy: 0.41
