In [1]:
import csv
import os

import librosa
import librosa.display
import numpy as np
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import Dataset,DataLoader

from utils_train import train, test, fit


# Fixed seed so runs are repeatable. NumPy and PyTorch keep separate random
# state, so both are seeded: PyTorch's generator is the one used for weight
# initialisation and for DataLoader shuffling.
SEED = 123
np.random.seed(SEED)
torch.manual_seed(SEED)

# Step size handed to every Adam optimizer created in this notebook.
learning_rate = 0.005

# Use the first CUDA device when one is available, otherwise fall back to CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")

In [3]:
def csvToAudioList(filename,sourceDir):
    """Load every clip listed in a metadata CSV and return waveforms plus labels.

    The first CSV row is treated as a header and skipped. For each remaining
    row, column 0 is the audio file name, column 5 is appended to ``sourceDir``
    to form the containing directory, and column 7 is the class name that is
    converted with ``labelTrans``. Clips are decoded with ``librosa.load``
    using its default settings; any clip whose length is not 88200 samples is
    passed through ``fillWithZeros``.

    Args:
        filename: Path of the metadata CSV file.
        sourceDir: Path prefix; each clip is read from
            ``sourceDir + row[5] + "/" + row[0]``.

    Returns:
        A tuple ``(audDataset, labelDataset)`` of two parallel lists: the
        decoded waveforms and their integer labels. Both lists are empty when
        the CSV has no data rows (including a completely empty file).
    """
    # newline="" is what the csv module requires so that quoted fields that
    # contain line breaks are parsed correctly on every platform.
    with open(filename, "rt", newline="") as csvfile:
        rows = csv.reader(csvfile)
        # Skip the header row; the default keeps an empty file from raising.
        next(rows, None)
        dataList = list(rows)

    # The CSV handle is closed here: decoding audio only needs the parsed rows.
    audDataset = []
    labelDataset = []
    print(len(dataList))
    for x in dataList:
        audData, _ = librosa.load(sourceDir + x[5] + "/" + x[0])
        if len(audData) != 88200:
            audData = fillWithZeros(audData)
        audDataset.append(audData)
        labelDataset.append(labelTrans(x[7]))
    print("Finished")

    return audDataset, labelDataset

def fillWithZeros(audData, target_len=88200):
    """Force a waveform to exactly ``target_len`` samples.

    Inputs shorter than ``target_len`` get float32 zeros appended at the end;
    inputs of that length or longer are cut off after ``target_len`` samples.

    Args:
        audData: Array of audio samples (expected to be 1-D).
        target_len: Required number of samples. Defaults to 88200, the clip
            length used throughout this notebook.

    Returns:
        An array with ``target_len`` samples.
    """
    missing = target_len - len(audData)
    if missing > 0:
        # np.append flattens its arguments, so a plain 1-D block of zeros is
        # enough to extend the clip.
        return np.append(audData, np.zeros(missing, dtype=np.float32))
    # Already long enough: keep only the leading target_len samples.
    return audData[:target_len]

def labelTrans(labelString):
    """Translate a class name into its integer class index.

    The index is the position of the name in the ordered tuple below. A value
    that matches none of the known names yields ``None``.
    """
    known_labels = (
        'siren',
        'street_music',
        'drilling',
        'dog_bark',
        'children_playing',
        'gun_shot',
        'engine_idling',
        'air_conditioner',
        'jackhammer',
        'car_horn',
    )
    for class_index, class_name in enumerate(known_labels):
        if labelString == class_name:
            return class_index
    return None



# Folder that holds UrbanSound8K.csv and the audio/fold* directories. Set the
# URBANSOUND8K_ROOT environment variable to run on another machine; the default
# is the original author's local path.
DATA_ROOT = os.environ.get('URBANSOUND8K_ROOT', 'C:/Users/manud/Documents/atml19')

# Decode the whole dataset once; the later cells slice these two parallel lists.
audList,labelList = csvToAudioList(DATA_ROOT + '/UrbanSound8K.csv', DATA_ROOT + '/audio/fold')

8732
Finished


In [4]:
class AudioDataset(Dataset):
    """Dataset that serves each waveform as a 4-row tensor with its label.

    Both constructor arguments are converted to NumPy arrays. Indexing returns
    the waveform reshaped to ``(4, len(waveform) / 4)`` together with the label
    as a one-element int64 tensor.

    NOTE(review): the reshape assumes equal-length clips whose length is a
    multiple of 4 -- confirm against the loader.
    """

    def __init__(self, data_audio, data_label):
        self.data_label1 = np.array(data_label)
        self.data_set = np.array(data_audio)

    def __len__(self):
        return len(self.data_set)

    def __getitem__(self, index):
        waveform = self.data_set[index]
        quarter = int(len(waveform) / 4)
        # Fold the flat signal into four consecutive segments, one per row.
        signal = torch.from_numpy(waveform).reshape(4, quarter)
        # Wrap the scalar label in a length-1 array so it becomes a 1-D tensor.
        target = torch.from_numpy(np.array([self.data_label1[index]])).long()
        return signal, target


# Cut points for a sequential 80% / 10% / 10% train / validation / test split.
split_refList = int(len(audList) * 0.8)
split_refListSec = int(len(audList) * 0.9)

# Waveforms and labels are sliced at the same indices so they stay aligned.
train_audList = audList[:split_refList]
val_audList = audList[split_refList:split_refListSec]
test_audList = audList[split_refListSec:]

train_labelList = labelList[:split_refList]
val_labelList = labelList[split_refList:split_refListSec]
test_labelList = labelList[split_refListSec:]


In [5]:
class SimpleConvNet(nn.Module):
    """Three Conv1d+ReLU stages, global average pooling and a 10-way linear head.

    The first convolution takes 4 input channels; channel widths then go
    16 -> 32 -> 64. The last stage pools every channel down to a single value,
    so the linear layer receives 64 features per sample.
    """

    def __init__(self):
        super().__init__()

        self.conv_layer1 = nn.Sequential(
            nn.Conv1d(4, 16, kernel_size=1, stride=1, padding=1),
            nn.ReLU(),
        )
        self.conv_layer2 = nn.Sequential(
            nn.Conv1d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
        )
        self.conv_layer3 = nn.Sequential(
            nn.Conv1d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool1d(1),
        )
        self.linear_layer = nn.Sequential(nn.Linear(64, 10))

    def forward(self, input):
        """Return the 10 class scores for a ``(batch, 4, length)`` input."""
        features = input
        for stage in (self.conv_layer1, self.conv_layer2, self.conv_layer3):
            features = stage(features)

        # Collapse (batch, 64, 1) to (batch, 64) before the linear head.
        flat = features.view(input.size(0), -1)
        return self.linear_layer(flat)


In [6]:
class ConvNet(nn.Module):
    """Five Conv1d -> ReLU -> BatchNorm1d stages followed by a 10-way linear head.

    Stages 1, 2 and 4 end with a stride-2 max pool; after the last stage every
    channel is averaged down to one value, giving 64 features per sample.
    """

    def __init__(self):
        super().__init__()

        def stage(in_ch, out_ch, kernel, pool):
            # One convolution stage, optionally followed by 2x downsampling.
            layers = [
                nn.Conv1d(in_ch, out_ch, kernel_size=kernel, stride=1, padding=1),
                nn.ReLU(),
                nn.BatchNorm1d(out_ch),
            ]
            if pool:
                layers.append(nn.MaxPool1d(kernel_size=2, stride=2))
            return layers

        # Layers are unpacked into a single Sequential so their positional
        # indices (and therefore the state_dict keys) stay flat.
        self.conv_layers = nn.Sequential(
            *stage(4, 16, 1, True),
            *stage(16, 32, 3, True),
            *stage(32, 64, 3, False),
            *stage(64, 64, 3, True),
            *stage(64, 64, 3, False),
            nn.AdaptiveAvgPool1d(1),
        )
        self.linear_layer = nn.Sequential(nn.Linear(64, 10))

    def forward(self, input):
        """Return the 10 class scores for a ``(batch, 4, length)`` input."""
        features = self.conv_layers(input)
        # Collapse (batch, 64, 1) to (batch, 64) before the linear head.
        flat = features.view(input.size(0), -1)
        return self.linear_layer(flat)


In [7]:
# Wrap each split in an AudioDataset (items are (4, length / 4) tensors).
trainDataset = AudioDataset(train_audList, train_labelList)
valDataset = AudioDataset(val_audList, val_labelList)
testDataset = AudioDataset(test_audList, test_labelList)

# Only the training loader shuffles; validation and test keep their order.
train_dataloader = DataLoader(
    trainDataset,
    batch_size=32,
    shuffle=True,
    num_workers=0,
)
val_dataloader = DataLoader(valDataset, batch_size=32, num_workers=0)
test_dataloader = DataLoader(testDataset, batch_size=32, num_workers=0)



In [8]:
class AudioDatasetRes(Dataset):
    """Dataset that serves each waveform as a ``(4, 1, length / 4)`` tensor.

    Both constructor arguments are converted to NumPy arrays. Indexing returns
    the waveform reshaped into 4 channels of height 1, together with the label
    as a one-element int64 tensor.

    NOTE(review): the reshape assumes equal-length clips whose length is a
    multiple of 4 -- confirm against the loader.
    """

    def __init__(self, data_audio, data_label):
        self.data_label1 = np.array(data_label)
        self.data_set = np.array(data_audio)

    def __len__(self):
        return len(self.data_set)

    def __getitem__(self, index):
        waveform = self.data_set[index]
        quarter = int(len(waveform) / 4)
        # The singleton middle axis gives the clip an image-like layout.
        signal = torch.from_numpy(waveform).reshape(4, 1, quarter)
        # Wrap the scalar label in a length-1 array so it becomes a 1-D tensor.
        target = torch.from_numpy(np.array([self.data_label1[index]])).long()
        return signal, target


# Wrap the three splits so each clip comes out as a (4, 1, length / 4) tensor,
# the layout a 2-D convolutional network expects.
trainDatasetRes = AudioDatasetRes(train_audList,train_labelList)
valDatasetRes = AudioDatasetRes(val_audList,val_labelList)
testDatasetRes = AudioDatasetRes(test_audList,test_labelList)

# Only the training loader shuffles; validation and test keep their order.
train_dataloaderRes = torch.utils.data.DataLoader(trainDatasetRes, batch_size=32, shuffle=True, num_workers=0)
val_dataloaderRes = torch.utils.data.DataLoader(valDatasetRes, batch_size=32, num_workers=0)
test_dataloaderRes = torch.utils.data.DataLoader(testDatasetRes, batch_size=32, num_workers=0)


epochs = 8
# Build ResNet-18 with a 10-way head: labelTrans produces ten classes, so the
# default 1000-way ImageNet head would carry 990 unused outputs.
RES = models.resnet18(num_classes=10)

# Replace the stem so the network accepts 4 input channels instead of 3.
RES.conv1 = nn.Conv2d(4, 64, kernel_size=1, stride=2, padding=3,
                               bias=False)
optimizerRES = torch.optim.Adam(RES.parameters(), lr=learning_rate)
loss_fn = nn.CrossEntropyLoss()
# StepLR scales the learning rate by 0.1 after every 10 scheduler steps.
# NOTE(review): presumably fit() steps the scheduler once per epoch, in which
# case it never fires within 8 epochs -- confirm in utils_train.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizerRES, step_size=10, gamma=0.1)
fit(train_dataloaderRes,val_dataloaderRes,RES,optimizerRES,loss_fn,epochs, scheduler )
# Report on the held-out test split (previously the validation loader was
# evaluated here), matching the other experiments in this notebook.
testLoss, testAcc = test(RES, test_dataloaderRes, loss_fn)
print("The accuracy on the testdata is " + str(testAcc) + "%")


RuntimeError: CUDA error: unspecified launch failure

In [24]:
# Experiment: train SimpleConvNet, then score it on the held-out test loader.
epochs = 25
SCN = SimpleConvNet()

loss_fn = nn.CrossEntropyLoss()
optimizerSCN = torch.optim.Adam(SCN.parameters(), lr=learning_rate)
# StepLR scales the learning rate by 0.1 after every 10 scheduler steps.
scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer=optimizerSCN,
    step_size=10,
    gamma=0.1,
)

fit(train_dataloader, val_dataloader, SCN, optimizerSCN, loss_fn, epochs, scheduler)
testLoss, testAcc = test(SCN, test_dataloader, loss_fn)
print(f"The accuracy on the testdata is {testAcc!s}%")


Epoch 1/25: train_loss: 2.2253, train_accuracy: 15.6478, val_loss: 2.3583, val_accuracy: 18.5567
Epoch 2/25: train_loss: 2.0892, train_accuracy: 21.7037, val_loss: 2.3335, val_accuracy: 19.8167
Epoch 3/25: train_loss: 1.9038, train_accuracy: 27.4016, val_loss: 2.2046, val_accuracy: 27.9496
Epoch 4/25: train_loss: 1.8196, train_accuracy: 31.0523, val_loss: 2.2194, val_accuracy: 29.3242
Epoch 5/25: train_loss: 1.7189, train_accuracy: 35.6335, val_loss: 2.0602, val_accuracy: 28.6369
Epoch 6/25: train_loss: 1.6582, train_accuracy: 37.0079, val_loss: 2.0559, val_accuracy: 31.9588
Epoch 7/25: train_loss: 1.5943, train_accuracy: 39.8712, val_loss: 2.0587, val_accuracy: 32.5315
Epoch 8/25: train_loss: 1.5557, train_accuracy: 42.2047, val_loss: 2.1368, val_accuracy: 32.7606
Epoch 9/25: train_loss: 1.5290, train_accuracy: 43.0494, val_loss: 1.9926, val_accuracy: 35.9679
Epoch 10/25: train_loss: 1.4988, train_accuracy: 45.2684, val_loss: 2.1664, val_accuracy: 33.7915
Epoch 11/25: train_loss: 1.43

In [25]:
# Experiment: train the deeper ConvNet, then score it on the held-out test loader.
epochs = 25
CN = ConvNet()

loss_fn = nn.CrossEntropyLoss()
optimizerCN = torch.optim.Adam(CN.parameters(), lr=learning_rate)
# StepLR scales the learning rate by 0.1 after every 10 scheduler steps.
scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer=optimizerCN,
    step_size=10,
    gamma=0.1,
)

fit(train_dataloader, val_dataloader, CN, optimizerCN, loss_fn, epochs, scheduler)
testLoss, testAcc = test(CN, test_dataloader, loss_fn)
print(f"The accuracy on the testdata is {testAcc!s}%")

Epoch 1/25: train_loss: 1.7334, train_accuracy: 35.6478, val_loss: 2.1843, val_accuracy: 31.3860
Epoch 2/25: train_loss: 1.5222, train_accuracy: 43.8082, val_loss: 2.8044, val_accuracy: 26.2314
Epoch 3/25: train_loss: 1.4250, train_accuracy: 47.1868, val_loss: 2.1841, val_accuracy: 34.9370
Epoch 4/25: train_loss: 1.3581, train_accuracy: 50.3794, val_loss: 2.6924, val_accuracy: 32.8751
Epoch 5/25: train_loss: 1.3083, train_accuracy: 52.4266, val_loss: 2.8161, val_accuracy: 32.3024
Epoch 6/25: train_loss: 1.2487, train_accuracy: 54.3880, val_loss: 2.2441, val_accuracy: 29.8969
Epoch 7/25: train_loss: 1.2161, train_accuracy: 56.6070, val_loss: 2.4473, val_accuracy: 35.5097
Epoch 8/25: train_loss: 1.1731, train_accuracy: 57.9671, val_loss: 2.1733, val_accuracy: 31.6151
Epoch 9/25: train_loss: 1.1420, train_accuracy: 59.3271, val_loss: 2.0919, val_accuracy: 36.8843
Epoch 10/25: train_loss: 1.1130, train_accuracy: 60.7015, val_loss: 2.6222, val_accuracy: 30.6987
Epoch 11/25: train_loss: 1.01