In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.autograd import Variable

In [2]:
from torch.utils.data import Dataset, DataLoader

In [3]:
import pandas as pd
import pickle

In [4]:
import torch.utils.data as utils

In [5]:
import numpy as np

In [6]:
from sklearn.preprocessing import LabelEncoder


In [7]:
from sklearn.model_selection import train_test_split

In [8]:
import sklearn

### STEP 1: FUNCTIONS

In [9]:
class FreeSoundDataset(Dataset):
    """ FreeSound dataset."""

    # Initialize your data, download, etc.
    def __init__(self, X, y):
        
        self.len = X.shape[0]
        self.x_data = torch.from_numpy(X)
        self.y_data = torch.from_numpy(y)

    def __getitem__(self, index):
        return (self.x_data[index], self.y_data[index])

    def __len__(self):
        return self.len

In [10]:
class SubmitFreeSoundDataset(Dataset):
    """ FreeSound dataset."""

    # Initialize your data, download, etc.
    def __init__(self, X):
        
        self.len = X.shape[0]
        self.x_data = torch.from_numpy(X)

    def __getitem__(self, index):
        return (self.x_data[index])

    def __len__(self):
        return self.len

### STEP 2: LOADING DATASET 

In [11]:
X_train = np.load('../data/processed/mel/train_curated_mel128.npy')

In [12]:
X_test = np.load('../data/processed/mel/test_mel128_len200.npy')

In [13]:
X_train = X_train[:, : ,:128]

In [14]:
X_test = X_test[:, : ,:128]

In [15]:
y_train = np.load('../data/processed/y_onehotenc_train_curated.npy')

In [16]:
print('X_train:', X_train.shape)
print('X_test:', X_test.shape)
print('y_train:', y_train.shape)


X_train: (4970, 128, 128)
X_test: (1120, 128, 128)
y_train: (4970, 80)


In [17]:
train_dataset = FreeSoundDataset(X_train, y_train)

In [18]:
test_dataset = SubmitFreeSoundDataset(X_test)

### STEP 2: MAKING DATASET ITERABLE

In [19]:
batch_size = 32
n_iters = 1000
num_epochs = n_iters / (len(train_dataset) / batch_size)
num_epochs = int(num_epochs)
num_epochs

6

In [20]:
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size= batch_size, 
                                           shuffle=True)

In [21]:
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
                                          batch_size=batch_size, 
                                          shuffle=False)

### STEP 3: CREATE MODEL CLASS

In [22]:
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        
        # Convolution 1
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=200, kernel_size=3, stride=1, padding=1)
        self.batchnorm1 = nn.BatchNorm2d(200)
        self.relu1 = nn.ReLU()

        # # Max pool 1
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)
        
        
        
        # Convolution 2
        self.cnn2 = nn.Conv2d(in_channels=200, out_channels=100, kernel_size=3, stride=1, padding=1)
        self.batchnorm2 = nn.BatchNorm2d(100)
        self.relu2 = nn.ReLU()

        # # Max pool 2
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)



        # Convolution 3
        self.cnn3 = nn.Conv2d(in_channels=100, out_channels=100, kernel_size=3, stride=1, padding=1)
        self.batchnorm3 = nn.BatchNorm2d(100)
        self.relu3 = nn.ReLU()

        # # Max pool 3
        self.maxpool3 = nn.MaxPool2d(kernel_size=2)


        

        # Fully connected 1 (readout)
        self.fc1 = nn.Linear(100 * 16 * 16, 80) 

        
    
    def forward(self, x):
        # Convolution 1
        out = self.cnn1(x.float())
        out = self.batchnorm1(out)
        out = self.relu1(out)

        # Max pool 1
        out = self.maxpool1(out)



        # Convolution 2
        out = self.cnn2(out)
        out = self.batchnorm2(out)
        out = self.relu2(out)

        # Max pool 2
        out = self.maxpool2(out)



        # Convolution 3
        out = self.cnn3(out)
        out = self.batchnorm3(out)
        out = self.relu3(out)

        # Max pool 3
        out = self.maxpool3(out)


   

        # Dropout 1
        #out = self.dropout(out)

                     
        # Resize
        # Original size: (100, 32, 7, 7)
        # out.size(0): 100
        # New out size: (100, 32*7*7)
        out = out.view(out.size(0), -1)

        # Linear function (readout)
        out = self.fc1(out)
        
        return out

### STEP 4: INSTANTIATE MODEL CLASS

In [23]:
model = CNNModel()

In [24]:
#######################
#  USE GPU FOR MODEL  #
#######################

if torch.cuda.is_available():
    model.cuda()

### STEP 5: INSTANTIATE LOSS CLASS

In [25]:
criterion = nn.MultiLabelSoftMarginLoss()

### STEP 6: INSTANTIATE OPTIMIZER CLASS

In [26]:
learning_rate = 0.01
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

### STEP 7: TRAIN THE MODEL

In [27]:
niter = 0
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        
        #######################
        #  USE GPU FOR MODEL  #
        #######################
        if torch.cuda.is_available():
            images = Variable(images.unsqueeze(1).cuda())
            labels = Variable(labels.float().cuda())
        else:
            images = Variable(images.unsqueeze(1))
            labels = Variable(labels)
        
        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()
        
        # Forward pass to get output/logits
        #images = images.unsqueeze(1).type(torch.FloatTensor).cuda()
        outputs = model(images)
        
        # Calculate Loss: softmax --> cross entropy loss
        loss = criterion(outputs, labels)
        
        # Getting gradients w.r.t. parameters
        loss.backward()
        
        # Updating parameters
        optimizer.step()
        
        niter += 1
        
        if niter % 500 == 0:     
            print('Iteration: {}. Loss: {}.  '.format(niter, loss.data, '\n'))

Iteration: 500. Loss: 0.04613245278596878.  


In [28]:
submit = []
for images in test_loader:
    #######################
    #  USE GPU FOR MODEL  #
    #######################
    if torch.cuda.is_available():
        images = Variable(images.unsqueeze(1).cuda())
    else:
        images = Variable(images.unsqueeze(1))

    # Forward pass only to get logits/output
    outputs = model(images)
    if len(submit):
        submit = np.concatenate((submit, outputs.cpu().detach().numpy()), axis=0) 
    else:
        submit = outputs.cpu().detach().numpy()
    

In [32]:
submit.shape

(1120, 80)

In [35]:
test = pd.read_csv('../data/raw/sample_submission.csv')

In [39]:
test.head()

Unnamed: 0,fname,Accelerating_and_revving_and_vroom,Accordion,Acoustic_guitar,Applause,Bark,Bass_drum,Bass_guitar,Bathtub_(filling_or_washing),Bicycle_bell,...,Toilet_flush,Traffic_noise_and_roadway_noise,Trickle_and_dribble,Walk_and_footsteps,Water_tap_and_faucet,Waves_and_surf,Whispering,Writing,Yell,Zipper_(clothing)
0,000ccb97.wav,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0012633b.wav,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,001ed5f1.wav,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,00294be0.wav,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,003fde7a.wav,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [44]:
submit_final = pd.DataFrame(submit)

In [47]:
submit_final.insert(0, 'fname', test['fname'].values, allow_duplicates = False)

In [40]:
submit_final['fname'] = test['fname']

In [51]:
submit_final.columns = test.columns

In [52]:
submit_final.to_csv('submission.csv', index=False)

Unnamed: 0,fname,Accelerating_and_revving_and_vroom,Accordion,Acoustic_guitar,Applause,Bark,Bass_drum,Bass_guitar,Bathtub_(filling_or_washing),Bicycle_bell,...,Toilet_flush,Traffic_noise_and_roadway_noise,Trickle_and_dribble,Walk_and_footsteps,Water_tap_and_faucet,Waves_and_surf,Whispering,Writing,Yell,Zipper_(clothing)
0,000ccb97.wav,-6.571311,-11.436894,-11.370918,-8.503457,-12.040048,-11.951573,-9.252275,-5.836163,-5.744610,...,-7.820275,-11.793205,-8.573426,-7.611128,-7.069292,-7.815620,-5.621717,-8.543371,-12.619022,-7.194542
1,0012633b.wav,-2.580047,-10.601191,-10.933067,-8.869737,-5.177559,-6.757670,-7.060132,-4.825543,-7.001179,...,-5.871586,-5.804954,-7.404846,-3.512025,-3.395714,-5.420729,-7.950903,-2.688932,-8.617435,-4.072239
2,001ed5f1.wav,-1.622008,-10.748981,-9.986337,-4.716881,-4.852257,-6.823735,-12.943916,-7.427147,-7.045144,...,-3.484841,-9.240122,-7.318274,-4.452039,-3.477637,-8.619655,-6.968554,-6.935326,-5.720869,-2.726440
3,00294be0.wav,-2.981328,-7.647376,-10.258298,-7.845852,-4.331080,-8.231956,-16.801146,-7.779273,-12.477813,...,-0.040343,-8.757894,-8.765834,-5.511958,-3.889632,-4.826019,-8.255265,-6.915016,-10.186332,-1.374347
4,003fde7a.wav,-10.726407,-8.663589,-12.787581,-12.197020,-14.213239,-8.950282,-10.947475,-8.565602,-4.800739,...,-11.440338,-16.991419,-6.169027,-12.964579,-3.856647,-12.621800,-7.152115,-9.068494,-8.391049,-6.208676
5,0040ccc9.wav,-6.897686,-9.363860,-5.387519,-9.110913,-11.705821,-2.607643,-4.205737,-7.485375,-13.296635,...,-5.731539,-6.602752,-13.639380,-7.790086,-6.058382,-8.159457,-4.994622,-4.757012,-11.392841,-9.662615
6,0046b732.wav,-7.210663,-9.587617,-5.634518,-9.587002,-6.699458,-3.684237,-8.121440,-6.339416,-10.919273,...,-9.184093,-5.291418,-7.784307,-7.030206,-6.224391,-7.491456,-1.554338,-5.298212,-8.928688,-6.237615
7,004f3bbc.wav,-2.658517,-13.147040,-7.090862,-14.041443,-10.757718,-8.466377,1.458254,-7.860855,-12.349713,...,-5.021905,-7.085345,-16.759562,-9.614057,-7.883971,-7.327021,-11.763622,-4.991548,-16.261557,-8.994558
8,00526050.wav,-6.715613,-10.259517,-10.426000,-8.265851,-12.255182,-0.351049,-8.025876,-6.667821,-7.655104,...,-8.091774,-8.066121,-10.878940,-7.359135,-6.523279,-8.838409,-6.178656,-7.028317,-10.535287,-7.852357
9,00559da4.wav,-2.798366,-11.834628,-10.573320,-6.727780,-5.741473,-9.632592,-14.401235,-5.196493,-8.097873,...,-5.128996,-8.064126,-6.199074,-2.735042,-3.103791,-4.366065,-5.355380,-4.235670,-9.461807,-3.099753
