In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.autograd import Variable

In [2]:
from torch.utils.data import Dataset, DataLoader

In [3]:
import pandas as pd
import pickle

In [4]:
import torch.utils.data as utils

In [5]:
import numpy as np

In [6]:
from sklearn.preprocessing import LabelEncoder


In [7]:
from sklearn.model_selection import train_test_split

### STEP 2: LOADING DATASET 

In [8]:
class FreeSoundDataset(Dataset):
    """ FreeSound dataset."""

    # Initialize your data, download, etc.
    def __init__(self, X, y):
        
        self.len = X.shape[0]
        self.x_data = torch.from_numpy(X)
        self.y_data = torch.from_numpy(y)

    def __getitem__(self, index):
        return (self.x_data[index], self.y_data[index])

    def __len__(self):
        return self.len

In [9]:
X = np.load('../data/processed/mel/train_noisy_mel128_len600.npy')

In [10]:
X = X[:, : ,:128]

In [11]:
X.shape

(19815, 128, 128)

In [12]:
labels = pd.read_csv('../data/processed/train_noisy.csv',sep=';')['labels']

In [13]:
labels = [l.split(',')[0] for l in labels]

In [14]:
le = LabelEncoder()
le.fit(labels)

LabelEncoder()

In [15]:
target = le.transform(labels) 

In [16]:
np.min(pd.unique(target))

0

In [17]:
X_train, X_test, y_train, y_test=train_test_split(X, target, test_size=0.3, random_state=47, stratify=target)

In [18]:
print('X_train:', X_train.shape)

print('X_test:', X_test.shape)

print('y_train:', y_train.shape)
print('y_test:', y_test.shape)

X_train: (13870, 128, 128)
X_test: (5945, 128, 128)
y_train: (13870,)
y_test: (5945,)


In [19]:
train_dataset = FreeSoundDataset(X_train, y_train)

In [20]:
test_dataset = FreeSoundDataset(X_test, y_test)

### STEP 2: MAKING DATASET ITERABLE

In [21]:
batch_size = 32
n_iters = 30000
num_epochs = n_iters / (len(train_dataset) / batch_size)
num_epochs = int(num_epochs)
num_epochs #= 100

69

In [22]:
transformations = transforms.Compose([transforms.ToTensor()])

In [23]:
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size= batch_size, 
                                           shuffle=True)

In [24]:
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
                                          batch_size=batch_size, 
                                          shuffle=False)

### STEP 3: CREATE MODEL CLASS

In [25]:
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        
        # Convolution 1
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.batchnorm1 = nn.BatchNorm2d(32)
        self.relu1 = nn.ReLU()

        # # Max pool 1
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)



        # Convolution 2
        self.cnn2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.batchnorm2 = nn.BatchNorm2d(32)
        self.relu2 = nn.ReLU()

        # # Max pool 2
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)



        # Convolution 3
        self.cnn3 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.batchnorm3 = nn.BatchNorm2d(32)
        self.relu3 = nn.ReLU()

        # # Max pool 3
        self.maxpool3 = nn.MaxPool2d(kernel_size=2)



        # Convolution 4
        self.cnn4 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.batchnorm4 = nn.BatchNorm2d(32)
        self.relu4 = nn.ReLU()

        # # Max pool 4
        self.maxpool4 = nn.MaxPool2d(kernel_size=2)



        

        # Fully connected 1 (readout)
        self.fc1 = nn.Linear(32 * 8 * 8, 80) 

        
    
    def forward(self, x):
        # Convolution 1
        out = self.cnn1(x.float())
        out = self.batchnorm1(out)
        out = self.relu1(out)

        # Max pool 1
        out = self.maxpool1(out)


        # Convolution 2
        out = self.cnn2(out)
        out = self.batchnorm2(out)
        out = self.relu2(out)

        # Max pool 2
        out = self.maxpool2(out)



        # Convolution 3
        out = self.cnn3(out)
        out = self.batchnorm3(out)
        out = self.relu3(out)

        # Max pool 3
        out = self.maxpool3(out)



        # Convolution 4
        out = self.cnn4(out)
        out = self.batchnorm4(out)
        out = self.relu4(out)

        # Max pool 4
        out = self.maxpool4(out)

  

        # Dropout 1
        #out = self.dropout(out)

                     
        # Resize
        # Original size: (100, 32, 7, 7)
        # out.size(0): 100
        # New out size: (100, 32*7*7)
        out = out.view(out.size(0), -1)

        # Linear function (readout)
        out = self.fc1(out)
        
        return out

### STEP 4: INSTANTIATE MODEL CLASS

In [26]:
model = CNNModel()

In [27]:
#######################
#  USE GPU FOR MODEL  #
#######################

if torch.cuda.is_available():
    model.cuda()

### STEP 5: INSTANTIATE LOSS CLASS

In [28]:
criterion = nn.CrossEntropyLoss()

### STEP 6: INSTANTIATE OPTIMIZER CLASS

In [29]:
learning_rate = 0.01

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

### STEP 7: TRAIN THE MODEL

In [30]:
niter = 0
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        
        #######################
        #  USE GPU FOR MODEL  #
        #######################
        if torch.cuda.is_available():
            images = Variable(images.unsqueeze(1).cuda())
            labels = Variable(labels.cuda())
        else:
            images = Variable(images.unsqueeze(1))
            labels = Variable(labels)
        
        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()
        
        # Forward pass to get output/logits
        #images = images.unsqueeze(1).type(torch.FloatTensor).cuda()
        outputs = model(images)
        
        # Calculate Loss: softmax --> cross entropy loss
        loss = criterion(outputs, labels)
        
        # Getting gradients w.r.t. parameters
        loss.backward()
        
        # Updating parameters
        optimizer.step()
        
        niter += 1
        
        if niter % 500 == 0:
            # Calculate Accuracy         
            correct = 0
            total = 0
            # Iterate through test dataset
            for images, labels in test_loader:
                #######################
                #  USE GPU FOR MODEL  #
                #######################
                if torch.cuda.is_available():
                    images = Variable(images.unsqueeze(1).cuda())
                else:
                    images = Variable(images.unsqueeze(1))
                
                # Forward pass only to get logits/output
                outputs = model(images)
                
                # Get predictions from the maximum value
                _, predicted = torch.max(outputs.data, 1)
                
                # Total number of labels
                total += labels.size(0)
                
                #######################
                #  USE GPU FOR MODEL  #
                #######################
                # Total correct predictions
                if torch.cuda.is_available():
                    correct += (predicted.cpu() == labels.cpu()).sum()
                else:
                    correct += (predicted == labels).sum()
            
            accuracy = 100 * correct / total
            
            # Print Loss
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(niter, loss.data, accuracy))

Iteration: 500. Loss: 3.4403905868530273. Accuracy: 15
Iteration: 1000. Loss: 2.945664167404175. Accuracy: 18
Iteration: 1500. Loss: 3.063861846923828. Accuracy: 20
Iteration: 2000. Loss: 2.6702938079833984. Accuracy: 22
Iteration: 2500. Loss: 2.928973913192749. Accuracy: 23
Iteration: 3000. Loss: 2.990509510040283. Accuracy: 23
Iteration: 3500. Loss: 2.329326868057251. Accuracy: 25
Iteration: 4000. Loss: 2.255774736404419. Accuracy: 25
Iteration: 4500. Loss: 2.3030154705047607. Accuracy: 25
Iteration: 5000. Loss: 2.393705368041992. Accuracy: 25
Iteration: 5500. Loss: 1.5665842294692993. Accuracy: 25
Iteration: 6000. Loss: 1.9094985723495483. Accuracy: 25
Iteration: 6500. Loss: 1.5605496168136597. Accuracy: 25
Iteration: 7000. Loss: 1.4165098667144775. Accuracy: 24
Iteration: 7500. Loss: 1.6259392499923706. Accuracy: 24
Iteration: 8000. Loss: 1.0495846271514893. Accuracy: 23
Iteration: 8500. Loss: 1.20859956741333. Accuracy: 23
Iteration: 9000. Loss: 0.7457762360572815. Accuracy: 23
It