In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.autograd import Variable

In [7]:
from torch.utils.data import Dataset, DataLoader

In [2]:
import pandas as pd
import pickle

In [3]:
import torch.utils.data as utils

In [4]:
import numpy as np

In [5]:
from sklearn.preprocessing import LabelEncoder


In [21]:
from sklearn.model_selection import train_test_split

### STEP 2: LOADING DATASET 

In [25]:
class FreeSDataset(Dataset):
    """ FreeSound dataset."""

    # Initialize your data, download, etc.
    def __init__(self, X, y):
        
        self.len = X.shape[0]
        self.x_data = torch.from_numpy(X)
        self.y_data = torch.from_numpy(y)

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.len

In [27]:
X = np.load('../data/processed/mfcc/train_curated_mfccs.npy')

In [28]:
labels = pd.read_csv('../data/processed/train_curated.csv',sep=';')['labels']

In [29]:
labels = [l.split(',')[0] for l in labels]

In [30]:
le = LabelEncoder()
le.fit(labels)

LabelEncoder()

In [31]:
target = le.transform(labels) 

In [32]:
X_train, X_test, y_train, y_test=train_test_split(X, target, test_size=0.3, random_state=42, stratify=target)

In [33]:
print('X_train:', X_train.shape)

print('X_test:', X_test.shape)

print('y_train:', y_train.shape)
print('y_test:', y_test.shape)

X_train: (3479, 40, 200)
X_test: (1491, 40, 200)
y_train: (3479,)
y_test: (1491,)


In [34]:
train_dataset = FreeSDataset(X_train, y_train)

In [35]:
test_dataset = FreeSDataset(X_test, y_test)

In [37]:
dataset = FreeSoundDataset()
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=32,
                          shuffle=True,
                          num_workers=2)

for epoch in range(2):
    for i, data in enumerate(train_loader, 0):
        # get the inputs
        inputs, labels = data

        # wrap them in Variable
        inputs, labels = Variable(inputs), Variable(labels)

        # Run your training process
print(epoch, i, "inputs", inputs.data, "labels", labels.data)

1 108 inputs tensor([[[-2.5852, -2.2992, -1.9289,  ...,  0.0000,  0.0000,  0.0000],
         [-2.8464, -1.0585,  0.3342,  ...,  0.0000,  0.0000,  0.0000],
         [ 2.6828,  1.9489,  1.1033,  ...,  0.0000,  0.0000,  0.0000],
         ...,
         [-0.0292,  0.1126,  0.3122,  ...,  0.0000,  0.0000,  0.0000],
         [ 1.8315,  2.0205,  2.5487,  ...,  0.0000,  0.0000,  0.0000],
         [ 1.0237,  1.0931,  1.9495,  ...,  0.0000,  0.0000,  0.0000]],

        [[-1.1214, -1.0418, -0.9250,  ...,  0.0000,  0.0000,  0.0000],
         [-2.7008, -2.2666, -1.8155,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.3225,  0.4356,  0.5231,  ...,  0.0000,  0.0000,  0.0000],
         ...,
         [-0.9836, -0.7060,  0.3398,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.2134,  0.5715, -0.2829,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.1545, -0.0332, -0.6623,  ...,  0.0000,  0.0000,  0.0000]],

        [[-0.8149,  0.1893,  0.3068,  ...,  0.0000,  0.0000,  0.0000],
         [-0.8663, -1.0178, -0.8

### STEP 2: MAKING DATASET ITERABLE

In [39]:
batch_size = 100
n_iters = 3000
num_epochs = n_iters / (len(train_dataset) / batch_size)
num_epochs = int(num_epochs)
num_epochs

86

In [40]:
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=batch_size, 
                                           shuffle=True)

In [41]:
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
                                          batch_size=batch_size, 
                                          shuffle=False)

### STEP 3: CREATE MODEL CLASS

In [55]:
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        
        # Convolution 1
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, stride=1, padding=0)
        self.relu1 = nn.ReLU()
        
        # Max pool 1
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)
     
        # Convolution 2
        #self.cnn2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=0)
        #self.relu2 = nn.ReLU()
        
        # Max pool 2
        #self.maxpool2 = nn.MaxPool2d(kernel_size=2)
        
        # Fully connected 1 (readout)
        self.fc1 = nn.Linear(32 * 396 * 36, 80) 
    
    def forward(self, x):
        # Convolution 1
        out = self.cnn1(x)
        out = self.relu1(out)
        
        # Max pool 1
        out = self.maxpool1(out)
        
        # Convolution 2 
        #out = self.cnn2(out)
        #out = self.relu2(out)
        
        # Max pool 2 
        #out = self.maxpool2(out)
        
        # Resize
        # Original size: (100, 32, 7, 7)
        # out.size(0): 100
        # New out size: (100, 32*7*7)
        out = out.view(out.size(0), -1)

        # Linear function (readout)
        out = self.fc1(out)
        
        return out

### STEP 4: INSTANTIATE MODEL CLASS

In [49]:
model = CNNModel()

In [50]:
#######################
#  USE GPU FOR MODEL  #
#######################

if torch.cuda.is_available():
    model.cuda()

### STEP 5: INSTANTIATE LOSS CLASS

In [51]:
criterion = nn.CrossEntropyLoss()

### STEP 6: INSTANTIATE OPTIMIZER CLASS

In [52]:
learning_rate = 0.01

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

### STEP 7: TRAIN THE MODEL

In [57]:
iter = 0
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        
        #######################
        #  USE GPU FOR MODEL  #
        #######################
        if torch.cuda.is_available():
            images = Variable(images.cuda())
            #images = Variable(images.view(-1, 40*200).cuda())
            labels = Variable(labels.cuda())
        else:
            images = Variable(images)
            labels = Variable(labels)
        
        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()
        
        # Forward pass to get output/logits
        images = images.unsqueeze(1).type(torch.FloatTensor).cuda()
        outputs = model(images)
        
        # Calculate Loss: softmax --> cross entropy loss
        loss = criterion(outputs, labels)
        
        # Getting gradients w.r.t. parameters
        loss.backward()
        
        # Updating parameters
        optimizer.step()
        
        iter += 1
        
        if iter % 500 == 0:
            # Calculate Accuracy         
            correct = 0
            total = 0
            # Iterate through test dataset
            for images, labels in test_loader:
                #######################
                #  USE GPU FOR MODEL  #
                #######################
                if torch.cuda.is_available():
                    images = Variable(images.cuda())
                else:
                    images = Variable(images)
                
                # Forward pass only to get logits/output
                outputs = model(images)
                
                # Get predictions from the maximum value
                _, predicted = torch.max(outputs.data, 1)
                
                # Total number of labels
                total += labels.size(0)
                
                #######################
                #  USE GPU FOR MODEL  #
                #######################
                # Total correct predictions
                if torch.cuda.is_available():
                    correct += (predicted.cpu() == labels.cpu()).sum()
                else:
                    correct += (predicted == labels).sum()
            
            accuracy = 100 * correct / total
            
            # Print Loss
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.data, accuracy))

RuntimeError: size mismatch, m1: [100 x 56448], m2: [456192 x 80] at /opt/conda/conda-bld/pytorch_1544202130060/work/aten/src/THC/generic/THCTensorMathBlas.cu:266

In [None]:
tensor_y = torch.stack([torch.Tensor(i) for i in y])

In [None]:



my_dataset = utils.TensorDataset(tensor_x,tensor_y) # create your datset
my_dataloader = utils.DataLoader(my_dataset) # create your dataloader