In [213]:
from scipy.io import loadmat
import numpy as np
import sklearn

In [256]:
# Load in Data
data = loadmat('MSdata.mat')
X_train = data['trainx']
y_train = data['trainy']
X_test = data['testx']

In [215]:
# Cross-validated 5 folds:
def create_folds(trainx, trainy, k=5):
    folds_x, folds_y = [], []
    n = len(trainx)
    size = n//k
    shuffled_idx = np.random.choice([i for i in range(n)], size = n, replace = False)
    for i in range(k-1):
        idx = shuffled_idx[i*size:(i+1)*size]
        folds_x.append(trainx[idx])
        folds_y.append(trainy[idx])
    folds_x.append(trainx[i*size:])
    folds_y.append(trainy[i*size:])
    return [folds_x, folds_y]

def create_data(folds, i=0):
    x_test, y_test = folds[0][i], folds[1][i]
    x_train = np.concatenate(folds[0][0:i] + folds[0][i+1:])
    y_train = np.concatenate(folds[1][0:i] + folds[1][i+1:])
    return x_train, y_train, x_test, y_test

In [216]:
folds = create_folds(X_train, y_train, k=5)
x_train, y_train, x_test, y_test = create_data(folds, i=0)

### Pytorch
Custom dataset + Dataloader

In [217]:
from __future__ import print_function, division
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

In [261]:
class SongDataset(Dataset):
    def __init__(self, X_train, y_train, transform = None):
        self.X = X_train
        self.y = y_train
        if transform:
            self.X = transform(self.X)
        
    def __len__(self):
        return self.X.shape[0]
    
    def __getitem__(self, idx):
        x = self.X[idx]
        if not self.y:
            y = -1
        else:
            y = self.y[idx][0]
        sample = {'x': torch.from_numpy(x).type(torch.float32),
                  'y': torch.tensor(np.array([float(y)]))}
        return sample
            

In [219]:
train_dataset = SongDataset(x_train, y_train)
train_dataloader = DataLoader(train_dataset, 
                              batch_size=128,
                              shuffle=True, 
                              num_workers=4)
val_dataset = SongDataset(x_test, y_test)
val_dataloader = DataLoader(val_dataset, 
                            batch_size=128,
                            shuffle=False, 
                            num_workers=4)

In [262]:
test_dataset = SongDataset(X_test, None)
test_dataloader = DataLoader(test_dataset, 
                             batch_size=128,
                             shuffle=False, 
                             num_workers=4)

In [220]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F

In [237]:
class DenseNet(nn.Module):
    def __init__(self):
        super(DenseNet, self).__init__()
#         self.conv1 = nn.Conv2d(1, 64, 3)
#         self.norm1 = nn.BatchNorm2d(64)
#         self.conv2 = nn.Conv2d(64, 32, 3)
#         self.norm2 = nn.BatchNorm2d(32)
        self.fc_1 = nn.Linear(90, 128)
        self.norm1 = nn.BatchNorm1d(128)
        self.fc_2 = nn.Linear(128, 64)
        self.norm2 = nn.BatchNorm1d(64)
        self.fc_3 = nn.Linear(64, 32)
        self.norm3 = nn.BatchNorm1d(32)
        self.fc_4 = nn.Linear(32, 1)
        
    def forward(self, x):
#         x = self.norm1(self.conv1(x))
#         x = F.max_pool2d(F.relu(x), 2)
#         x = self.norm2(self.conv2(x))
#         x = F.max_pool2d(F.relu(x), 2)
#         x = x.view(-1, 1)
        x = self.norm1(self.fc_1(x))
        x = F.relu(x)
        x = self.norm2(self.fc_2(x))
        x = F.relu(x)
        x = self.norm3(self.fc_3(x))
        x = F.relu(x)
        x = self.fc_4(x)
        return x
    
model = DenseNet().cuda()
print(model)

DenseNet(
  (fc_1): Linear(in_features=90, out_features=128, bias=True)
  (norm1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc_2): Linear(in_features=128, out_features=64, bias=True)
  (norm2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc_3): Linear(in_features=64, out_features=32, bias=True)
  (norm3): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc_4): Linear(in_features=32, out_features=1, bias=True)
)


In [238]:
import torch.optim as optim
criterion = nn.L1Loss()
optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)

In [281]:
model.train()
epochs = 50
for epoch in range(epochs):
    running_loss = 0.0
    for batch_idx, sample in enumerate(train_dataloader):
        optimizer.zero_grad()
        data = sample['x'].cuda()
        target = sample['y'].cuda()
    
        pred = model(data)
        loss = criterion(target, pred)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if batch_idx and batch_idx % 1000 == 0:
            print('  [batch:  %5d] loss: %.5f' % (batch_idx, running_loss/1000))
            running_loss = 0.0
            
    model.eval()
    val_loss = []
    with torch.no_grad():
        for val_sample in val_dataloader:
            data = val_sample['x'].cuda()
            target = val_sample['y'].cuda()
            pred = model(data)
            loss = criterion(target.float(), pred.float())
            val_loss.append(loss.item())
    Val_loss = round(np.mean(val_loss),3)
    print('Epoch: ' + str(epoch+1) + '/' + str(epochs) + ' Val Loss:' + str(Val_loss))
    if Val_loss < 6:
        torch.save(model, "MLP_"+str(Val_loss)+'.pth')
    
    

  [batch:   1000] loss: 6.23690
  [batch:   2000] loss: 6.00955
  [batch:   3000] loss: 5.98047
Epoch: 1/50 Val Loss:6.009
  [batch:   1000] loss: 5.93047
  [batch:   2000] loss: 5.91786
  [batch:   3000] loss: 5.90470
Epoch: 2/50 Val Loss:5.966
  [batch:   1000] loss: 5.88483
  [batch:   2000] loss: 5.86239
  [batch:   3000] loss: 5.85374
Epoch: 3/50 Val Loss:5.916
  [batch:   1000] loss: 5.81334
  [batch:   2000] loss: 5.86747
  [batch:   3000] loss: 5.85836
Epoch: 4/50 Val Loss:6.081
  [batch:   1000] loss: 5.88543
  [batch:   2000] loss: 5.83171
  [batch:   3000] loss: 5.87436
Epoch: 5/50 Val Loss:5.883
  [batch:   1000] loss: 5.82643
  [batch:   2000] loss: 5.85711
  [batch:   3000] loss: 5.82812
Epoch: 6/50 Val Loss:5.914
  [batch:   1000] loss: 5.79403
  [batch:   2000] loss: 5.80010
  [batch:   3000] loss: 5.86151
Epoch: 7/50 Val Loss:6.116
  [batch:   1000] loss: 5.84019
  [batch:   2000] loss: 5.84209
  [batch:   3000] loss: 5.82869
Epoch: 8/50 Val Loss:5.895
  [batch:   1000

In [275]:
result = []
with torch.no_grad():
    for test_sample in test_dataloader:
        data = test_sample['x'].cuda()
        pred = model(data)
        pred = np.squeeze(pred).cpu().data.numpy()
        result = np.concatenate([result, pred])

In [290]:
import pandas as pd

cvs_output = pd.DataFrame.from_dict({'dataid': [i+1 for i in range(len(result))], 'prediction': result})
cvs_output

cvs_output.to_csv('output.csv', index = False)
#np.savetxt('output.csv', result, delimiter=',')