In [1]:
import numpy as np
import pandas as pd
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
def eval(experiment):
  def evaluate_model(experiment, x, y):
    model = torch.load('/content/drive/MyDrive/msc_experiments/{}_model.pt'.format(experiment), 
                      map_location=torch.device('cpu'))
    x = np.load('/content/drive/MyDrive/msc_experiments/{}.npz'.format(experiment), allow_pickle=True)[x]
    n_images = x.shape[0]
    y = np.load('/content/drive/MyDrive/msc_experiments/{}.npz'.format(experiment), allow_pickle=True)[y]
    x = torch.Tensor(x)
    y = np.squeeze(torch.LongTensor(y))

    model.eval()
    outputs = model(x)
    probals, predicted = torch.max(outputs, 1)

    c = (predicted == y).squeeze()
    correct = c.sum().item()

    print('samples correct={} training accuracy={}'.format(correct, np.round(correct/n_images, 3)))
  print('TRAIN')
  evaluate_model(experiment, 'x_train', 'y_train')
  print('TEST NONE')
  evaluate_model(experiment, 'x_test_none', 'y_test')
  print('TEST GAUSSIAN')
  evaluate_model(experiment, 'x_test_gaussian', 'y_test')

# training

In [24]:
experiment = 'comp12_int10_umap12rs69_dbscan31_uniform_k5'

In [25]:
path = '/content/drive/MyDrive/msc_experiments/{}.npz'.format(experiment)
loaded = np.load(path, allow_pickle=True)
x_train = loaded['x_train']
y_train = loaded['y_train']
x_train.shape, y_train.shape

((10000, 10143), (10000,))

In [26]:
N, D_in, H1, H2, H3, D_out = x_train.shape[0], x_train.shape[1], 2000, 1000, 500, 10
learning_rate = 0.001

x = torch.Tensor(x_train)
y = np.squeeze(torch.LongTensor(y_train))

In [40]:
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H1),
    torch.nn.Sigmoid(),
    torch.nn.Dropout(p=0.5),
    torch.nn.Linear(H1, H2),
    torch.nn.Sigmoid(),
    torch.nn.Dropout(p=0.5),
    torch.nn.Linear(H2, H3),
    torch.nn.ReLU(),
    torch.nn.Linear(H3, D_out),
    torch.nn.LogSoftmax(dim = 1),
).cuda()

loss_fn = torch.nn.NLLLoss()
    
losses = []

batchs = 256
    
trainset = torch.utils.data.TensorDataset(x, y)
trainloader = torch.utils.data.DataLoader(trainset, batch_size = batchs,
                                          shuffle=True, num_workers=1)

#optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

batchesn = int(N/batchs)
epochs = 25

In [41]:
for epoch in range(epochs):
    running_loss = 0.0
    for i, datap in enumerate(trainloader, 0):
        #print(i)
        # get the inputs
        inputs, labels = datap
        inputs = inputs.to('cuda', non_blocking=True)
        labels = labels.to('cuda', non_blocking=True)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)

        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % batchesn == batchesn-1:
            print('[epoch {}] loss: {}'.format(epoch + 1, running_loss/batchesn))
            running_loss = 0.0
    scheduler.step()

[epoch 1] loss: 2.317834530121241
[epoch 2] loss: 2.2985168481484437
[epoch 3] loss: 2.1455686398041554
[epoch 4] loss: 2.027425487836202
[epoch 5] loss: 1.934690603843102
[epoch 6] loss: 1.8226729509158013
[epoch 7] loss: 1.7686904271443684
[epoch 8] loss: 1.7523250518701015
[epoch 9] loss: 1.7282874431365576
[epoch 10] loss: 1.7070422447644746
[epoch 11] loss: 1.6999369278932228
[epoch 12] loss: 1.6846894331467457
[epoch 13] loss: 1.6758707547799134
[epoch 14] loss: 1.678871023349273
[epoch 15] loss: 1.663789348724561
[epoch 16] loss: 1.6498105495403974
[epoch 17] loss: 1.6613053725315974
[epoch 18] loss: 1.6488660726791773
[epoch 19] loss: 1.6409875582426021
[epoch 20] loss: 1.6314435188586895
[epoch 21] loss: 1.6321374147366254
[epoch 22] loss: 1.6295745709003546
[epoch 23] loss: 1.6220135963880098
[epoch 24] loss: 1.6288754145304363
[epoch 25] loss: 1.6148083362823877


In [42]:
np.save('/content/drive/MyDrive/msc_experiments/{}_losses.npz'.format(experiment), np.array(losses))
model_filen = experiment + '.pt' 

torch.save(model, '/content/drive/MyDrive/msc_experiments/{}_model.pt'.format(experiment))

# testing

In [43]:
eval(experiment)

TRAIN
samples correct=4436 training accuracy=0.444
TEST NONE
samples correct=1090 training accuracy=0.363
TEST GAUSSIAN
samples correct=1084 training accuracy=0.361
