In [1]:
#needed imports
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from torch.optim.lr_scheduler import StepLR
from mnist1d.data import make_dataset, get_dataset_args
from Convo1D import Convo

In [2]:
# Build the MNIST-1D dataset with the package's default arguments, convert each
# split to a tensor, and wrap the training split in a shuffled mini-batch loader.
args = get_dataset_args()
data = make_dataset(args)

# Transposed views of the inputs plus the raw label arrays, kept under their
# original names in case later cells read them.
train_data_x = data['x'].transpose()
val_data_x = data['x_test'].transpose()
train_data_y = data['y']
val_data_y = data['y_test']

# The tensors use the dataset's native (un-transposed) layout: float32 inputs
# and int64 class labels.
# NOTE(review): presumably inputs are (n_samples, n_features) -- confirm against mnist1d.
x_train = torch.tensor(data['x'], dtype=torch.float32)
x_val = torch.tensor(data['x_test'], dtype=torch.float32)
y_train = torch.tensor(data['y'], dtype=torch.long)
y_val = torch.tensor(data['y_test'], dtype=torch.long)

# Only the training split is batched; the validation tensors are used whole.
train_set = TensorDataset(x_train, y_train)
data_loader = DataLoader(train_set, shuffle=True, batch_size=32)

In [3]:
# Pick the compute device: use the GPU when one is available, otherwise fall
# back to the CPU so this cell does not crash on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the network and move its parameters to the selected device.
model = Convo().to(device)
# Initialize weights using the model's own initializer (defined in Convo1D).
model.weights_init()

# SGD with momentum and L2 weight decay; these are the tunable hyperparameters.
optimizer = optim.SGD(model.parameters(), lr=0.15, momentum=0.8, weight_decay=0.0001)

# Multi-class classification loss; expects raw logits and integer class labels.
lossfn = nn.CrossEntropyLoss()

# Step decay: the learning rate is multiplied by `gamma` once every
# `step_size` calls to scheduler.step().
scheduler = StepLR(optimizer, step_size=40, gamma=0.95)

In [4]:
n_epoch = 200
# Per-epoch history of the full-dataset loss and the classification error (in percent).
losses_train = np.zeros(n_epoch)
errors_train = np.zeros(n_epoch)
losses_val = np.zeros(n_epoch)
errors_val = np.zeros(n_epoch)

# Run on whichever device the model's parameters already live on, rather than
# hard-coding a device name here.
device = next(model.parameters()).device

# The full-dataset tensors used for the per-epoch statistics never change, so
# move them to the device once instead of on every epoch.
x_train = x_train.to(device)
y_train = y_train.to(device)
x_val = x_val.to(device)
y_val = y_val.to(device)

for epoch in range(n_epoch):
  # loop over mini-batches
  for x_batch, y_batch in data_loader:
    # move this batch's inputs and labels to the model's device
    x_batch = x_batch.to(device)
    y_batch = y_batch.to(device)
    # zero the parameter gradients
    optimizer.zero_grad()
    # forward pass; [:,None,:] inserts a singleton axis at position 1
    # NOTE(review): presumably the channel axis expected by Convo -- confirm.
    pred = model(x_batch[:,None,:])
    # compute the loss
    loss = lossfn(pred, y_batch)
    # backward pass
    loss.backward()
    # SGD update
    optimizer.step()

  # Run the whole dataset to get statistics on how training is going wrt the
  # hyperparams. Evaluation mode plus no_grad keeps layers such as dropout or
  # batch-norm (if the model has any) from behaving as in training, and avoids
  # building an autograd graph for tensors that are only used for reporting.
  model.eval()
  with torch.no_grad():
    pred_train = model(x_train[:,None,:])
    pred_val = model(x_val[:,None,:])
    predicted_train_class = pred_train.argmax(dim=1)
    predicted_val_class = pred_val.argmax(dim=1)
    # .item() turns the 0-dim device tensors into plain Python floats before
    # they are stored in the NumPy history arrays.
    errors_train[epoch] = (100 - 100 * (predicted_train_class == y_train).float().sum() / len(y_train)).item()
    errors_val[epoch] = (100 - 100 * (predicted_val_class == y_val).float().sum() / len(y_val)).item()
    losses_train[epoch] = lossfn(pred_train, y_train).item()
    losses_val[epoch] = lossfn(pred_val, y_val).item()
  # switch back to training mode for the next epoch
  model.train()
  print(f'Epoch {epoch:5d}, train loss {losses_train[epoch]:.6f}, train error {errors_train[epoch]:3.2f},  val loss {losses_val[epoch]:.6f}, percent error {errors_val[epoch]:3.2f}')

  # advance the learning-rate schedule by one epoch
  scheduler.step()

Epoch     0, train loss 2.232693, train error 81.07,  val loss 2.242393, percent error 81.70
Epoch     1, train loss 2.153369, train error 71.20,  val loss 2.169378, percent error 72.60
Epoch     2, train loss 2.081299, train error 61.97,  val loss 2.095272, percent error 63.10
Epoch     3, train loss 2.088432, train error 62.60,  val loss 2.099898, percent error 64.60
Epoch     4, train loss 2.076801, train error 62.72,  val loss 2.083306, percent error 63.90
Epoch     5, train loss 2.032767, train error 57.05,  val loss 2.040206, percent error 58.00
Epoch     6, train loss 2.029868, train error 56.60,  val loss 2.037768, percent error 57.80
Epoch     7, train loss 2.016539, train error 56.12,  val loss 2.036477, percent error 58.10
Epoch     8, train loss 2.014074, train error 55.82,  val loss 2.032438, percent error 57.50
Epoch     9, train loss 2.008653, train error 55.37,  val loss 2.041232, percent error 58.50
Epoch    10, train loss 2.007514, train error 55.15,  val loss 2.03338