In [2]:
# import libraries
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader,TensorDataset
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import matplotlib_inline.backend_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')

In [6]:
# load the digit dataset from 'train.csv' (path is relative to the working directory);
# the file must contain a 'label' column, every other column is treated as a feature
rawTable = pd.read_csv('train.csv')

# separate the labels (number IDs) from the feature columns
labels = rawTable['label']
data   = rawTable.drop('label',axis=1).values

# scale by the global maximum so that the largest feature value becomes 1
dataNorm = data / np.max(data)

In [10]:
class ANNdigit(nn.Module):
  """Fully connected network mapping 784 input features to 10 output scores.

  Architecture:
    Linear(784,nUnits) -> ReLU
    -> nLayers x [ Linear(nUnits,nUnits) -> ReLU ]
    -> Linear(nUnits,10)

  The output layer is returned as-is (no softmax / log-softmax is applied).

  Parameters
  ----------
  nUnits : int
      Output width of the input layer and width of every hidden layer.
  nLayers : int
      Number of nUnits-by-nUnits hidden layers between the input and
      output layers (0 gives input -> output only).
  """
  def __init__(self,nUnits,nLayers):
    super().__init__()

    # create dictionary to store the layers
    self.layers = nn.ModuleDict()
    self.nLayers = nLayers

    ### input layer
    self.layers['input'] = nn.Linear(784,nUnits)

    ### hidden layers (keys 'hidden0' ... f'hidden{nLayers-1}')
    for i in range(nLayers):
      self.layers[f'hidden{i}'] = nn.Linear(nUnits,nUnits)

    ### output layer
    self.layers['output'] = nn.Linear(nUnits,10)


  # forward pass
  def forward(self,x):
    """Run x through input, hidden and output layers; returns the output-layer values.

    x must have 784 features in its last dimension (required by the input layer).
    """
    # input layer (note: the code in the video omits the relu after this layer)
    x = F.relu( self.layers['input'](x) )

    # hidden layers, in creation order
    for i in range(self.nLayers):
      x = F.relu( self.layers[f'hidden{i}'](x) )

    # return output layer
    return self.layers['output'](x)

In [7]:
# Step 1: turn the normalized data and the labels into PyTorch tensors
dataT   = torch.tensor( dataNorm ).to(torch.float32)
labelsT = torch.tensor( labels.values ).to(torch.int64) # int64 = long

# Step 2: use scikit-learn to hold out 10% of the samples for testing
trainX,testX, trainY,testY = train_test_split(dataT, labelsT, test_size=.1)

# Step 3: pair features with labels as PyTorch Datasets
train_data = TensorDataset(trainX,trainY)
test_data  = TensorDataset(testX,testY)

# Step 4: wrap the datasets in dataloader objects
# (training: shuffled mini-batches, incomplete last batch dropped; test: all samples in one batch)
batchsize    = 32
train_loader = DataLoader(train_data,batch_size=batchsize,shuffle=True,drop_last=True)
test_loader  = DataLoader(test_data,batch_size=len(test_data))

In [11]:
def createTheMNISTNet(units,layers,lr=.01):
  """Create a new ANNdigit model together with its loss function and optimizer.

  Parameters
  ----------
  units : int
      Passed to ANNdigit as nUnits.
  layers : int
      Passed to ANNdigit as nLayers.
  lr : float, optional
      Learning rate of the SGD optimizer (default .01).

  Returns
  -------
  net : ANNdigit
  lossfun : nn.CrossEntropyLoss
  optimizer : torch.optim.SGD
      Optimizes net.parameters().
  """

  # create the model instance
  net = ANNdigit(nUnits=units,nLayers=layers)

  # loss function (CrossEntropyLoss is applied directly to the model output,
  # so no log-softmax / NLLLoss is involved here)
  lossfun = nn.CrossEntropyLoss()

  # optimizer
  optimizer = torch.optim.SGD(net.parameters(),lr=lr)

  return net,lossfun,optimizer

In [12]:
# test the model with one batch
# (keyword arguments: the signature is (units,layers), so a positional (1,50) would build 1 unit x 50 layers)
net,lossfun,optimizer = createTheMNISTNet(units=50,layers=1)

X,y = next(iter(train_loader))
yHat = net(X)

# yHat holds the raw output-layer values: one score per class (0-9) for each sample in the batch

# now let's compute the loss
loss = lossfun(yHat,y)
print(' ')
print('Loss:')
print(loss)

 
Loss:
tensor(2.4437, grad_fn=<NllLossBackward0>)


In [16]:
def function2trainTheModel(units,layers,numepochs=60):
  """Train a freshly created model and record its loss and test accuracy per epoch.

  Reads the module-level `train_loader` and `test_loader`.

  Parameters
  ----------
  units : int
      Forwarded to createTheMNISTNet as `units`.
  layers : int
      Forwarded to createTheMNISTNet as `layers`.
  numepochs : int, optional
      Number of passes over train_loader (default 60).

  Returns
  -------
  testAcc : list of 0-d torch.Tensor
      One entry per epoch: accuracy (%) on the first batch drawn from test_loader.
  losses : torch.Tensor
      Shape (numepochs,); mean of the per-batch training losses of each epoch.
  """

  # create a new model
  net,lossfun,optimizer = createTheMNISTNet(units=units,layers=layers)

  # initialize outputs
  losses  = torch.zeros(numepochs)
  testAcc = []


  # loop over epochs
  for epochi in range(numepochs):

    # training mode for the parameter updates
    net.train()

    # loop over training data batches
    batchLoss = []
    for X,y in train_loader:

      # forward pass and loss
      yHat = net(X)
      loss = lossfun(yHat,y)

      # backprop
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      # loss from this batch
      batchLoss.append(loss.item())
    # end of batch loop...

    # get average losses across the batches
    losses[epochi] = np.mean(batchLoss)

    # test accuracy: evaluation mode, and no autograd graph is needed for the forward pass
    net.eval()
    X,y = next(iter(test_loader)) # extract X,y from test dataloader
    with torch.no_grad():
      yHat = net(X)

    # predicted class = index of the largest output; accuracy = percentage of matches
    testAcc.append( 100*torch.mean((torch.argmax(yHat,axis=1)==y).float()) )

  # end epochs

  # function output
  return testAcc,losses

In [25]:
# parameter grid: rows index the number of hidden layers, columns the units per layer
layerCounts = [1,2,3]
unitCounts  = [50,100,150,200,250]

testacc    = np.zeros((len(layerCounts),len(unitCounts)))
losses_exp = np.zeros((len(layerCounts),len(unitCounts)))

for row,layer in enumerate(layerCounts):
    for col,units in enumerate(unitCounts):

        # train one model for this (layers, units) combination
        epochAcc,epochLoss = function2trainTheModel(units=units,layers=layer)

        # summarize the run by its averages over all epochs
        testacc[row,col] = np.mean(epochAcc)
        print(epochLoss)
        losses_exp[row,col] = torch.mean(epochLoss)


tensor([1.6178, 0.4992, 0.3745, 0.3300, 0.3012, 0.2778, 0.2589, 0.2416, 0.2261,
        0.2129, 0.2005, 0.1889, 0.1786, 0.1690, 0.1606, 0.1527, 0.1454, 0.1394,
        0.1330, 0.1280, 0.1224, 0.1172, 0.1134, 0.1094, 0.1050, 0.1019, 0.0978,
        0.0945, 0.0915, 0.0878, 0.0854, 0.0830, 0.0800, 0.0772, 0.0747, 0.0725,
        0.0706, 0.0679, 0.0659, 0.0639, 0.0617, 0.0599, 0.0581, 0.0559, 0.0545,
        0.0528, 0.0514, 0.0496, 0.0481, 0.0468, 0.0453, 0.0436, 0.0423, 0.0410,
        0.0400, 0.0387, 0.0372, 0.0363, 0.0349, 0.0342])
tensor([1.5339, 0.4810, 0.3678, 0.3229, 0.2935, 0.2685, 0.2482, 0.2304, 0.2130,
        0.1989, 0.1855, 0.1743, 0.1639, 0.1550, 0.1464, 0.1385, 0.1314, 0.1252,
        0.1186, 0.1134, 0.1081, 0.1036, 0.0984, 0.0940, 0.0899, 0.0859, 0.0825,
        0.0790, 0.0751, 0.0721, 0.0694, 0.0665, 0.0640, 0.0615, 0.0589, 0.0563,
        0.0544, 0.0521, 0.0502, 0.0481, 0.0464, 0.0446, 0.0429, 0.0411, 0.0396,
        0.0379, 0.0366, 0.0350, 0.0335, 0.0321, 0.0312, 0.0299,

In [26]:
# rows: 1-3 hidden layers; columns: 50-250 units per layer (steps of 50);
# each value is the test accuracy (%) averaged over all training epochs of that run
print(testacc)

[[95.44961548 95.7285614  96.1361084  96.1007843  96.19087982]
 [94.99523926 95.63967896 95.86626434 96.03134155 96.27976227]
 [93.11230469 94.5103302  94.65079498 94.96705627 95.30435944]]
