# Deep Neural Net in PyTorch

#### Load Dependencies

In [1]:
import torch
import torch.nn as nn

from torchvision.datasets import MNIST
from torchvision import transforms

from torchsummary import summary

#### Load Data

In [2]:
# Both splits are stored under the same root folder and share one image -> tensor transform.
# Only the training split requests a download; the test split is read from the same folder.
to_tensor = transforms.ToTensor()
train = MNIST(root = 'data', train = True, transform = to_tensor, download = True)
test = MNIST(root = 'data', train = False, transform = to_tensor)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw



#### Batch Loader

In [3]:
# Mini-batch size shared by both loaders.
BATCH_SIZE = 128

# Training batches are reshuffled on every pass; test batches keep the dataset order.
train_loader = torch.utils.data.DataLoader(dataset = train, batch_size = BATCH_SIZE, shuffle = True)
test_loader = torch.utils.data.DataLoader(dataset = test, batch_size = BATCH_SIZE)

#### Design Neural Network

In [4]:
# Seed PyTorch's global random number generator so that weight initialisation,
# batch shuffling and dropout are repeatable when the notebook is re-run top to bottom.
SEED = 42
torch.manual_seed(SEED)

# Layer widths: flattened input size, three hidden layers, and the output size.
n_input = 784
n_dense1 = 64
n_dense2 = 64
n_dense3 = 64
n_out = 10

In [5]:
# Three hidden stages, each a fully-connected layer followed by a ReLU activation;
# dropout (with its default settings) is applied after the third stage only.
hidden_stack = [
    nn.Linear(n_input, n_dense1),   # first hidden layer
    nn.ReLU(),
    nn.Linear(n_dense1, n_dense2),  # second hidden layer
    nn.ReLU(),
    nn.Linear(n_dense2, n_dense3),  # third hidden layer
    nn.ReLU(),
    nn.Dropout(),
]

# Output layer: one unnormalised score per class, with no activation after it.
output_layer = nn.Linear(n_dense3, n_out)

# Modules are passed positionally, in the same order as they were constructed.
model = nn.Sequential(*hidden_stack, output_layer)

In [6]:
# Print a per-layer table of output shapes and parameter counts for an input of shape (1, n_input).
summary(model, (1, n_input))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                [-1, 1, 64]          50,240
              ReLU-2                [-1, 1, 64]               0
            Linear-3                [-1, 1, 64]           4,160
              ReLU-4                [-1, 1, 64]               0
            Linear-5                [-1, 1, 64]           4,160
              ReLU-6                [-1, 1, 64]               0
           Dropout-7                [-1, 1, 64]               0
            Linear-8                [-1, 1, 10]             650
Total params: 59,210
Trainable params: 59,210
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.23
Estimated Total Size (MB): 0.23
----------------------------------------------------------------


#### Configure Training Hyperparameters

In [7]:
# Cross-entropy loss, constructed with its default settings. The network ends in a plain
# nn.Linear with no softmax layer, so this loss is fed the model's unnormalised class scores.
cost_fxn = nn.CrossEntropyLoss()

In [10]:
# Adam over every parameter of the model; no learning rate or other option is passed,
# so the optimizer runs with the library's default hyperparameters.
optimizer = torch.optim.Adam(model.parameters())

#### Train!

In [11]:
def accuracy_pct(pred_y, true_y):
  """Return the percentage of samples whose highest-scoring class equals the label.

  pred_y: tensor of per-class scores; the maximum is taken along dimension 1
          (one row per sample, one column per class).
  true_y: tensor of class indices, one per row of pred_y.

  The result is a Python float between 0.0 and 100.0.
  """
  # .max(dim=1) yields (values, indices); only the winning class indices are needed
  predicted_class = pred_y.max(dim=1).indices
  n_correct = predicted_class.eq(true_y).sum().item()
  n_total = true_y.shape[0]
  return (n_correct / n_total) * 100.0

In [12]:
# Number of mini-batches the training loader yields in one pass; the bare name below displays it.
n_batches = len(train_loader)
n_batches

469

In [13]:
n_epochs = 5

print('Training for {} epochs. \n'.format(n_epochs))

# Put the model in training mode explicitly so dropout is active even when this
# cell is re-run after the evaluation cell has called model.eval().
model.train()

for epoch in range(n_epochs):

  # Running sums over the epoch, kept as plain Python numbers and weighted by
  # batch size so that a shorter final batch does not skew the epoch averages.
  total_cost = 0.0
  total_accuracy = 0.0
  n_seen = 0

  for i, (x, y) in enumerate(train_loader): # enumerate() provides count of iterations

    n_in_batch = x.shape[0]

    # forward propagation:
    x_flat = x.view(n_in_batch, -1) # flatten each image to a single row
    y_hat = model(x_flat)
    cost = cost_fxn(y_hat, y)

    # backprop and optimization via gradient descent:
    optimizer.zero_grad() # set gradients to zero; .backward() accumulates them in buffers
    cost.backward()
    optimizer.step()

    # accumulate metrics; .item() converts the loss to a float so the running
    # total does not hold on to the autograd history of every batch:
    total_cost += cost.item() * n_in_batch
    total_accuracy += accuracy_pct(y_hat, y) * n_in_batch
    n_seen += n_in_batch

    if (i + 1) % 100 == 0:
      print('Step {}'.format(i + 1))

  # per-sample averages for the epoch (max() guards against an empty loader)
  avg_cost = total_cost / max(n_seen, 1)
  avg_accuracy = total_accuracy / max(n_seen, 1)

  print('Epoch {}/{} complete. Cost: {:.3f}, Accuracy: {:.1f}% \n'
        .format(epoch + 1, n_epochs, avg_cost, avg_accuracy))

print('Training complete.')

Training for 5 epochs. 

Step 100
Step 200
Step 300
Step 400
Epoch 1/5 complete. Cost: 0.657, Accuracy: 79.9% 

Step 100
Step 200
Step 300
Step 400
Epoch 2/5 complete. Cost: 0.273, Accuracy: 92.7% 

Step 100
Step 200
Step 300
Step 400
Epoch 3/5 complete. Cost: 0.200, Accuracy: 94.6% 

Step 100
Step 200
Step 300
Step 400
Epoch 4/5 complete. Cost: 0.167, Accuracy: 95.5% 

Step 100
Step 200
Step 300
Step 400
Epoch 5/5 complete. Cost: 0.137, Accuracy: 96.4% 

Training complete.


#### Test Model

In [14]:
# Number of mini-batches the test loader yields in one pass; the bare name below displays it.
n_test_batches = len(test_loader)
n_test_batches

79

In [15]:
model.eval() # disables dropout (and batch norm)

# Running sums over the whole test set, kept as plain Python numbers and weighted
# by batch size so that the shorter final batch does not skew the reported averages.
total_test_cost = 0.0
total_test_acc = 0.0
n_test_seen = 0

with torch.no_grad(): # disables autograd, reducing memory consumption

  for X, y in test_loader:

    n_in_batch = X.shape[0]

    # make predictions:
    X_flat = X.view(n_in_batch, -1) # flatten each image to a single row
    y_hat = model(X_flat)

    # calculate cost (cost_fxn returns the batch mean, so scale by batch size):
    cost = cost_fxn(y_hat, y)
    total_test_cost += cost.item() * n_in_batch

    # calculate accuracy:
    test_accuracy = accuracy_pct(y_hat, y)
    total_test_acc += test_accuracy * n_in_batch

    n_test_seen += n_in_batch

# per-sample averages over the test set (max() guards against an empty loader)
avg_test_cost = total_test_cost / max(n_test_seen, 1)
avg_test_acc = total_test_acc / max(n_test_seen, 1)

print('Test cost: {:.3f}, Test accuracy: {:.1f}%'.format(avg_test_cost, avg_test_acc))

# NOTE: the model is left in evaluation mode; call model.train() before training it further.

Test cost: 0.109, Test accuracy: 96.7%
