In [26]:
import torch
import numpy as np
from torchvision import datasets
import torchvision.transforms as transforms
import sys
from tqdm import tqdm

In [23]:
# Global configuration: fix the RNG seed and pick the compute device once.
SEED = 0
# Seed PyTorch's default generator so weight initialisation (and any
# generator-driven shuffling) is repeatable under Restart & Run All.
torch.manual_seed(SEED)

# True when PyTorch can see a CUDA device in this runtime.
cuda = torch.cuda.is_available()
# Later cells move the model and every batch onto this device.
device = torch.device("cuda" if cuda else "cpu")
# Log GPU availability and the interpreter version for provenance.
print(cuda, sys.version)

True 3.7.12 (default, Sep 10 2021, 00:21:48) 
[GCC 7.5.0]


In [24]:
# Colab-only: mount Google Drive at /content/drive.
# NOTE(review): no cell visible in this notebook reads from or writes to the
# mount point (the checkpoint goes to /content/mnist_cnn.pth) -- confirm the
# mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [25]:

# number of subprocesses to use for data loading (0 = load in the main process)
num_workers = 0
# how many samples per batch to load
batch_size = 20

# convert each image to a torch.FloatTensor
transform = transforms.ToTensor()

# MNIST training and test splits, downloaded into ./data when not already present
train_data = datasets.MNIST(root='data', train=True,
                            download=True, transform=transform)
test_data = datasets.MNIST(root='data', train=False,
                           download=True, transform=transform)

# Training batches are reshuffled every epoch so SGD does not see the same
# batch composition and order on every pass over the data.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
                                           shuffle=True, num_workers=num_workers)
# Evaluation metrics do not depend on sample order, so the test loader stays
# sequential.
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size,
                                          num_workers=num_workers)

In [61]:

import torch.nn as nn
import torch.nn.functional as F

## Define the NN architecture
class CNNModel(nn.Module):
    """Three-stage convolutional classifier that emits 10 raw scores per image.

    Every convolution uses a 3x3 kernel with the default stride and no
    padding, so each one trims two pixels from both spatial dimensions.
    The linear head expects a flattened 16 x 22 x 22 feature map, which is
    what a single-channel 28 x 28 input produces after the three stages.
    """

    def __init__(self):
        super().__init__()

        # Feature extractor: channels go 1 -> 16 -> 32 -> 16, with a ReLU
        # after every convolution.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3)
        self.relu2 = nn.ReLU()
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=16, kernel_size=3)
        self.relu3 = nn.ReLU()

        # Classifier head over the flattened feature map.
        self.Linear = nn.Linear(in_features=16 * 22 * 22, out_features=10)

    def forward(self, x):
        """Run the conv/ReLU stages, flatten per sample, and return the scores."""
        stages = (
            (self.conv1, self.relu1),
            (self.conv2, self.relu2),
            (self.conv3, self.relu3),
        )
        for conv, activation in stages:
            x = activation(conv(x))

        # Collapse everything except the batch dimension before the head.
        flat = x.view(x.size(0), -1)
        return self.Linear(flat)

# Build the network and place its parameters on the selected device.
# nn.Module.to() moves the module in place and returns the same object,
# so construction and transfer can be chained.
model = CNNModel().to(device)
print(model)

CNNModel(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1))
  (relu1): ReLU()
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
  (relu2): ReLU()
  (conv3): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1))
  (relu3): ReLU()
  (Linear): Linear(in_features=7744, out_features=10, bias=True)
)


In [62]:
## Loss function and optimizer

# Cross-entropy between the model's raw class scores and the integer labels.
criterion = nn.CrossEntropyLoss()

# Plain stochastic gradient descent over every model parameter.
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)

In [67]:

# number of epochs to train the model
n_epochs = 30  # suggest training between 20-50 epochs

model.train()  # prep model for training

# Lowest epoch-average training loss seen so far. It gates checkpointing, so
# it must be updated whenever a new best is found; otherwise every epoch
# overwrites the file and the "best" checkpoint is simply the last one.
min_train_loss = float("inf")

for epoch in range(n_epochs):
    # running sum of per-batch losses, weighted by batch size
    train_loss = 0.0

    ###################
    # train the model #
    ###################
    for data, target in tqdm(train_loader):
        data, target = data.to(device), target.to(device)
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # the criterion returns a batch mean; scale by the batch size so the
        # epoch total can be divided by the dataset size below
        train_loss += loss.item() * data.size(0)

    # average loss per sample over the epoch
    train_loss = train_loss / len(train_loader.dataset)

    # checkpoint only when this epoch beats the best loss so far
    if train_loss < min_train_loss:
        min_train_loss = train_loss
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            },
            "/content/mnist_cnn.pth",
        )

    # print training statistics
    print('Epoch: {} \tTraining Loss: {:.6f}'.format(
        epoch+1,
        train_loss
        ))

100%|██████████| 3000/3000 [00:09<00:00, 307.54it/s]


Epoch: 1 	Training Loss: 0.077928


100%|██████████| 3000/3000 [00:09<00:00, 306.24it/s]


Epoch: 2 	Training Loss: 0.061799


100%|██████████| 3000/3000 [00:09<00:00, 307.99it/s]


Epoch: 3 	Training Loss: 0.051434


100%|██████████| 3000/3000 [00:09<00:00, 308.52it/s]


Epoch: 4 	Training Loss: 0.043965


100%|██████████| 3000/3000 [00:09<00:00, 307.96it/s]


Epoch: 5 	Training Loss: 0.038093


100%|██████████| 3000/3000 [00:09<00:00, 306.20it/s]


Epoch: 6 	Training Loss: 0.033314


100%|██████████| 3000/3000 [00:09<00:00, 309.51it/s]


Epoch: 7 	Training Loss: 0.029111


100%|██████████| 3000/3000 [00:09<00:00, 309.15it/s]


Epoch: 8 	Training Loss: 0.025555


100%|██████████| 3000/3000 [00:09<00:00, 307.99it/s]


Epoch: 9 	Training Loss: 0.022339


100%|██████████| 3000/3000 [00:09<00:00, 309.13it/s]


Epoch: 10 	Training Loss: 0.019555


100%|██████████| 3000/3000 [00:09<00:00, 308.32it/s]


Epoch: 11 	Training Loss: 0.017217


100%|██████████| 3000/3000 [00:09<00:00, 309.92it/s]


Epoch: 12 	Training Loss: 0.015113


100%|██████████| 3000/3000 [00:09<00:00, 309.41it/s]


Epoch: 13 	Training Loss: 0.014161


100%|██████████| 3000/3000 [00:09<00:00, 311.64it/s]


Epoch: 14 	Training Loss: 0.012924


100%|██████████| 3000/3000 [00:09<00:00, 310.58it/s]


Epoch: 15 	Training Loss: 0.011359


100%|██████████| 3000/3000 [00:09<00:00, 307.88it/s]


Epoch: 16 	Training Loss: 0.010956


100%|██████████| 3000/3000 [00:09<00:00, 309.91it/s]


Epoch: 17 	Training Loss: 0.009829


100%|██████████| 3000/3000 [00:09<00:00, 308.81it/s]


Epoch: 18 	Training Loss: 0.008456


100%|██████████| 3000/3000 [00:09<00:00, 305.57it/s]


Epoch: 19 	Training Loss: 0.007910


100%|██████████| 3000/3000 [00:09<00:00, 309.87it/s]


Epoch: 20 	Training Loss: 0.006614


100%|██████████| 3000/3000 [00:09<00:00, 305.66it/s]


Epoch: 21 	Training Loss: 0.005385


100%|██████████| 3000/3000 [00:09<00:00, 304.18it/s]


Epoch: 22 	Training Loss: 0.005149


100%|██████████| 3000/3000 [00:09<00:00, 307.22it/s]


Epoch: 23 	Training Loss: 0.005532


100%|██████████| 3000/3000 [00:09<00:00, 309.87it/s]


Epoch: 24 	Training Loss: 0.005007


100%|██████████| 3000/3000 [00:09<00:00, 309.27it/s]


Epoch: 25 	Training Loss: 0.005496


100%|██████████| 3000/3000 [00:09<00:00, 309.55it/s]


Epoch: 26 	Training Loss: 0.005038


100%|██████████| 3000/3000 [00:09<00:00, 307.67it/s]


Epoch: 27 	Training Loss: 0.004595


100%|██████████| 3000/3000 [00:09<00:00, 308.49it/s]


Epoch: 28 	Training Loss: 0.003080


100%|██████████| 3000/3000 [00:09<00:00, 309.72it/s]


Epoch: 29 	Training Loss: 0.002805


100%|██████████| 3000/3000 [00:09<00:00, 308.47it/s]

Epoch: 30 	Training Loss: 0.003138





In [68]:
# Evaluate the model on the test set: average loss plus per-class and overall
# accuracy.
test_loss = 0.0
class_correct = [0.0] * 10  # correct predictions per digit class
class_total = [0.0] * 10    # samples seen per digit class
model.eval()  # prep model for *evaluation*

# Inference only: disable autograd so no graph is built for each batch.
with torch.no_grad():
    for data, target in test_loader:
        # forward pass: compute predicted outputs by passing inputs to the model
        data, target = data.to(device), target.to(device)
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # update test loss (batch mean scaled back to a per-batch sum)
        test_loss += loss.item() * data.size(0)
        # predicted class = index of the largest score in each row
        _, pred = torch.max(output, 1)
        # element-wise comparison of predictions with the true labels
        correct = pred.eq(target.view_as(pred))
        # Walk the samples actually present in this batch rather than
        # range(batch_size), so a short final batch cannot raise IndexError.
        # tolist() copies each tensor to the host once per batch.
        for label, hit in zip(target.tolist(), correct.tolist()):
            class_correct[label] += hit
            class_total[label] += 1

# calculate and print avg test loss
test_loss = test_loss / len(test_loader.dataset)
print('Test Loss: {:.6f}\n'.format(test_loss))

for i in range(10):
    if class_total[i] > 0:
        print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
            str(i), 100 * class_correct[i] / class_total[i],
            class_correct[i], class_total[i]))
    else:
        # The original referenced an undefined `classes` list here and
        # described the test split as "training" examples.
        print('Test Accuracy of %5s: N/A (no test examples)' % str(i))

print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
    100. * np.sum(class_correct) / np.sum(class_total),
    np.sum(class_correct), np.sum(class_total)))

Test Loss: 0.104124

Test Accuracy of     0: 99% (974/980)
Test Accuracy of     1: 99% (1129/1135)
Test Accuracy of     2: 98% (1016/1032)
Test Accuracy of     3: 98% (999/1010)
Test Accuracy of     4: 97% (962/982)
Test Accuracy of     5: 97% (873/892)
Test Accuracy of     6: 98% (939/958)
Test Accuracy of     7: 97% (1006/1028)
Test Accuracy of     8: 96% (942/974)
Test Accuracy of     9: 98% (991/1009)

Test Accuracy (Overall): 98% (9831/10000)


In [70]:

# Restore the model weights and optimizer state from the saved checkpoint.
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written from a GPU runtime also loads on a CPU-only one.
checkpoint = torch.load('/content/mnist_cnn.pth', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])


In [73]:
# Display the names of the entries in the model's state dict.
model.state_dict().keys()


odict_keys(['conv1.weight', 'conv1.bias', 'conv2.weight', 'conv2.bias', 'conv3.weight', 'conv3.bias', 'Linear.weight', 'Linear.bias'])