In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline



In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.utils import make_grid

In [3]:
# Per-sample transform handed to the datasets below: torchvision's ToTensor
# turns each loaded image into a torch tensor.
transform = transforms.ToTensor()

In [4]:
# MNIST training and test splits, stored under ./Data (download=True asks
# torchvision to fetch the files); `transform` is applied to every image.
train_data = datasets.MNIST(root='Data', train=True, download=True, transform=transform)
test_data = datasets.MNIST(root='Data', train=False, download=True, transform=transform)

  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [5]:
# Inspect the training split: number of samples, root directory and transform.
train_data

Dataset MNIST
    Number of datapoints: 60000
    Root location: Data
    Split: Train
    StandardTransform
Transform: ToTensor()

In [6]:
# Inspect the test split: number of samples, root directory and transform.
test_data

Dataset MNIST
    Number of datapoints: 10000
    Root location: Data
    Split: Test
    StandardTransform
Transform: ToTensor()

In [7]:
# Mini-batch iterators over both splits, 10 images per batch.
# Only the training loader is asked to shuffle; the test loader is built
# without a shuffle argument.
train_loader = DataLoader(train_data, batch_size=10, shuffle=True)
test_loader = DataLoader(test_data, batch_size=10)

In [8]:
# Batches per training epoch: 60000 samples / batch_size 10.
len(train_loader)

6000

In [9]:
# Batches per evaluation pass: 10000 samples / batch_size 10.
len(test_loader)

1000

### Define CNN Architecture

In [24]:
class ConvolutionalNetwork(nn.Module):
    """Two-convolution classifier for single-channel images with 10 classes.

    Pipeline: conv(1->6, 3x3) -> ReLU -> 2x2 max-pool -> conv(6->16, 3x3)
    -> ReLU -> 2x2 max-pool -> flatten to 16*5*5 features -> 120 -> 84 -> 10,
    finishing with a log-softmax over the class dimension.

    The flatten size of 16*5*5 corresponds to 28x28 inputs (the MNIST image
    size): 28 -> 26 (conv) -> 13 (pool) -> 11 (conv) -> 5 (pool).
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: two unpadded 3x3 convolutions with stride 1.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3, stride=1)
        # Classifier head: three fully connected layers ending in 10 outputs.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, X):
        """Return per-class log-probabilities (one row of 10 values per sample)."""
        pooled_first = F.max_pool2d(F.relu(self.conv1(X)), kernel_size=2, stride=2)
        pooled_second = F.max_pool2d(F.relu(self.conv2(pooled_first)), kernel_size=2, stride=2)
        # Collapse channel and spatial dimensions into one feature vector per sample.
        flattened = pooled_second.view(-1, 16 * 5 * 5)
        hidden = F.relu(self.fc2(F.relu(self.fc1(flattened))))
        return F.log_softmax(self.fc3(hidden), dim=1)

In [25]:
# Seed PyTorch's RNG right before building the network so that the initial
# weights are the same on every run, then display the layer summary.
torch.manual_seed(42)
model = ConvolutionalNetwork()
model

ConvolutionalNetwork(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [27]:
# Add up the element counts of every parameter tensor in the model.
total_params = sum(tensor.numel() for tensor in model.parameters())

print(f'total parameters: {total_params}')

total parameters: 60074


In [28]:
# ConvolutionalNetwork.forward already ends in log_softmax, so the model emits
# log-probabilities. NLLLoss consumes log-probabilities directly, whereas
# CrossEntropyLoss expects raw logits and would apply log_softmax a second
# time (mathematically a no-op on log-probabilities, but redundant work).
criterion = nn.NLLLoss()
# Adam over all model parameters with a learning rate of 1e-3.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
import time

# Train for a fixed number of epochs, evaluating on the test set after each.
# Per epoch this records:
#   train_losses  - loss of the last training batch (Python float)
#   test_losses   - loss of the last test batch (Python float)
#   train_correct - number of correctly classified training samples (0-dim tensor)
#   test_correct  - number of correctly classified test samples (0-dim tensor)
start_time = time.time()

epochs = 3
train_losses = []
test_losses = []
train_correct = []
test_correct = []

for i in range(epochs):
    trn_corr = 0
    tst_corr = 0

    # ---- training pass -------------------------------------------------
    model.train()
    # Batch numbers start at 1 so the progress check below fires on 600, 1200, ...
    for b, (X_train, y_train) in enumerate(train_loader, start=1):
        y_pred = model(X_train)
        loss = criterion(y_pred, y_train)

        # The column index of the largest output is the predicted class.
        predicted_class = torch.max(y_pred.data, 1)[1]
        batch_corr = (predicted_class == y_train).sum()
        trn_corr += batch_corr

        # update params
        optimizer.zero_grad()     # clear gradients since this is a new batch
        loss.backward()           # calculate gradients for given batch loss
        optimizer.step()          # apply gradients and update parameters

        if b % 600 == 0:
            print(f'epoch:{i} batch:{b} loss:{loss.item()}')

    # Store the loss as a plain float: keeping the tensor would retain its
    # autograd graph and cannot be converted to numpy for plotting.
    train_losses.append(loss.item())
    train_correct.append(trn_corr)

    # ---- evaluation pass -----------------------------------------------
    # No gradients are needed here because loss.backward() is never called
    # on test data; disabling them saves memory and computation.
    model.eval()
    with torch.no_grad():
        for X_test, y_test in test_loader:
            y_val = model(X_test)

            predicted_class = torch.max(y_val.data, 1)[1]
            tst_corr += (predicted_class == y_test).sum()

        # Test loss of the final test batch (mirrors how the training loss
        # is recorded above).
        loss = criterion(y_val, y_test)

    test_losses.append(loss.item())
    test_correct.append(tst_corr)

curr_time = time.time()
total_elapsed = curr_time - start_time
print(f'Training time: {total_elapsed/60} minutes')