In [131]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.utils import make_grid

from sklearn.metrics import confusion_matrix
import pandas as np
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [132]:
# Convert each MNIST image to a 3D tensor (channels, height, width);
# a single sample has shape [1, 28, 28], as the X_Train.shape cell below shows.
transform = transforms.ToTensor()

In [133]:
# Build both MNIST splits with identical settings; only the `train` flag differs.
# The generator runs in order, so the training split (train=True) is created
# first and the test split (train=False) second.
train_data, test_data = (
    datasets.MNIST(root='cnn_data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

In [134]:
# Serve both splits in mini-batches of 10 images.
# Training batches are shuffled; test batches keep the dataset order.
train_loader = DataLoader(train_data, batch_size=10, shuffle=True)
test_loader = DataLoader(test_data, batch_size=10, shuffle=False)


In [135]:
# Two stand-alone convolutional layers, applied by hand in the cells that follow.
# Conv2d arguments: input channels, output channels, kernel size, and stride
# (stride = how far the kernel shifts before it is applied again).
conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, stride=1)
# conv2 consumes conv1's 6 output channels, so its in_channels must be 6.
conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3, stride=1)

In [136]:
# Pull only the first (index, (image, label)) item out of the training set.
i, (X_Train, y_train) = next(enumerate(train_data))

In [137]:
# Shape of a single sample: (channels, height, width) -- the output below shows 1 x 28 x 28.
X_Train.shape

torch.Size([1, 28, 28])

In [138]:
# Convert to 4D by adding a leading batch dimension of size 1:
# (1, 28, 28) -> (1, 1, 28, 28), i.e. (batch, channels, height, width).
x = X_Train.view(1,1,28,28)
x.shape

torch.Size([1, 1, 28, 28])

In [139]:
# First convolution, then ReLU (rectified linear unit) as the activation function.
x = conv1(x)
x = F.relu(x)

In [140]:
# 1 single image, 6 feature maps, 26x26 each --> the 3x3 kernel is applied without padding,
# so 1 pixel is lost off each side (28 -> 26)
x.shape

torch.Size([1, 6, 26, 26])

In [141]:
# Max-pooling layer: take the maximum over 2x2 windows, moving 2 pixels at a time.
x = F.max_pool2d(x, kernel_size=2, stride=2)

In [142]:
x.shape  # a 2x2 window moved with stride 2 halves each spatial dimension: 26 / 2 = 13

torch.Size([1, 6, 13, 13])

In [143]:
# Second convolutional layer, again followed by a ReLU activation.
x = conv2(x)
x = F.relu(x)

In [144]:
x.shape # the 3x3 kernel with no padding loses 1 pixel on every edge again: 13 -> 11

torch.Size([1, 16, 11, 11])

In [145]:
# Second max-pooling layer: 2x2 windows, stride 2.
x = F.max_pool2d(x, kernel_size=2, stride=2)

In [146]:
# 11 is odd, so 2x2 pooling with stride 2 keeps floor(11 / 2) = 5 positions per dimension;
# the last row and column do not fill a complete window and are dropped.
x.shape

torch.Size([1, 16, 5, 5])