In [None]:
%matplotlib inline

# Simple CNN in PyTorch

Hi class, let's create a simple, shallow, multi-layer CNN.

First, let's specify the architecture.

 * You can refer to https://pytorch.org/docs/stable/nn.html for functions and explanations

In [None]:
import torch.nn as nn                                                                      
from torchvision import models

# let's name our neural network "CNN"
class CNN(nn.Module):
    """A small CNN: two conv/ReLU/max-pool/dropout stages followed by a two-layer MLP.

    Walk-through for the default case, a single channel (grayscale) 28x28 MNIST image
    (written height x width x channels; PyTorch tensors are actually laid out as
    batch x channels x height x width):

        first layer
            28x28x1 -> convolution (1 stride, 1 pad, 3x3 kernel, 2 kernels) -> 28x28x2
        pooling
            pool of 2x2 => 28x28 / 2 => 14x14x2
        second layer
            14x14x2 -> convolution (1 stride, 1 pad, 3x3 kernel, 4 kernels) -> 14x14x4
        pooling
            pool of 2x2 => 14x14 / 2 => 7x7x4
        classifier
            flatten 7x7x4 = 196 values -> 12 neurons -> num_classes outputs
    """

    # this is our CNN initialization function
    def __init__(self, size, num_classes, image_size = 28):
        """Build the feature extractor and the classifier.

        Args:
            size: number of spectral bands in our image (e.g., MNIST is 1, RGB imagery is 3).
            num_classes: number of output neurons (e.g., 10 for the 10 classes in MNIST).
            image_size: spatial size of the input images, either one int for square
                images or a (height, width) pair. Defaults to 28 (MNIST), which gives
                exactly the 4*(7*7) classifier input used for MNIST.

        Raises:
            ValueError: if the image is so small that nothing is left after the two
                2x2 pooling steps.
        """
        # self is ... well ourself! (https://www.askpython.com/python/python-self-variable)
        # lets call our super function (https://www.pythonforbeginners.com/super/working-python-super-function)
        # we do this first, so nn.Module can set itself up before we attach layers to it
        super(CNN, self).__init__()

        # accept either a single number (square image) or a (height, width) pair
        try:
            height, width = image_size
        except TypeError:
            height = width = image_size

        # the padded 3x3 convolutions keep the spatial size, each 2x2 pool halves it (rounding down)
        pooled_height = int(height) // 2 // 2
        pooled_width = int(width) // 2 // 2
        if pooled_height < 1 or pooled_width < 1:
            raise ValueError("image_size {!r} is too small for two 2x2 pooling steps".format(image_size))

        conv1_kernels = 2 # number of filters (shared weights/features) learned by the first layer
        conv2_kernels = 4 # number of filters learned by the second layer

        # here is our "feature extraction" via convolutional layers
        self.extract = nn.Sequential( # lets make a 2D convolution layer
                                      nn.Conv2d( in_channels = size, out_channels = conv1_kernels,
                                                 kernel_size = 3, stride = 1, padding = 1),
                                                     # in_channels = 1 for MNIST and 3 for RGB image
                                                     # out_channels = 2 means 2 shared weights/features
                                                     # kernel_size = 3 means a 3x3 size kernel
                                                     # stride = 1 means move one pixel at a time in each dim
                                                     # padding = 1 adds one pixel of zeros to each side of each dim
                                                     #           note, thats what keeps our spatial dims the same for a 3x3 kernel
                                                     #           it also lets us process each location, even that border!!!
                                      # its a NN, lets run a non-linearity on each of those results!
                                      nn.ReLU(inplace = True),
                                                     # could also use torch.nn.Sigmoid or etc.
                                                     # inplace means overwrite the input tensor instead of allocating a new one
                                      # -----------------------------------------------------------
                                      # !!! hey, we just made a layer of convolution/nonlin !!!
                                      # -----------------------------------------------------------
                                      # lets pool using a 2x2 region that is not overlapping
                                      nn.MaxPool2d(2),
                                      # lets do dropout with a small percentage/rate
                                      nn.Dropout(0.1),
                                      # -----------------------------------------------------------
                                      # now, lets make another layer of convolution, pooling, and drop out
                                      nn.Conv2d( in_channels = conv1_kernels, out_channels = conv2_kernels,
                                                 kernel_size = 3, stride = 1, padding = 1),
                                                 # in_channels here needs to match out_channels above
                                                 # lets use 4 filters
                                      nn.ReLU(inplace = True),
                                      nn.MaxPool2d(2),
                                      nn.Dropout(0.1), )

        # ok, now we are going to make a simple MLP classifier on the end of our above features
        self.decimate = nn.Sequential( nn.Linear(conv2_kernels*(pooled_height*pooled_width), 12),
                                            # take our 4 filters whose response fields are 7x7 (for MNIST) to 12 neurons
                                       nn.ReLU(inplace = True), # run a nonlinearity
                                       nn.Dropout(0.2), # some drop out
                                       nn.Linear(12, num_classes) ) # map the 12 down to our number of output classes

    #----------------------------
    # Model: Invoke Forward Pass
    #----------------------------

    def forward(self, x):
        """Run a batch of images through the network.

        Args:
            x: batch of images; the layers above expect (batch, size, height, width).

        Returns:
            Tensor of shape (batch, num_classes) holding one raw score per class
            (no softmax is applied here).
        """
        features = self.extract(x) # easy, pass input (x) to our "feature extraction" above
        features = features.flatten(start_dim = 1) # keep the batch dim, flatten the rest (7*7*4 values for MNIST)
        myresult = self.decimate(features) # pass that to our MLP classifier, and done!!!

        return myresult

Next, let's load the MNIST training data set.

In [None]:
import os
import torch
import numpy as np
from torchvision import datasets
import torchvision

# torchvision ships ready-made loaders for common benchmark data sets
#  (worth a read: https://pytorch.org/docs/stable/torchvision/datasets.html)
train = datasets.MNIST(
    root = './',      # directory the data set is downloaded to / read from
    train = True,     # True -> training split (training.pt), False -> test split (test.pt)
    download = True,  # fetch the files if they are not already under root
    # each PIL image is turned into a tensor as it is handed out
    transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()]),
)

How many data points do we have? Let's also look at an image.

In [None]:
import matplotlib.pyplot as plt

# overall size of the raw training tensor
print("[num of images, image x size, image y size]")
print(train.data.shape)

# pull out the first raw image once and inspect it
first_image = train.data[0]

print("what type of data is it?")
print(type(first_image))

print("what is min and max values?")
print(first_image.max())
print(first_image.min())

# and finally show that image
plt.imshow(first_image)

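If you want to work with validation or test data, follow the pattern below. Note that both lines load the same split (`train = False` is the MNIST test set); a separate validation set would have to be carved out of the training data, e.g. with `torch.utils.data.random_split`.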

    valid = datasets.MNIST( root = './', train = False, download = True)
    test = datasets.MNIST( root = './', train = False, download = True)

Let's now make a data loader object that holds our data and serves it to us in (shuffled) mini-batches.

In [None]:
import torch.utils.data as tech 

# mini-batch size: how many images the loader hands out at a time
batch_size = 16

# wrap the training set in a loader that serves it in shuffled mini-batches
train_ld = tech.DataLoader(train, batch_size = batch_size, shuffle = True)

Again, you can work with validation and test data as well
    
    valid = tech.DataLoader(dataset = valid, shuffle = False, batch_size = batch_size)      
    test = tech.DataLoader(dataset = test, shuffle = False, batch_size = 1) 

Lets create an instance of our CNN

In [None]:
# MNIST images are grayscale, so there is a single input band
input_size = 1
# one output neuron per digit, 0 through 9
num_classes = 10
# build the network defined above
model = CNN(size = input_size, num_classes = num_classes)

Now, pick optimization algorithm and error function

In [None]:
# step size used by the optimizer
learning_rate = 0.0001
# Adam updates every trainable parameter of our model
optimizer = torch.optim.Adam(params = model.parameters(), lr = learning_rate)
# cross entropy compares the raw class scores from the network against the integer labels
costfx = torch.nn.CrossEntropyLoss()

Training time!

In [None]:
# tqdm_notebook is deprecated; tqdm.auto picks a notebook or console progress bar for us
from tqdm.auto import tqdm

num_epochs = 1
loss_history = [] # mean training loss of each epoch, in order

# make sure training-only layers (dropout) are switched on, even when this cell
# is re-run after a later cell has put the model into eval mode
model.train()

for epoch in range(num_epochs): # how many epochs?

    batch_losses = [] # loss of every mini-batch in this epoch

    for batch_id, train_params in enumerate(tqdm(train_ld)):  # lets grab a bunch of mini-batches from our training data set

        # samples are our images, labels are their class labels
        samples, labels = train_params

        # make sure we have float images and integer (long) labels, which is what the loss expects
        samples = samples.float()
        labels = labels.long()

        # lets predict (forward pass)
        prediction = model(samples)
        # evaluate our error
        loss = costfx(prediction, labels)
        # keep track of that loss
        batch_losses.append(loss.item())
        # zero our gradients (they accumulate across backward calls otherwise)
        optimizer.zero_grad()
        # calc our gradients
        loss.backward()
        # do our update
        optimizer.step()

    # average the loss over our batches, remember it and report it
    epoch_loss = sum(batch_losses)/len(batch_losses)
    loss_history.append(epoch_loss)
    print("epoch {}/{}: mean training loss = {:.4f}".format(epoch + 1, num_epochs, epoch_loss))

Save the model

In [None]:
# pickle the whole model object (architecture + weights) to disk;
# loading it back requires the CNN class to be defined/importable
torch.save(obj = model, f = './my_mnist_model.pt')

Render the filters

In [None]:
# grab the learned kernels of the first convolution layer as a plain numpy array
#   shape is (number of filters, input bands, kernel height, kernel width)
first_conv_kernels = model.extract[0].weight.detach().cpu().numpy()

# one figure per filter, however many filters the layer has
for i, kernel in enumerate(first_conv_kernels):
    plt.figure()
    plt.imshow( np.squeeze(kernel) ) # squeeze drops the single input band for grayscale images
    plt.title("first conv layer, filter {}".format(i))

Next, let's switch the network to evaluation mode, which turns off behavior that is only wanted at training time, like dropout.

In [None]:
# eval() puts the module (including its Dropout layers) into inference mode and returns the module itself
model = model.eval()

Let's do resubstitution (resub): run the training data back through the trained model and see how we did ...

In [None]:
%matplotlib inline
# tqdm_notebook is deprecated; tqdm.auto picks a notebook or console progress bar for us
from tqdm.auto import tqdm

# resub because we are loading our MNIST training data set, one image at a time
test = tech.DataLoader(dataset = train, shuffle = False, batch_size = 1)

# we are only predicting here, so make sure dropout is off
model.eval()

# how did we do...
#   rows are the real labels, columns are the predicted labels
ConfusionMatrix = torch.zeros((10,10))
with torch.no_grad(): # no gradients needed for evaluation, saves time and memory
    for sample, label in tqdm(test):
        # what is its label? (batch of one, so pull out the single value)
        label = label.item()
        # make sure the sample (image) is a float tensor for PyTorch
        sample = sample.float()
        # do forward pass (i.e., prediction)
        prediction = model(sample)
        # take the largest output and return integer of which it was (make a classification decision)
        prediction = torch.argmax(prediction).item()
        # count it
        ConfusionMatrix[label,prediction] += 1

Lets plot a confusion matrix (see https://seaborn.pydata.org/generated/seaborn.heatmap.html)

In [None]:
import seaborn as sn  # yes, I had to "conda install seaborn"
import pandas as pd
import matplotlib.pyplot as plt

# label rows and columns with the digit they stand for
digits = list("0123456789")
df_cm = pd.DataFrame(np.asarray(ConfusionMatrix), index = digits, columns = digits)

plt.figure(figsize = (10,7))
# fmt = ".0f" prints whole counts; the default format would show e.g. 5.9e+03
ax = sn.heatmap(df_cm, annot=True, fmt=".0f")
# the matrix was filled as [real label, prediction], so rows are truth and columns are predictions
ax.set_xlabel("predicted digit")
ax.set_ylabel("true digit")
plt.show()

Let's do the same on the test data (which the model has not seen before).

In [None]:
# load the held-out MNIST test split (train = False)
test = datasets.MNIST( root = './',
                       train = False,
                       transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor()]), # convert data into tensor vs PIL image
                       download = True)

# NOT resub this time: this loader serves the test split, one image at a time
test2 = tech.DataLoader(dataset = test, shuffle = False, batch_size = 1)

# we are only predicting here, so make sure dropout is off
model.eval()

# how did we do...
#   rows are the real labels, columns are the predicted labels
ConfusionMatrix = torch.zeros((10,10))
with torch.no_grad(): # no gradients needed for evaluation, saves time and memory
    for sample, label in tqdm(test2):
        # what is its label? (batch of one, so pull out the single value)
        label = label.item()
        # make sure the sample (image) is a float tensor for PyTorch
        sample = sample.float()
        # do forward pass (i.e., prediction)
        prediction = model(sample)
        # take the largest output and return integer of which it was (make a classification decision)
        prediction = torch.argmax(prediction).item()
        # count it
        ConfusionMatrix[label,prediction] += 1

# label rows and columns with the digit they stand for
digits = list("0123456789")
df_cm = pd.DataFrame(np.asarray(ConfusionMatrix), index = digits, columns = digits)
plt.figure(figsize = (10,7))
# fmt = ".0f" prints whole counts; the default format would show e.g. 9.8e+02
ax = sn.heatmap(df_cm, annot=True, fmt=".0f")
ax.set_xlabel("predicted digit")
ax.set_ylabel("true digit")
plt.show()