**Digit classifier on the MNIST dataset using a convolutional neural network**

In [None]:
import torch
from torchvision import datasets # get the MNIST dataset
import torchvision.transforms as transforms
import numpy as np # view images
import matplotlib.pyplot as plt # allows us to plot the images

In [None]:
# Number of samples per mini-batch.
batch_size = 20
# Use the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {}".format(device))

# Convert each image to a PyTorch tensor when it is read from the dataset.
transform = transforms.ToTensor()

# Training and test splits of MNIST; download=True fetches the files into
# ./data if they are not already there.
train_data = datasets.MNIST(root='data', train=True,
                            download=True, transform=transform)
test_data = datasets.MNIST(root='data', train=False,
                           download=True, transform=transform)

# Data loaders that serve the datasets in mini-batches.
# The training loader reshuffles every epoch so that SGD does not see the
# samples in the same fixed order each time; the test loader keeps the
# dataset order.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
    shuffle=True, num_workers=0)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size,
    shuffle=False, num_workers=0)

In [None]:
%matplotlib inline 
# command for python notebooks

# Obtain one batch of training images.
dataiter = iter(train_loader)
# Use the built-in next(); the iterator's own .next() method is not available
# in current PyTorch releases.
images, labels = next(dataiter)
images = images.numpy()

# Plot the first 20 images of the batch in a 2-row grid, each titled with its label.
n_rows = 2
n_cols = 20 // n_rows  # integer division: add_subplot needs integer grid sizes
fig = plt.figure(figsize=(25, 4))
for idx in range(20):
    ax = fig.add_subplot(n_rows, n_cols, idx + 1, xticks=[], yticks=[])
    # squeeze drops the single colour channel so imshow receives a 2-D array
    ax.imshow(np.squeeze(images[idx]), cmap='gray')
    # .item() gets the Python value contained in a one-element tensor
    ax.set_title(str(labels[idx].item()))

In [None]:
import torch.nn as nn
## Define the NN architecture
class Net(nn.Module):
    """Small convolutional classifier for single-channel 28x28 digit images.

    Two convolution + sigmoid + max-pool stages extract features, which are
    flattened and passed through two fully connected layers that produce one
    raw score (logit) per digit class.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor.
        # The spatial size after a convolution is
        #   floor((input_size + 2*padding - kernel_size) / stride) + 1
        # so kernel_size=3, padding=1, stride=1 keeps the size unchanged, and
        # each MaxPool2d(kernel_size=2) halves it: 28 -> 14 -> 7.
        self.cnn = nn.Sequential(
            # in_channels=1 because MNIST images have a single colour channel
            nn.Conv2d(in_channels=1, out_channels=12, kernel_size=3, padding=1, stride=1),
            nn.Sigmoid(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=12, out_channels=24, kernel_size=3, padding=1, stride=1),
            nn.Sigmoid(),
            nn.MaxPool2d(kernel_size=2),
        )
        # Fully connected classifier head.
        self.linear = nn.Sequential(
            # 24 output channels from the second conv layer, each a 7x7 map
            nn.Linear(in_features=24*7*7, out_features=64),
            nn.Sigmoid(),
            nn.Dropout(p=0.2),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        """Return class scores of shape (batch, 10) for a batch of images.

        Args:
            x: image tensor of shape (batch, 1, 28, 28).

        Returns:
            Tensor of raw, un-normalised class scores located on the same
            device as the model's parameters.
        """
        # Move the input to wherever the model's weights live instead of
        # relying on a notebook-level `device` variable.
        x = x.to(next(self.parameters()).device)
        x = self.cnn(x)
        # Flatten every feature map into one vector per sample.
        x = x.flatten(start_dim=1)
        return self.linear(x)

# Build the network and place its parameters on the selected device.
model = Net().to(device)
# Printing a module lists the layers it contains.
print(model)

In [None]:
# Plain stochastic gradient descent over all of the model's parameters.
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)
# Cross-entropy between the model's class scores and the true labels is the
# quantity the optimizer minimises.
criterion = nn.CrossEntropyLoss()

In [None]:
# How many full passes to make over the training set.
epochs = 15

# Put the model in training mode so that its Dropout layer is active.
model.train()

for epoch in range(epochs):
    # Sum of per-sample losses seen during this epoch.
    train_loss = 0.0
    for data, target in train_loader:
        target = target.to(device)
        # Gradients accumulate across backward() calls, so reset them first.
        optimizer.zero_grad()
        # Forward pass, loss, backward pass, then one parameter update.
        output = model(data).to(device)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        # The criterion averages over the batch; scale back up by the batch
        # size so the epoch total is a sum over individual samples.
        samples_in_batch = data.size(0)
        train_loss += loss.item() * samples_in_batch

    # Average loss per training sample for this epoch.
    train_loss /= len(train_loader.dataset)

    print('Epoch: {} \tTraining Loss: {:.6f}'.format(epoch+1, train_loss))



Now that our model has been trained, let's see what it can do!

In [None]:
# Obtain one batch of test images.
dataiter = iter(test_loader)
# Use the built-in next(); the iterator's own .next() method is not available
# in current PyTorch releases.
images, labels = next(dataiter)
images, labels = images.to(device), labels.to(device)

# Switch to evaluation mode so Dropout is disabled, and skip gradient
# tracking since no backward pass is needed for prediction.
model.eval()
with torch.no_grad():
    output = model(images)
# The model returns one raw score per class; the index of the largest score
# is the predicted digit.
preds = output.argmax(dim=1)
# Bring the images back to the CPU as NumPy arrays for plotting.
images = images.cpu().numpy()

# Plot the first 20 images of the batch with predicted and true labels.
n_rows = 2
n_cols = 20 // n_rows  # integer division: add_subplot needs integer grid sizes
fig = plt.figure(figsize=(25, 4))
for idx in range(20):
    ax = fig.add_subplot(n_rows, n_cols, idx + 1, xticks=[], yticks=[])
    ax.imshow(np.squeeze(images[idx]), cmap='gray')
    predicted = preds[idx].item()
    actual = labels[idx].item()
    # Green title when the prediction is correct, red otherwise.
    ax.set_title("{} (correct is {})".format(str(predicted), str(actual)),
                 color=("green" if predicted == actual else "red"))