# Introduction to Machine Learning  
**Computer Vision Hackathon  
Wintersession  
Tuesday, January 24, 2023**

This notebook trains a simple CNN on the MNIST dataset. The code comes from a [PyTorch example on GitHub](https://github.com/pytorch/examples/blob/master/mnist/main.py).

# About Your Colab Session

Learn about the CPU cores available to your session:

In [None]:
cat /proc/cpuinfo

In [None]:
import os

# Number of DataLoader worker processes to use, capped at 2.
# os.cpu_count() returns None when the core count cannot be determined, and
# min(None, 2) would raise TypeError, so fall back to a single core.
num_cores = min(os.cpu_count() or 1, 2)
print(num_cores)

Let's see which GPU we are using (probably a Tesla T4):

In [None]:
# Ask the NVIDIA driver tool which GPU (if any) is attached to this session.
!nvidia-smi

# Data Preparation

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR
from matplotlib import pyplot as plt

We want to use a GPU when one is available:

In [None]:
# True when PyTorch reports a usable CUDA device in this session; later cells
# use this flag to pick the device and the DataLoader options.
use_cuda = bool(torch.cuda.is_available())
print(use_cuda)

In [None]:
# Fix the global PyTorch RNG seed so weight initialization, shuffling and
# dropout are repeatable from run to run.
torch.manual_seed(42)
device = torch.device("cuda" if use_cuda else "cpu")

# Keyword arguments forwarded to the two DataLoaders.
# Training batches are always shuffled; previously shuffling was only turned
# on together with the CUDA options, so CPU-only runs saw the data in the same
# fixed order every epoch. The test loader is not shuffled because the
# aggregate loss/accuracy do not depend on sample order.
train_kwargs = {'batch_size': 64, 'shuffle': True}
test_kwargs = {'batch_size': 1000}
if use_cuda:
    # Options that only pay off when batches are copied to a GPU: background
    # worker processes for loading and page-locked host memory.
    cuda_kwargs = {'num_workers': num_cores, 'pin_memory': True}
    train_kwargs.update(cuda_kwargs)
    test_kwargs.update(cuda_kwargs)

In [None]:
# Per-image preprocessing: convert the PIL image to a float tensor, then
# standardize it with the mean/std constants used by the upstream PyTorch
# MNIST example (presumably the training-set statistics -- verify if changed).
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize((0.1307,), (0.3081,))
transform = transforms.Compose([to_tensor, normalize])

# Only the training split requests a download; the test split is read from
# the same '/tmp' root afterwards.
train_data = datasets.MNIST('/tmp', train=True, download=True, transform=transform)
test_data = datasets.MNIST('/tmp', train=False, transform=transform)

# Batch size and the remaining loader options come from the kwargs dicts
# prepared in the previous cell.
train_loader = torch.utils.data.DataLoader(train_data, **train_kwargs)
test_loader = torch.utils.data.DataLoader(test_data, **test_kwargs)

In [None]:
# Draw a 3x3 grid of randomly selected training images.
figure = plt.figure(figsize=(8, 8))
cols, rows = 3, 3
for position in range(1, cols * rows + 1):
    # One random dataset index per panel (drawn from the global torch RNG).
    sample_idx = int(torch.randint(len(train_data), size=(1,)))
    img, _ = train_data[sample_idx]  # the class label is not displayed
    ax = figure.add_subplot(rows, cols, position)
    ax.axis("off")
    # squeeze() drops the leading channel axis so imshow receives a 2-D array
    ax.imshow(img.squeeze(), cmap="gray")
plt.show()

# Model Definition

Create a Python class called `Net` that derives from PyTorch's `nn.Module`. The `__init__()` method defines the network layers and the regularization (dropout) layers, while the `forward()` method describes the forward pass.

In [None]:
class Net(nn.Module):
    """Small convolutional classifier for 1x28x28 greyscale digit images.

    Layout: two 3x3 convolutions (each followed by ReLU), a 2x2 max-pool,
    dropout, and two fully connected layers. ``forward`` returns
    log-probabilities over the 10 classes.
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor. With stride 1 and no padding, every
        # 3x3 convolution trims one pixel from each border:
        #   input 28x28x1 -> conv1 -> 26x26x32 -> conv2 -> 24x24x64
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1)

        # Dropout zeroes individual activations at random while training
        # (25% after pooling, 50% before the output layer) to limit
        # overfitting; it is a no-op in eval mode.
        self.dropout1 = nn.Dropout(p=0.25)
        self.dropout2 = nn.Dropout(p=0.5)

        # Classifier head. The 2x2 max-pool halves 24x24 to 12x12, so the
        # flattened feature vector has 64 * 12 * 12 = 9216 entries.
        self.fc1 = nn.Linear(64 * 12 * 12, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images.

        Args:
            x: tensor of shape (batch, 1, 28, 28).

        Returns:
            Tensor of shape (batch, 10) holding log-softmax scores.
        """
        # convolutional stage
        features = F.relu(self.conv1(x))
        features = F.relu(self.conv2(features))
        features = self.dropout1(F.max_pool2d(features, kernel_size=2))

        # flatten everything except the batch axis and classify
        hidden = F.relu(self.fc1(torch.flatten(features, 1)))
        logits = self.fc2(self.dropout2(hidden))

        # log_softmax here + nll_loss in the training loop = cross-entropy loss
        return F.log_softmax(logits, dim=1)

Instantiate the network and move it to the device (which is a GPU when available). Create the optimizer.

In [None]:
# Build the network and move its parameters to the selected device.
model = Net()
model = model.to(device)

# Adadelta optimizer over all trainable parameters, starting at lr=1.0.
optimizer = optim.Adadelta(params=model.parameters(), lr=1.0)

In [None]:
# Print a layer-by-layer summary of the model for a single MNIST-sized input.
from torchsummary import summary

mnist_input_shape = (1, 28, 28)  # (channels, height, width)
summary(model, input_size=mnist_input_shape)

# Train and Test Methods

In [None]:
def train(model, device, train_loader, optimizer, epoch, log_interval=100):
    """Run one training epoch over ``train_loader``.

    Args:
        model: network returning log-probabilities (it is trained with
            ``F.nll_loss``).
        device: torch device each batch is moved to before the forward pass.
        train_loader: iterable of ``(data, target)`` batches; must support
            ``len()`` and expose a ``dataset`` attribute (used for progress
            reporting only).
        optimizer: optimizer that updates ``model``'s parameters.
        epoch: epoch number, used only in the progress message.
        log_interval: print progress every ``log_interval`` batches. A falsy
            value (``0`` or ``None``) disables progress output. Defaults to
            100, the interval that used to be hard-coded.
    """
    model.train()  # sets the model in training mode (i.e., dropout enabled)
    num_batches = len(train_loader)
    num_samples = len(train_loader.dataset)
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()  # gradients accumulate, so clear the previous step's
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        # Guard on log_interval first so 0/None never reaches the modulo.
        if log_interval and batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), num_samples,
                100. * batch_idx / num_batches, loss.item()))

In [None]:
def test(model, device, test_loader):
    model.eval() # sets the model in evaluation mode (i.e., dropout disabled)
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

Train for a fixed number of epochs, reporting the loss and accuracy on the test set after each epoch:

In [None]:
epochs = 5

# StepLR with step_size=1 multiplies the learning rate by gamma after every
# scheduler.step(), i.e. once per epoch here.
scheduler = StepLR(optimizer, step_size=1, gamma=0.7)

for epoch in range(1, epochs + 1):
    # one pass over the training data, then evaluate on the held-out test set
    train(model=model, device=device, train_loader=train_loader,
          optimizer=optimizer, epoch=epoch)
    test(model=model, device=device, test_loader=test_loader)
    scheduler.step()