# Problem 3

Use this notebook to write your code for problem 3.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
import torch

# Pick the compute device first so the CUDA-only queries below can be skipped
# on machines without a GPU (they raise when no CUDA device is usable, which
# would otherwise prevent the CPU fallback from ever being reached).
cuda_available = torch.cuda.is_available()
device = torch.device("cuda:0" if cuda_available else "cpu")

print(cuda_available)
if cuda_available:
    # index and human-readable name of the GPU that will be used
    print(torch.cuda.current_device())
    print(torch.cuda.get_device_name(0))

True
0
GeForce GTX 1050 Ti with Max-Q Design


## 3D - Convolutional network

As in problem 2, we have conveniently provided for your use code that loads and preprocesses the MNIST data.

In [3]:
# load MNIST data into PyTorch format
import torch
import torchvision
import torchvision.transforms as transforms


# mini-batch size shared by the training and test loaders
batch_size = 32

# Training split, downloaded into the data/ folder on first use.
# transforms.ToTensor() converts each image to a float tensor and rescales
# pixel values from 0-255 to 0-1.0; the DataLoader then stacks the images
# into (shuffled) batches.
mnist_training_data = torchvision.datasets.MNIST(
    root='data/',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
training_data_loader = torch.utils.data.DataLoader(
    dataset=mnist_training_data,
    batch_size=batch_size,
    shuffle=True,
)

# Test split: same preprocessing, but iterated in a fixed order.
mnist_test_data = torchvision.datasets.MNIST(
    root='data/',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)
test_data_loader = torch.utils.data.DataLoader(
    dataset=mnist_test_data,
    batch_size=batch_size,
    shuffle=False,
)

In [4]:
# Summarize the loaders: batches per epoch for each split, plus the number of
# training samples.
print(f'{len(training_data_loader)} training batches')
print(f'{batch_size} samples in each batch')
# Count samples from the dataset itself: batches * batch_size overcounts
# whenever the final batch is only partially filled.
print(f'{len(training_data_loader.dataset)} total training samples')
print(f'{len(test_data_loader)} validation batches')

1875 training batches
32 samples in each batch
60000 total training samples
313 validation batches


In [5]:
# sample model
import torch.nn as nn

def _conv_block(in_channels, out_channels, dropout_p=0.2):
    """Return the layers of one conv stage.

    The stage is: 3x3 convolution -> batch norm -> ReLU -> 2x2 max pool -> dropout.
    """
    return [
        nn.Conv2d(in_channels, out_channels, kernel_size=(3, 3)),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),
        nn.Dropout(p=dropout_p),
    ]


# Two conv stages with 70 filters each, followed by a small dense classifier.
model = nn.Sequential(
    *_conv_block(1, 70),
    *_conv_block(70, 70),

    nn.Flatten(),
    # 25 * 70 flattened features: 70 channels with 25 spatial positions each
    # (what remains of a 28x28 input after the two conv + pool stages)
    nn.Linear(25 * 70, 70),
    nn.ReLU(),
    # raw class scores only: PyTorch's cross-entropy loss includes the softmax layer
    nn.Linear(70, 10),
)

In [6]:
# show the shape of every learnable tensor, in layer order
for tensor in model.parameters():
    print(tensor.shape)

torch.Size([70, 1, 3, 3])
torch.Size([70])
torch.Size([70])
torch.Size([70])
torch.Size([70, 70, 3, 3])
torch.Size([70])
torch.Size([70])
torch.Size([70])
torch.Size([70, 1750])
torch.Size([70])
torch.Size([10, 70])
torch.Size([10])


In [7]:
# tally the number of scalar parameters over all of the model's tensors
count = sum(tensor.numel() for tensor in model.parameters())
print(f'total params: {count}')

total params: 168430


In [8]:
# For a multi-class classification problem
import torch.optim as optim
# RMSprop with the library's default hyperparameters, over every model parameter
optimizer = optim.RMSprop(params=model.parameters())
# cross-entropy objective, applied directly to the model's raw class scores
criterion = nn.CrossEntropyLoss()

# Final test accuracy when trained for 10 epochs

In [9]:
# Train the model for n_epochs epochs, iterating on the data in batches.
# NOTE(review): nothing in this cell moves the model or the batches to `device`,
# so training runs wherever the model was created — confirm this is intended.
n_epochs = 10

# per-epoch metrics, one row per epoch
training_accuracy_history = np.zeros([n_epochs, 1])
training_loss_history = np.zeros([n_epochs, 1])
validation_accuracy_history = np.zeros([n_epochs, 1])
validation_loss_history = np.zeros([n_epochs, 1])

for epoch in range(n_epochs):
    # report progress against n_epochs rather than a hard-coded total
    print(f'Epoch {epoch+1}/{n_epochs}:', end='')
    train_total = 0
    train_correct = 0
    # train
    model.train()
    for i, (images, labels) in enumerate(training_data_loader):
        optimizer.zero_grad()
        # forward pass
        output = model(images)
        # calculate categorical cross entropy loss
        loss = criterion(output, labels)
        # backward pass
        loss.backward()
        optimizer.step()

        # track training accuracy: predicted class = index of the largest score
        n_samples = labels.size(0)
        predicted = output.argmax(dim=1)
        train_total += n_samples
        train_correct += (predicted == labels).sum().item()
        # track training loss; the criterion returns a batch mean, so weight it
        # by the batch size to end up with a per-sample mean over the epoch
        training_loss_history[epoch] += loss.item() * n_samples
        # progress update after 180 batches (~1/10 epoch for batch size 32)
        if i % 180 == 0: print('.',end='')
    training_loss_history[epoch] /= train_total
    training_accuracy_history[epoch] = train_correct / train_total
    print(f'\n\tloss: {training_loss_history[epoch,0]:0.4f}, acc: {training_accuracy_history[epoch,0]:0.4f}',end='')

    # validate: eval mode (dropout off, batch norm uses running statistics)
    # and no gradient tracking
    test_total = 0
    test_correct = 0
    model.eval()
    with torch.no_grad():
        for images, labels in test_data_loader:
            # forward pass
            output = model(images)
            # find accuracy
            n_samples = labels.size(0)
            predicted = output.argmax(dim=1)
            test_total += n_samples
            test_correct += (predicted == labels).sum().item()
            # find loss, weighted by batch size: a short final batch would
            # otherwise count as much as a full one in the epoch average
            loss = criterion(output, labels)
            validation_loss_history[epoch] += loss.item() * n_samples
    validation_loss_history[epoch] /= test_total
    validation_accuracy_history[epoch] = test_correct / test_total
    print(f', val loss: {validation_loss_history[epoch,0]:0.4f}, val acc: {validation_accuracy_history[epoch,0]:0.4f}')

Epoch 1/10:...........
	loss: 0.5945, acc: 0.8859, val loss: 0.0622, val acc: 0.9799
Epoch 2/10:...........
	loss: 0.0962, acc: 0.9719, val loss: 0.0487, val acc: 0.9849
Epoch 3/10:...........
	loss: 0.0805, acc: 0.9772, val loss: 0.0506, val acc: 0.9855
Epoch 4/10:...........
	loss: 0.0744, acc: 0.9796, val loss: 0.0463, val acc: 0.9853
Epoch 5/10:...........
	loss: 0.0694, acc: 0.9803, val loss: 0.0483, val acc: 0.9863
Epoch 6/10:...........
	loss: 0.0669, acc: 0.9810, val loss: 0.0610, val acc: 0.9833
Epoch 7/10:...........
	loss: 0.0663, acc: 0.9817, val loss: 0.0680, val acc: 0.9816
Epoch 8/10:...........
	loss: 0.0630, acc: 0.9831, val loss: 0.0515, val acc: 0.9861
Epoch 9/10:...........
	loss: 0.0649, acc: 0.9822, val loss: 0.0583, val acc: 0.9844
Epoch 10/10:...........
	loss: 0.0612, acc: 0.9837, val loss: 0.0391, val acc: 0.9893


# Test accuracy for the 10 dropout probabilities

In [12]:
# Dropout sweep: for each probability, build a fresh network, train it for a
# single epoch and report its accuracy on the test set.
# np.linspace includes both endpoints, so the sweep covers p = 0.0 (no dropout)
# through p = 1.0 (every activation zeroed during training — a degenerate
# setting in which the network cannot learn).
p_s = np.linspace(0, 1, num = 10)

for p in p_s:
    # fresh, untrained model for this dropout probability
    model = nn.Sequential(
        nn.Conv2d(1, 64, kernel_size=(3,3)),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.MaxPool2d((2,2)),
        nn.Dropout(p= p),

        nn.Conv2d(64, 64, kernel_size=(3,3)),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.MaxPool2d((2,2)),
        nn.Dropout(p= p),

        nn.Flatten(),
        nn.Linear(25*64, 64),
        nn.ReLU(),
        nn.Linear(64, 10)
        # PyTorch implementation of cross-entropy loss includes softmax layer
    )
    print(f'p = {p}:', end='')
    criterion = nn.CrossEntropyLoss()
    # new optimizer per model so no optimizer state carries over between runs
    optimizer = optim.RMSprop(model.parameters())

    # running sums for this run's metrics
    training_loss = 0
    validation_loss = 0
    train_total = 0
    train_correct = 0
    # train (one epoch)
    model.train()
    for i, (images, labels) in enumerate(training_data_loader):
        optimizer.zero_grad()
        # forward pass
        output = model(images)
        # calculate categorical cross entropy loss
        loss = criterion(output, labels)
        # backward pass
        loss.backward()
        optimizer.step()

        # track training accuracy: predicted class = index of the largest score
        n_samples = labels.size(0)
        predicted = output.argmax(dim=1)
        train_total += n_samples
        train_correct += (predicted == labels).sum().item()
        # track training loss; the criterion returns a batch mean, so weight it
        # by the batch size to end up with a per-sample mean over the epoch
        training_loss += loss.item() * n_samples
        # progress update after 180 batches
        if i % 180 == 0: print('.',end='')
    training_loss /= train_total
    training_accuracy = train_correct / train_total
    print(f'\n\tloss: {training_loss:0.4f}, acc: {training_accuracy:0.4f}',end='')

    # validate: eval mode (dropout off, batch norm uses running statistics)
    # and no gradient tracking
    test_total = 0
    test_correct = 0
    model.eval()
    with torch.no_grad():
        for images, labels in test_data_loader:
            # forward pass
            output = model(images)
            # find accuracy
            n_samples = labels.size(0)
            predicted = output.argmax(dim=1)
            test_total += n_samples
            test_correct += (predicted == labels).sum().item()
            # find loss, weighted by batch size: a short final batch would
            # otherwise count as much as a full one in the average
            loss = criterion(output, labels)
            validation_loss += loss.item() * n_samples
    validation_loss /= test_total
    validation_accuracy = test_correct / test_total
    print(f', val loss: {validation_loss:0.4f}, val acc: {validation_accuracy:0.4f}')


p = 0.0:...........
	loss: 0.4287, acc: 0.9413, val loss: 0.0652, val acc: 0.9805
p = 0.1111111111111111:...........
	loss: 0.4703, acc: 0.8885, val loss: 0.0521, val acc: 0.9848
p = 0.2222222222222222:...........
	loss: 0.4737, acc: 0.9250, val loss: 0.0784, val acc: 0.9763
p = 0.3333333333333333:...........
	loss: 0.4459, acc: 0.9008, val loss: 0.0772, val acc: 0.9792
p = 0.4444444444444444:...........
	loss: 0.6755, acc: 0.8090, val loss: 0.0953, val acc: 0.9724
p = 0.5555555555555556:...........
	loss: 0.6432, acc: 0.8130, val loss: 0.1249, val acc: 0.9631
p = 0.6666666666666666:...........
	loss: 0.5966, acc: 0.8671, val loss: 0.0955, val acc: 0.9703
p = 0.7777777777777777:...........
	loss: 0.7372, acc: 0.7992, val loss: 0.1612, val acc: 0.9547
p = 0.8888888888888888:...........
	loss: 1.3541, acc: 0.5495, val loss: 0.3817, val acc: 0.9119
p = 1.0:...........
	loss: 2.3027, acc: 0.1084, val loss: 146.6301, val acc: 0.1011


Above, we output the training loss/accuracy as well as the validation loss and accuracy. Not bad! Let's see if you can do better.