# Problem 3

Use this notebook to write your code for problem 3.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## 3D - Convolutional network

As in problem 2, we have conveniently provided for your use code that loads and preprocesses the MNIST data.

In [2]:
# load MNIST data into PyTorch format
import torch
import torchvision
import torchvision.transforms as transforms

# number of samples per batch, shared by the training and test loaders
batch_size = 32

# ToTensor() converts each image to a float tensor and rescales 0-255 to 0-1.0;
# the DataLoader is what stacks individual samples into 4-D batches
to_tensor = transforms.ToTensor()

# training split, downloaded into the data/ folder; shuffle=True reorders it every epoch
mnist_training_data = torchvision.datasets.MNIST(
    'data/', train=True, download=True, transform=to_tensor)
training_data_loader = torch.utils.data.DataLoader(
    mnist_training_data, batch_size=batch_size, shuffle=True)

# test split, read in a fixed order
mnist_test_data = torchvision.datasets.MNIST(
    'data/', train=False, download=True, transform=to_tensor)
test_data_loader = torch.utils.data.DataLoader(
    mnist_test_data, batch_size=batch_size, shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 108758076.59it/s]


Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 44453465.62it/s]


Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 47010742.74it/s]


Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 12725804.12it/s]

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw






In [3]:
# look at the number of batches per epoch for training and validation
print(f'{len(training_data_loader)} training batches')
# Ask the dataset for its size: (number of batches * batch_size) overcounts whenever
# the last batch is only partially filled.
print(f'{len(training_data_loader.dataset)} training samples')
print(f'{len(test_data_loader)} validation batches')

1875 training batches
60000 training samples
313 validation batches


In [None]:
# sample model
import torch.nn as nn

# Two conv -> ReLU -> max-pool -> dropout stages followed by a small dense classifier.
# Spatial sizes below assume 28x28 single-channel MNIST inputs.
model = nn.Sequential(
    # stage 1: 28x28 -> 26x26 after the 3x3 convolution, 13x13 after pooling
    nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2),
    nn.Dropout(0.5),

    # stage 2: 13x13 -> 11x11 after the 3x3 convolution, 5x5 after pooling
    nn.Conv2d(in_channels=8, out_channels=8, kernel_size=3),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2),
    nn.Dropout(0.5),

    # classifier head: 8 channels * 5 * 5 positions = 200 flattened features
    nn.Flatten(),
    nn.Linear(in_features=8 * 5 * 5, out_features=64),
    nn.ReLU(),
    # raw class scores only: the cross-entropy loss applies the softmax itself
    nn.Linear(in_features=64, out_features=10),
)

In [None]:
# show the shape of every learnable tensor (weights and biases) in layer order
for weights in model.parameters():
    print(weights.shape)

torch.Size([8, 1, 3, 3])
torch.Size([8])
torch.Size([8, 8, 3, 3])
torch.Size([8])
torch.Size([64, 200])
torch.Size([64])
torch.Size([10, 64])
torch.Size([10])


In [None]:
# total number of learnable scalars across all of the model's parameter tensors
count = sum(tensor.numel() for tensor in model.parameters())
print(f'total params: {count}')

total params: 14178


In [None]:
# Loss and optimizer for the multi-class classification problem
import torch.optim as optim
# cross-entropy is computed from the model's raw class scores (softmax is built in)
criterion = nn.CrossEntropyLoss()
# RMSprop over every model parameter, with the library's default hyperparameters
optimizer = optim.RMSprop(params=model.parameters())

In [None]:
# Train the model for n_epochs epochs, iterating on the data in batches.
# After every epoch the model is evaluated on the test loader, and the four
# history arrays below hold one row of metrics per epoch.
n_epochs = 10

# store metrics
training_accuracy_history = np.zeros([n_epochs, 1])
training_loss_history = np.zeros([n_epochs, 1])
validation_accuracy_history = np.zeros([n_epochs, 1])
validation_loss_history = np.zeros([n_epochs, 1])

for epoch in range(n_epochs):
    # report progress against n_epochs so the header stays right if it is changed
    print(f'Epoch {epoch+1}/{n_epochs}:', end='')
    train_total = 0
    train_correct = 0
    # train (dropout active)
    model.train()
    for batch_idx, (images, labels) in enumerate(training_data_loader):
        optimizer.zero_grad()
        # forward pass
        output = model(images)
        # calculate categorical cross entropy loss (mean over the batch)
        loss = criterion(output, labels)
        # backward pass
        loss.backward()
        optimizer.step()

        # track training accuracy: predicted class is the highest-scoring output
        predicted = output.argmax(dim=1)
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()
        # track training loss; weight the batch mean by the batch size so the
        # epoch value is a true per-sample average even if a batch is short
        training_loss_history[epoch] += loss.item() * labels.size(0)
        # progress update after 180 batches (~1/10 epoch for batch size 32)
        if batch_idx % 180 == 0: print('.',end='')
    training_loss_history[epoch] /= train_total
    training_accuracy_history[epoch] = train_correct / train_total
    print(f'\n\tloss: {training_loss_history[epoch,0]:0.4f}, acc: {training_accuracy_history[epoch,0]:0.4f}',end='')

    # validate (dropout disabled, no gradients tracked)
    test_total = 0
    test_correct = 0
    model.eval()
    with torch.no_grad():
        for images, labels in test_data_loader:
            # forward pass
            output = model(images)
            # find accuracy
            predicted = output.argmax(dim=1)
            test_total += labels.size(0)
            test_correct += (predicted == labels).sum().item()
            # find loss, weighted by batch size: the final test batch is smaller
            # than the rest, so a plain mean of batch means would over-weight it
            loss = criterion(output, labels)
            validation_loss_history[epoch] += loss.item() * labels.size(0)
    validation_loss_history[epoch] /= test_total
    validation_accuracy_history[epoch] = test_correct / test_total
    print(f', val loss: {validation_loss_history[epoch,0]:0.4f}, val acc: {validation_accuracy_history[epoch,0]:0.4f}')

Epoch 1/10:...........
	loss: 0.7707, acc: 0.7498, val loss: 0.1970, val acc: 0.9395
Epoch 2/10:...........
	loss: 0.4323, acc: 0.8638, val loss: 0.1714, val acc: 0.9510
Epoch 3/10:...........
	loss: 0.4024, acc: 0.8753, val loss: 0.1528, val acc: 0.9566
Epoch 4/10:...........
	loss: 0.3935, acc: 0.8793, val loss: 0.1412, val acc: 0.9591
Epoch 5/10:...........
	loss: 0.3900, acc: 0.8809, val loss: 0.1615, val acc: 0.9581
Epoch 6/10:...........
	loss: 0.3850, acc: 0.8804, val loss: 0.1517, val acc: 0.9577
Epoch 7/10:...........
	loss: 0.3826, acc: 0.8806, val loss: 0.1188, val acc: 0.9652
Epoch 8/10:...........
	loss: 0.3817, acc: 0.8829, val loss: 0.1522, val acc: 0.9602
Epoch 9/10:...........
	loss: 0.3808, acc: 0.8845, val loss: 0.1270, val acc: 0.9689
Epoch 10/10:...........
	loss: 0.3671, acc: 0.8895, val loss: 0.1549, val acc: 0.9615


Above, we output the training loss/accuracy as well as the validation loss and accuracy. Not bad! Let's see if you can do better.

# Problem G

### Dropout probabilities

In [None]:
import torch.nn as nn
import torch.optim as optim

# Dropout probabilities to compare. p = 1 zeroes every activation during training
# and is kept only as the degenerate end point of the sweep.
dropout_probabilities = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]

# Each configuration is trained from scratch for this many epochs
n_epochs = 1

# final-epoch (validation loss, validation accuracy) per dropout probability, kept so
# the configurations can be compared after the loop instead of being overwritten
dropout_sweep_results = {}

# Iterate over the probabilities directly: the batch loops below need their own
# index, which must not clobber the variable that selects the configuration.
for dropout_p in dropout_probabilities:
    print(f'Dropout probability: {dropout_p}')

    # define model with specific dropout probability
    model = nn.Sequential(
        nn.Conv2d(1, 8, kernel_size=(3,3)),
        nn.ReLU(),
        nn.MaxPool2d(2),
        nn.Dropout(p=dropout_p),

        nn.Conv2d(8, 8, kernel_size=(3,3)),
        nn.ReLU(),
        nn.MaxPool2d(2),
        nn.Dropout(p=dropout_p),

        nn.Flatten(),
        nn.Linear(25*8, 64),
        nn.ReLU(),
        nn.Linear(64, 10)
        # PyTorch implementation of cross-entropy loss includes softmax layer
    )

    # For a multi-class classification problem; a fresh optimizer per model
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters())

    # store metrics (reset for every configuration)
    training_accuracy_history = np.zeros([n_epochs, 1])
    training_loss_history = np.zeros([n_epochs, 1])
    validation_accuracy_history = np.zeros([n_epochs, 1])
    validation_loss_history = np.zeros([n_epochs, 1])

    for epoch in range(n_epochs):
        print(f'Epoch {epoch+1}:', end='')
        train_total = 0
        train_correct = 0
        # train (dropout active)
        model.train()
        for batch_idx, (images, labels) in enumerate(training_data_loader):
            optimizer.zero_grad()
            # forward pass
            output = model(images)
            # calculate categorical cross entropy loss (mean over the batch)
            loss = criterion(output, labels)
            # backward pass
            loss.backward()
            optimizer.step()

            # track training accuracy
            predicted = output.argmax(dim=1)
            train_total += labels.size(0)
            train_correct += (predicted == labels).sum().item()
            # track training loss, weighted by batch size so the epoch value is
            # a per-sample average
            training_loss_history[epoch] += loss.item() * labels.size(0)
            # progress update after 180 batches (~1/10 epoch for batch size 32)
            if batch_idx % 180 == 0: print('.',end='')
        training_loss_history[epoch] /= train_total
        training_accuracy_history[epoch] = train_correct / train_total
        print(f'\n\tloss: {training_loss_history[epoch,0]:0.4f}, acc: {training_accuracy_history[epoch,0]:0.4f}',end='')

        # validate (dropout disabled, no gradients tracked)
        test_total = 0
        test_correct = 0
        model.eval()
        with torch.no_grad():
            for images, labels in test_data_loader:
                # forward pass
                output = model(images)
                # find accuracy
                predicted = output.argmax(dim=1)
                test_total += labels.size(0)
                test_correct += (predicted == labels).sum().item()
                # find loss; the final test batch is smaller than the others, so
                # weight each batch mean by its size
                loss = criterion(output, labels)
                validation_loss_history[epoch] += loss.item() * labels.size(0)
        validation_loss_history[epoch] /= test_total
        validation_accuracy_history[epoch] = test_correct / test_total
        print(f', val loss: {validation_loss_history[epoch,0]:0.4f}, val acc: {validation_accuracy_history[epoch,0]:0.4f}')

    # remember how this configuration ended up
    dropout_sweep_results[dropout_p] = (
        validation_loss_history[-1, 0], validation_accuracy_history[-1, 0])

Dropout probabilty: 0
Epoch 1:...........
	loss: 0.2960, acc: 0.9077, val loss: 0.1808, val acc: 0.9444
Dropout probabilty: 0.1
Epoch 1:...........
	loss: 0.3631, acc: 0.8872, val loss: 0.1230, val acc: 0.9618
Dropout probabilty: 0.2
Epoch 1:...........
	loss: 0.3841, acc: 0.8849, val loss: 0.1038, val acc: 0.9686
Dropout probabilty: 0.3
Epoch 1:...........
	loss: 0.4281, acc: 0.8760, val loss: 0.1305, val acc: 0.9602
Dropout probabilty: 0.4
Epoch 1:...........
	loss: 1.2842, acc: 0.5237, val loss: 0.1925, val acc: 0.9497
Dropout probabilty: 0.5
Epoch 1:...........
	loss: 0.7564, acc: 0.7462, val loss: 0.2887, val acc: 0.9313
Dropout probabilty: 0.6
Epoch 1:...........
	loss: 0.9391, acc: 0.6875, val loss: 0.3984, val acc: 0.8884
Dropout probabilty: 0.7
Epoch 1:...........
	loss: 1.3519, acc: 0.5323, val loss: 0.6488, val acc: 0.8746
Dropout probabilty: 0.8
Epoch 1:...........
	loss: 1.3765, acc: 0.5319, val loss: 0.9178, val acc: 0.7946
Dropout probabilty: 0.9
Epoch 1:...........
	los

In [None]:
import torch.nn as nn
import torch.optim as optim


# Improved model: an extra 16-channel convolution up front, batch normalization in
# both stages, and light (p = 0.1) dropout. Spatial sizes assume 28x28 MNIST inputs.
model = nn.Sequential(
    # stage 1: 28x28 -> 26x26 -> 24x24 after two 3x3 convolutions, 12x12 after pooling
    nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3),
    nn.BatchNorm2d(num_features=16),
    nn.ReLU(),
    nn.Conv2d(in_channels=16, out_channels=8, kernel_size=3),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2),
    nn.Dropout(0.1),

    # stage 2: 12x12 -> 10x10 after the 3x3 convolution, 5x5 after pooling
    nn.Conv2d(in_channels=8, out_channels=8, kernel_size=3),
    nn.BatchNorm2d(num_features=8),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2),
    nn.Dropout(0.1),

    # classifier head: 8 channels * 5 * 5 positions = 200 flattened features
    nn.Flatten(),
    nn.Linear(in_features=8 * 5 * 5, out_features=64),
    nn.ReLU(),
    # raw class scores only: the cross-entropy loss applies the softmax itself
    nn.Linear(in_features=64, out_features=10),
)

In [None]:
# tally the learnable scalars of the improved model, one parameter tensor at a time
count = 0
for weights in model.parameters():
    count += weights.numel()
print(f'total params: {count}')

total params: 15466


In [None]:
# For a multi-class classification problem
criterion = nn.CrossEntropyLoss()
optimizer = optim.RMSprop(model.parameters())

# Train the model for n_epochs epochs, iterating on the data in batches.
# After every epoch the model is evaluated on the test loader, and the four
# history arrays below hold one row of metrics per epoch.
n_epochs = 10

# store metrics
training_accuracy_history = np.zeros([n_epochs, 1])
training_loss_history = np.zeros([n_epochs, 1])
validation_accuracy_history = np.zeros([n_epochs, 1])
validation_loss_history = np.zeros([n_epochs, 1])

# loop bound and progress header both follow n_epochs, so changing it in one
# place cannot overrun the history arrays or mislabel the output
for epoch in range(n_epochs):
    print(f'Epoch {epoch+1}/{n_epochs}:', end='')
    train_total = 0
    train_correct = 0
    # train (dropout active, batch norm using batch statistics)
    model.train()
    for batch_idx, (images, labels) in enumerate(training_data_loader):
        optimizer.zero_grad()
        # forward pass
        output = model(images)
        # calculate categorical cross entropy loss (mean over the batch)
        loss = criterion(output, labels)
        # backward pass
        loss.backward()
        optimizer.step()

        # track training accuracy: predicted class is the highest-scoring output
        predicted = output.argmax(dim=1)
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()
        # track training loss, weighted by batch size so the epoch value is a
        # per-sample average
        training_loss_history[epoch] += loss.item() * labels.size(0)
        # progress update after 180 batches (~1/10 epoch for batch size 32)
        if batch_idx % 180 == 0: print('.',end='')
    training_loss_history[epoch] /= train_total
    training_accuracy_history[epoch] = train_correct / train_total
    print(f'\n\tloss: {training_loss_history[epoch,0]:0.4f}, acc: {training_accuracy_history[epoch,0]:0.4f}',end='')

    # validate (dropout disabled, batch norm using running statistics, no gradients)
    test_total = 0
    test_correct = 0
    model.eval()
    with torch.no_grad():
        for images, labels in test_data_loader:
            # forward pass
            output = model(images)
            # find accuracy
            predicted = output.argmax(dim=1)
            test_total += labels.size(0)
            test_correct += (predicted == labels).sum().item()
            # find loss; the final test batch is smaller than the others, so
            # weight each batch mean by its size
            loss = criterion(output, labels)
            validation_loss_history[epoch] += loss.item() * labels.size(0)
    validation_loss_history[epoch] /= test_total
    validation_accuracy_history[epoch] = test_correct / test_total
    print(f', val loss: {validation_loss_history[epoch,0]:0.4f}, val acc: {validation_accuracy_history[epoch,0]:0.4f}')

Epoch 1/10:...........
	loss: 0.3093, acc: 0.9100, val loss: 0.0895, val acc: 0.9732
Epoch 2/10:...........
	loss: 0.1232, acc: 0.9629, val loss: 0.0885, val acc: 0.9692
Epoch 3/10:...........
	loss: 0.1029, acc: 0.9683, val loss: 0.0917, val acc: 0.9705
Epoch 4/10:...........
	loss: 0.0945, acc: 0.9711, val loss: 0.0622, val acc: 0.9811
Epoch 5/10:...........
	loss: 0.0891, acc: 0.9741, val loss: 0.0571, val acc: 0.9822
Epoch 6/10:...........
	loss: 0.0826, acc: 0.9751, val loss: 0.0496, val acc: 0.9849
Epoch 7/10:...........
	loss: 0.0805, acc: 0.9762, val loss: 0.0551, val acc: 0.9833
Epoch 8/10:...........
	loss: 0.0769, acc: 0.9771, val loss: 0.0554, val acc: 0.9830
Epoch 9/10:...........
	loss: 0.0776, acc: 0.9771, val loss: 0.0513, val acc: 0.9853
Epoch 10/10:...........
	loss: 0.0749, acc: 0.9773, val loss: 0.0505, val acc: 0.9850
