# Assignment 2 Part 2 - Convolutional Neural Network

## (2) Experimenting

In [None]:
%run "Assignment2-2-1.ipynb" import DigitClassifier, train, eval

### (0) Getting a baseline

In [None]:
# Baseline run: build a DigitClassifier with no constructor arguments, then
# train and evaluate it without overriding any training arguments.
model = DigitClassifier()
train(model)
# NOTE(review): `eval` is presumably the helper brought in by the %run cell
# (shadowing the builtin of the same name) — confirm.
eval(model)

This baseline uses all the default parameters defined in the other notebook (`Assignment2-2-1.ipynb`).

### (1) Adjusting the hyperparameters such as learning rate, number of training epochs, convolutional kernel size, stride, pooling strategy etc.

#### Epochs

In [None]:
# Epoch sweep: a freshly constructed model is trained and evaluated for each
# candidate number of epochs.
for num_epochs in (2, 4, 6):
    # Heading with one blank line before it and one after it.
    print("", f"Epochs: {num_epochs}", "", sep="\n")
    model = DigitClassifier()
    train(model, num_epochs=num_epochs)
    eval(model)

Increasing the number of epochs improves training accuracy, but eventually leads to overfitting. Furthermore, each additional epoch adds considerably to the training time. I will stay with the baseline value of 4 epochs.

In [None]:
# Epoch count chosen from the sweep above; passed as `num_epochs` to train()
# in the later experiments.
NUM_EPOCHS = 4

#### Learning rate

In [None]:
# Learning-rate sweep: one freshly constructed model per candidate value,
# trained with NUM_EPOCHS epochs and then evaluated.
for learning_rate in (0.1, 0.01, 0.001, 0.0001):
    # Heading with one blank line before it and one after it.
    print("", f"Learning rate: {learning_rate}", "", sep="\n")
    model = DigitClassifier()
    train(
        model,
        learning_rate=learning_rate,
        num_epochs=NUM_EPOCHS,
    )
    eval(model)


A lower learning rate seems to bring about better accuracy, so I will stick with the baseline learning rate of 0.001.

In [None]:
# Learning rate kept for the later experiments; passed as `learning_rate` to
# train().
LEARNING_RATE = 0.001

#### Kernel size

In [None]:
# Kernel-size sweep: build a model per candidate kernel size and train it with
# the learning rate and epoch count fixed earlier, then evaluate it.
for kernel_size in (3, 5, 8):
    # Heading with one blank line before it and one after it.
    print("", f"Kernel size: {kernel_size}", "", sep="\n")
    model = DigitClassifier(kernel_size=kernel_size)
    train(
        model,
        num_epochs=NUM_EPOCHS,
        learning_rate=LEARNING_RATE,
    )
    eval(model)

A larger kernel captures higher-level information more reliably, which is generally what we want here. For this problem, a kernel size of 5 seems best.

In [None]:
# Kernel size chosen from the sweep above; passed as `kernel_size` to
# DigitClassifier in the later experiments.
KERNEL_SIZE = 5

#### Stride

In [None]:
# Stride sweep: the kernel size stays at KERNEL_SIZE while the stride varies;
# each model is trained with the fixed learning rate and epoch count.
for stride in (1, 2, 3):
    # Heading with one blank line before it and one after it.
    print("", f"Stride: {stride}", "", sep="\n")
    model = DigitClassifier(
        kernel_size=KERNEL_SIZE,
        stride=stride,
    )
    train(
        model,
        num_epochs=NUM_EPOCHS,
        learning_rate=LEARNING_RATE,
    )
    eval(model)

A higher stride seems to lead to poorer accuracy. I will stay with a stride value of 1.

In [None]:
# Stride chosen from the sweep above; passed as `stride` to DigitClassifier in
# the later experiments.
STRIDE = 1

#### Pooling strategy

In [None]:
import torch.nn as nn

# Pooling sweep: compare a 2x2 max-pooling module against a 2x2 average-pooling
# module, with kernel size and stride held at the values fixed earlier.
for pooling_strategy in (nn.MaxPool2d(2), nn.AvgPool2d(2)):
    # Heading with one blank line before it and one after it; the module's
    # string form is what gets printed.
    print("", f"Pooling strategy: {pooling_strategy}", "", sep="\n")
    model = DigitClassifier(
        kernel_size=KERNEL_SIZE,
        stride=STRIDE,
        pooling_strategy=pooling_strategy,
    )
    train(
        model,
        num_epochs=NUM_EPOCHS,
        learning_rate=LEARNING_RATE,
    )
    eval(model)

Based on the experiment, it seems average pooling yields a slightly better result.

In [None]:
# Pooling module kept for the later experiments. This single AvgPool2d(2)
# instance is passed as `pooling_strategy` to every DigitClassifier built in
# the cells below.
POOLING_STRATEGY = nn.AvgPool2d(2)

### (2) Changing the number of feature maps

In [None]:
# Feature-map sweep: only `feature_maps` varies; kernel size, stride and
# pooling stay at the values fixed earlier.
for feature_maps in (8, 16, 24):
    # Heading with one blank line before it and one after it.
    print("", f"Number of feature maps: {feature_maps}", "", sep="\n")
    model = DigitClassifier(
        kernel_size=KERNEL_SIZE,
        stride=STRIDE,
        pooling_strategy=POOLING_STRATEGY,
        feature_maps=feature_maps,
    )
    train(
        model,
        num_epochs=NUM_EPOCHS,
        learning_rate=LEARNING_RATE,
    )
    eval(model)

Based on the experimental results, I will go with 8 for the number of feature maps.

In [None]:
# Number of feature maps chosen from the sweep above; passed as `feature_maps`
# to DigitClassifier in the later experiments.
FEATURE_MAPS = 8

### (3) Including an additional convolutional layer

In [None]:
# Reference run: the configuration fixed so far, without `additional_conv`.
# The heading is followed by one blank line.
print("Baseline", "", sep="\n")
model = DigitClassifier(
    kernel_size=KERNEL_SIZE,
    stride=STRIDE,
    pooling_strategy=POOLING_STRATEGY,
    feature_maps=FEATURE_MAPS,
)
train(
    model,
    num_epochs=NUM_EPOCHS,
    learning_rate=LEARNING_RATE,
)
eval(model)

# Same configuration with `additional_conv=True`. The heading has one blank
# line before it and one after it.
print("", "Additional convolutional layer", "", sep="\n")
model = DigitClassifier(
    kernel_size=KERNEL_SIZE,
    stride=STRIDE,
    pooling_strategy=POOLING_STRATEGY,
    feature_maps=FEATURE_MAPS,
    additional_conv=True,
)
train(
    model,
    num_epochs=NUM_EPOCHS,
    learning_rate=LEARNING_RATE,
)
eval(model)

Adding another convolutional layer has a significant cost in training time, but it increases the predictive accuracy, so I will go with the additional layer.

In [None]:
# Decision from the comparison above; passed as `additional_conv` to
# DigitClassifier in the fully-connected-layer experiment.
ADDITIONAL_CONV_LAYER = True

### (4) Adding an extra fully connected layer

In [None]:
# Reference run: the configuration fixed so far, without `additional_fc`.
# The heading is followed by one blank line.
print("Baseline", "", sep="\n")
model = DigitClassifier(
    kernel_size=KERNEL_SIZE,
    stride=STRIDE,
    pooling_strategy=POOLING_STRATEGY,
    feature_maps=FEATURE_MAPS,
    additional_conv=ADDITIONAL_CONV_LAYER,
)
train(
    model,
    num_epochs=NUM_EPOCHS,
    learning_rate=LEARNING_RATE,
)
eval(model)

# Same configuration with `additional_fc=True`. The heading has one blank line
# before it and one after it.
print("", "Additional fully connected layer", "", sep="\n")
model = DigitClassifier(
    kernel_size=KERNEL_SIZE,
    stride=STRIDE,
    pooling_strategy=POOLING_STRATEGY,
    feature_maps=FEATURE_MAPS,
    additional_conv=ADDITIONAL_CONV_LAYER,
    additional_fc=True,
)
train(
    model,
    num_epochs=NUM_EPOCHS,
    learning_rate=LEARNING_RATE,
)
eval(model)

This does not seem to bring any noticeable benefit, so I will not add another fully connected layer.

In [None]:
# Records the decision not to add another fully connected layer.
# NOTE(review): not referenced by any cell visible here — confirm whether a
# later cell reads it.
ADDITIONAL_FC_LAYER = False