# AI6121 Computer Vision Course Project

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import time
import torch
import torchvision
from torch import nn
from torchvision import transforms

# Turn on cuDNN's benchmark mode for the whole session.
torch.backends.cudnn.benchmark = True
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
dev = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
dev

## Load MNIST Dataset

In [None]:
# MNIST train and test splits, downloaded into the current directory if needed;
# every image is converted with ToTensor().
train_dataset = torchvision.datasets.MNIST(
  root = '',
  train = True,
  download = True,
  transform = transforms.ToTensor())
test_dataset = torchvision.datasets.MNIST(
  root = '',
  train = False,
  download = True,
  transform = transforms.ToTensor())

In [None]:
# Show the shapes of the image and label tensors: train/test data first, then train/test targets.
for _tensor in (train_dataset.data, test_dataset.data, train_dataset.targets, test_dataset.targets):
  display(_tensor.shape)

## Utils

In [None]:
def calculate_padding(input_size, kernel_size = 3, stride = 1):
  """Return the padding for which a convolution's output size equals its input size.

  Solves Output_Size = [(Input_Size - Kernel_Size + 2 * Padding)/Stride] + 1
  for Padding with Output_Size == Input_Size. The result is truncated to an int.
  """
  total_padding = (input_size - 1) * stride + kernel_size - input_size
  return int(total_padding / 2)

class AverageMeter:
  # Code from https://github.com/pytorch/examples/blob/master/imagenet/main.py
  """Keeps the most recent value of a metric together with its running weighted average."""

  def __init__(self, name, fmt=':f'):
    self.name, self.fmt = name, fmt
    self.reset()

  def reset(self):
    """Reset the latest value, the average, the weighted sum and the sample count to zero."""
    self.val = self.avg = self.sum = self.count = 0

  def update(self, val, n=1):
    """Record `val` as the latest value, counted as `n` samples in the average."""
    self.val = val
    self.count += n
    self.sum += val * n
    self.avg = self.sum / self.count

  def __str__(self):
    # Produces e.g. '{name} {val:6.3f} ({avg:6.3f})', then fills it from the instance attributes.
    template = '{{name}} {{val{0}}} ({{avg{0}}})'.format(self.fmt)
    return template.format(**vars(self))

class ProgressMeter:
  # Code from https://github.com/pytorch/examples/blob/master/imagenet/main.py
  """Prints a '[batch/total]' counter followed by the string form of each meter."""

  def __init__(self, num_batches, meters, prefix=""):
    self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
    self.meters = meters
    self.prefix = prefix

  def display(self, batch):
    """Print one tab-separated line: prefix plus batch counter, then every meter."""
    header = self.prefix + self.batch_fmtstr.format(batch)
    print('\t'.join([header, *map(str, self.meters)]))

  def _get_batch_fmtstr(self, num_batches):
    """Build the counter template, e.g. '[{:3d}/100]' for 100 batches."""
    # The batch index is padded to as many digits as the total has.
    width = len(str(num_batches // 1))
    field = '{:' + str(width) + 'd}'
    return '[{}/{}]'.format(field, field.format(num_batches))

def accuracy(output, target, topk=(1,)):
  # Code from https://github.com/pytorch/examples/blob/master/imagenet/main.py
  """Return the top-k accuracy (in percent) for every k in `topk`.

  `output` holds one row of scores per sample and `target` the matching class
  indices. The result is a list with one 1-element tensor per requested k.
  """
  with torch.no_grad():
    largest_k = max(topk)
    n_samples = target.size(0)

    # Class indices of the `largest_k` best scores per sample, transposed so
    # that row r holds the rank-r prediction of every sample.
    top_indices = output.topk(largest_k, dim=1, largest=True, sorted=True)[1].t()
    hits = top_indices.eq(target.view(1, -1).expand_as(top_indices))

    scale = 100.0 / n_samples
    return [hits[:k].reshape(-1).float().sum(0, keepdim=True).mul_(scale) for k in topk]

def train(train_loader, model, criterion, optimizer, epoch, show_progress = True):
  # Code from https://github.com/pytorch/examples/blob/master/imagenet/main.py
  """Run one training pass over `train_loader`, updating `model` in place.

  Args:
    train_loader: iterable yielding (images, target) batches.
    model: network to train; it is switched to train mode.
    criterion: loss callable applied to (output, target).
    optimizer: optimizer that is stepped once per batch.
    epoch: epoch number, used only in the progress prefix.
    show_progress: when True, print the running meters after every batch.

  Returns:
    (losses, top1, top5): three lists with one Python float per batch
    (batch loss, top-1 accuracy in percent, top-5 accuracy in percent).
  """
  batch_time = AverageMeter('Time', ':6.3f')
  data_time = AverageMeter('Data', ':6.3f')
  losses = AverageMeter('Loss', ':.4e')
  top1 = AverageMeter('Acc@1', ':6.2f')
  top5 = AverageMeter('Acc@5', ':6.2f')
  losses_ = []
  top1_ = []
  top5_ = []
  progress = ProgressMeter(
    len(train_loader),
    [batch_time, data_time, losses, top1, top5],
    prefix="Epoch: [{}] Train:".format(epoch))

  # switch to train mode
  model.train()

  end = time.time()
  for i, (images, target) in enumerate(train_loader):
    # measure data loading time
    data_time.update(time.time() - end)

    images = images.to(dev, non_blocking=True)
    target = target.to(dev, non_blocking=True)

    # compute output
    output = model(images)
    loss = criterion(output, target)

    # measure accuracy and record loss
    acc1, acc5 = accuracy(output, target, topk=(1, 5))
    # Store plain floats rather than tensors living on `dev`: the history is
    # plotted later, and tensors on a CUDA device cannot be converted by
    # NumPy/matplotlib.
    loss_value = loss.item()
    acc1_value = acc1.item()
    acc5_value = acc5.item()
    batch_size = images.size(0)
    losses.update(loss_value, batch_size)
    top1.update(acc1_value, batch_size)
    top5.update(acc5_value, batch_size)
    losses_.append(loss_value)
    top1_.append(acc1_value)
    top5_.append(acc5_value)

    # compute gradient and do SGD step
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # measure elapsed time
    batch_time.update(time.time() - end)
    end = time.time()

    if show_progress:
      progress.display(i)

  return losses_, top1_, top5_

def validate(val_loader, model, criterion, epoch, show_progress = True):
  # Code from https://github.com/pytorch/examples/blob/master/imagenet/main.py
  """Evaluate `model` on `val_loader` without computing gradients.

  Args:
    val_loader: iterable yielding (images, target) batches.
    model: network to evaluate; it is switched to eval mode.
    criterion: loss callable applied to (output, target).
    epoch: epoch number, used only in the printed messages.
    show_progress: when True, print the running meters after every batch.

  Returns:
    (losses, top1, top5): three lists with one Python float per batch
    (batch loss, top-1 accuracy in percent, top-5 accuracy in percent).
  """
  batch_time = AverageMeter('Time', ':6.3f')
  losses = AverageMeter('Loss', ':.4e')
  top1 = AverageMeter('Acc@1', ':6.2f')
  top5 = AverageMeter('Acc@5', ':6.2f')
  losses_ = []
  top1_ = []
  top5_ = []
  progress = ProgressMeter(
    len(val_loader),
    [batch_time, losses, top1, top5],
    prefix="Epoch: [{}] Test:".format(epoch))

  # switch to evaluate mode
  model.eval()

  with torch.no_grad():
    end = time.time()
    for i, (images, target) in enumerate(val_loader):
      images = images.to(dev, non_blocking=True)
      target = target.to(dev, non_blocking=True)

      # compute output
      output = model(images)
      loss = criterion(output, target)

      # measure accuracy and record loss
      acc1, acc5 = accuracy(output, target, topk=(1, 5))
      # Store plain floats rather than tensors living on `dev`: the history is
      # plotted later, and tensors on a CUDA device cannot be converted by
      # NumPy/matplotlib.
      loss_value = loss.item()
      acc1_value = acc1.item()
      acc5_value = acc5.item()
      batch_size = images.size(0)
      losses.update(loss_value, batch_size)
      top1.update(acc1_value, batch_size)
      top5.update(acc5_value, batch_size)
      losses_.append(loss_value)
      top1_.append(acc1_value)
      top5_.append(acc5_value)

      # measure elapsed time
      batch_time.update(time.time() - end)
      end = time.time()

      # Report inside the loop, as train() does; placed after the loop this
      # printed only once and raised NameError on an empty loader.
      if show_progress:
        progress.display(i)

    # TODO: this should also be done with the ProgressMeter
    print('Epoch {epoch:d} * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(epoch=epoch, top1=top1, top5=top5))

  return losses_, top1_, top5_

def train_epochs(net, bs, criterion, optimizer, epochs, start_epoch = 1, shuffle = True, num_workers = 0, scheduler = None, show_progress = True):
  """Train `net` on the global `train_dataset` and evaluate it on `test_dataset` after each epoch.

  Args:
    net: model passed to train() and validate().
    bs: batch size used by both data loaders.
    criterion: loss callable.
    optimizer: optimizer stepped during training.
    epochs: number of the last epoch to run (inclusive).
    start_epoch: number of the first epoch to run.
    shuffle: whether the training loader shuffles its data.
    num_workers: worker count for both data loaders.
    scheduler: optional object whose step() is called once after every epoch.
    show_progress: forwarded to train() and validate().

  Returns:
    (train_losses, train_top1, train_top5, val_losses, val_top1, val_top5):
    six lists with one entry per epoch; each entry is the per-batch list
    returned by train() or validate() for that epoch.
  """
  train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size = bs,
    shuffle = shuffle,
    num_workers = num_workers,
    pin_memory = True)

  val_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size = bs,
    num_workers = num_workers,
    pin_memory = True)

  train_losses, train_top1, train_top5 = [], [], []
  val_losses, val_top1, val_top5 = [], [], []

  for epoch in range(start_epoch, epochs + 1):
    epoch_losses, epoch_top1, epoch_top5 = train(train_loader, net, criterion, optimizer, epoch, show_progress)
    train_losses.append(epoch_losses)
    train_top1.append(epoch_top1)
    train_top5.append(epoch_top5)

    epoch_losses, epoch_top1, epoch_top5 = validate(val_loader, net, criterion, epoch, show_progress)
    val_losses.append(epoch_losses)
    val_top1.append(epoch_top1)
    val_top5.append(epoch_top5)

    if scheduler is not None:
      scheduler.step()

  return train_losses, train_top1, train_top5, val_losses, val_top1, val_top5

## Basic Convolutional Neural Network

In [None]:
class SimpleCNN(nn.Module):
  """Two conv + ReLU + 2x2 max-pool stages followed by one linear classifier.

  Args:
    in_channels: number of channels of the input images.
    input_size: side length of the input (assumed square); the linear layer is
      sized for a side of input_size / 4 after the two pooling steps.
    hidden_size: output channels of conv1; conv2 outputs twice as many.
    output_size: number of output values of the linear layer.
    kernel_size: convolution kernel size, also used to derive the padding.
    stride: accepted but currently unused; both convolutions use stride 1.
  """

  def __init__(self, in_channels, input_size, hidden_size, output_size, kernel_size = 3, stride = 1):
    super().__init__()
    # The padding is derived from the actual kernel size so that each
    # convolution keeps the spatial size the linear layer is sized for
    # (previously it was always computed for a 3x3 kernel).
    self.conv1 = nn.Conv2d(in_channels = in_channels, out_channels = hidden_size, kernel_size = kernel_size, stride = 1, padding = calculate_padding(input_size, kernel_size))
    self.conv2 = nn.Conv2d(in_channels = hidden_size, out_channels = hidden_size * 2, kernel_size = kernel_size, stride = 1, padding = calculate_padding(input_size / 2, kernel_size))
    self.linear = nn.Linear(in_features = int(((input_size/4)**2) * (hidden_size*2)), out_features = output_size)
    self.relu = nn.ReLU()
    self.pool2d = nn.MaxPool2d(kernel_size = 2, stride = 2)

  def forward(self, x):
    """Apply both conv stages, flatten, and return the linear layer's output."""
    x = self.pool2d(self.relu(self.conv1(x)))
    x = self.pool2d(self.relu(self.conv2(x)))
    # Flatten to (-1, in_features) for the linear layer.
    x = x.view(-1, self.linear.in_features)
    return self.linear(x)

In [None]:
# Baseline model for 1-channel 28x28 inputs with 10 outputs, moved to `dev`; echoed as the cell output.
net = SimpleCNN(in_channels = 1, input_size = 28, hidden_size = 50, output_size = 10).to(dev)
net

In [None]:
epochs = 1
bs = 128
# `lr` has to be assigned here: it is otherwise first defined in the
# "Learning Rate" section, so a top-to-bottom run raised NameError.
# 0.1 is the baseline value used by the later sections.
lr = 0.1
criterion = nn.CrossEntropyLoss().to(dev)
optimizer = torch.optim.SGD(net.parameters(), lr)
A = train_epochs(net, bs, criterion, optimizer, epochs, show_progress = False)

In [None]:
# Per-batch training top-1 accuracy of the first epoch of run A.
fig, ax = plt.subplots(figsize = (20,10))
x_label = np.arange(1, len(A[1][0]) + 1)
ax.plot(x_label, A[1][0], label = 'SimpleCNN')
ax.set(xlabel = 'Iteration', ylabel = 'Accuracy')
plt.show()
fig.savefig('SimpleCNN.png')

## Learning Rate

In [None]:
epochs = 1
bs = 128
criterion = nn.CrossEntropyLoss().to(dev)

# Train a fresh SimpleCNN with plain SGD once per learning rate; the results
# are bound to A-D in the same order as the rates below.
lr_results = []
for lr in (0.01, 0.1, 1.0, 10):
  net = SimpleCNN(1, 28, 50, 10).to(dev)
  optimizer = torch.optim.SGD(net.parameters(), lr)
  lr_results.append(train_epochs(net, bs, criterion, optimizer, epochs, show_progress = False))
A, B, C, D = lr_results

In [None]:
# Per-batch training top-1 accuracy (first epoch) for each learning rate.
fig, ax = plt.subplots(figsize = (20,10))
x_label = np.arange(1, len(A[1][0]) + 1)
for run, label in ((A, 'LR = 0.01'), (B, 'LR = 0.1'), (C, 'LR = 1.0'), (D, 'LR = 10')):
  ax.plot(x_label, run[1][0], label = label)
ax.set(xlabel = 'Iteration', ylabel = 'Accuracy')
ax.legend()
plt.show()
fig.savefig('Learning Rate.png')

## Batch Size

In [None]:
epochs = 1
lr = 0.1
criterion = nn.CrossEntropyLoss().to(dev)

# Train a fresh SimpleCNN once per batch size; the results are bound to A-D
# in the same order as the sizes below.
bs_results = []
for bs in (128, 256, 512, 2048):
  net = SimpleCNN(1, 28, 50, 10).to(dev)
  optimizer = torch.optim.SGD(net.parameters(), lr)
  bs_results.append(train_epochs(net, bs, criterion, optimizer, epochs, show_progress = False))
A, B, C, D = bs_results

In [None]:
# Per-batch training top-1 accuracy (first epoch) for each batch size.
# The legend labels name the batch sizes the runs A-D are trained with
# (128, 256, 512, 2048); they previously read 32, 64, 128, 2048.
fig, ax = plt.subplots(figsize = (20,10))
ax.plot(np.arange(1, len(A[1][0]) + 1), A[1][0], label = 'BS = 128')
ax.plot(np.arange(1, len(B[1][0]) + 1), B[1][0], label = 'BS = 256')
ax.plot(np.arange(1, len(C[1][0]) + 1), C[1][0], label = 'BS = 512')
ax.plot(np.arange(1, len(D[1][0]) + 1), D[1][0], label = 'BS = 2048')
ax.set_xlabel('Iteration')
ax.set_ylabel('Accuracy')
ax.legend()
fig.savefig('Batch Size.png')

## Weighted CrossEntropyLoss vs Unbalanced CrossEntropyLoss

In [None]:
# Per-class weights (one per digit 0-9) for the weighted CrossEntropyLoss below.
# An earlier assignment holding exactly half of each value was overwritten
# before use and has been removed as a dead store.
# NOTE(review): presumably inverse class-frequency weights of the training
# set — confirm how they were derived.
weight = torch.Tensor([1.013000169,
                       0.889943637,
                       1.007049345,
                       0.978633176,
                       1.027045532,
                       1.106806862,
                       1.013856032,
                       0.957701516,
                       1.025465732,
                       1.008572869])

epochs = 1
lr = 0.1
bs = 128

# Run A: unweighted loss.
net = SimpleCNN(1, 28, 50, 10).to(dev)
criterion = nn.CrossEntropyLoss().to(dev)
optimizer = torch.optim.SGD(net.parameters(), lr)
A = train_epochs(net, bs, criterion, optimizer, epochs, show_progress = False)

# Run B: same setup with the per-class weights applied.
net = SimpleCNN(1, 28, 50, 10).to(dev)
criterion = nn.CrossEntropyLoss(weight = weight).to(dev)
optimizer = torch.optim.SGD(net.parameters(), lr)
B = train_epochs(net, bs, criterion, optimizer, epochs, show_progress = False)

In [None]:
# Per-batch training top-1 accuracy (first epoch) for both loss variants.
fig, ax = plt.subplots(figsize = (20,10))
for run, label in ((A, 'Unbalanced CrossEntropyLoss'), (B, 'Weighted CrossEntropyLoss')):
  series = run[1][0]
  ax.plot(np.arange(1, len(series) + 1), series, label = label)
ax.set(xlabel = 'Iteration', ylabel = 'Accuracy')
ax.legend()
fig.savefig('Loss Functions.png')

## SGD with Momentum

In [None]:
epochs = 1
lr = 0.1
bs = 128
criterion = nn.CrossEntropyLoss().to(dev)

# Train a fresh SimpleCNN once per SGD momentum value; the results are bound
# to A-D in the same order as the values below.
momentum_results = []
for momentum in (0.0, 0.1, 0.9, 5.0):
  net = SimpleCNN(1, 28, 50, 10).to(dev)
  optimizer = torch.optim.SGD(net.parameters(), lr, momentum = momentum)
  momentum_results.append(train_epochs(net, bs, criterion, optimizer, epochs, show_progress = False))
A, B, C, D = momentum_results

In [None]:
# Per-batch training top-1 accuracy (first epoch) for each momentum value.
fig, ax = plt.subplots(figsize = (20,10))
for run, label in ((A, 'Momentum = 0.0'), (B, 'Momentum = 0.1'), (C, 'Momentum = 0.9'), (D, 'Momentum = 5.0')):
  series = run[1][0]
  ax.plot(np.arange(1, len(series) + 1), series, label = label)
ax.set(xlabel = 'Iteration', ylabel = 'Accuracy')
ax.legend()
fig.savefig('Momentum.png')

## L2 Regularization

In [None]:
epochs = 1
lr = 0.1
bs = 128
criterion = nn.CrossEntropyLoss().to(dev)

# Train a fresh SimpleCNN once per SGD weight-decay value; the results are
# bound to A-D in the same order as the values below.
weight_decay_results = []
for weight_decay in (0.0, 0.01, 0.1, 1.0):
  net = SimpleCNN(1, 28, 50, 10).to(dev)
  optimizer = torch.optim.SGD(net.parameters(), lr, weight_decay = weight_decay)
  weight_decay_results.append(train_epochs(net, bs, criterion, optimizer, epochs, show_progress = False))
A, B, C, D = weight_decay_results

In [None]:
# Per-batch training top-1 accuracy (first epoch) for each weight-decay value.
fig, ax = plt.subplots(figsize = (20,10))
for run, label in ((A, 'Weight Decay = 0.0'), (B, 'Weight Decay = 0.01'), (C, 'Weight Decay = 0.1'), (D, 'Weight Decay = 1.0')):
  series = run[1][0]
  ax.plot(np.arange(1, len(series) + 1), series, label = label)
ax.set(xlabel = 'Iteration', ylabel = 'Accuracy')
ax.legend()
fig.savefig('L2 Regularization.png')

## Optimizer

In [None]:
epochs = 1
bs = 128
criterion = nn.CrossEntropyLoss().to(dev)

# One (learning rate, optimizer factory) pair per run; each run trains a
# fresh SimpleCNN and the results are bound to A-C in this order.
optimizer_setups = (
  (0.1, lambda params, rate: torch.optim.SGD(params, rate, momentum = 0.9)),
  (0.01, torch.optim.Adagrad),
  (0.001, torch.optim.Adam),
)
optimizer_results = []
for lr, make_optimizer in optimizer_setups:
  net = SimpleCNN(1, 28, 50, 10).to(dev)
  optimizer = make_optimizer(net.parameters(), lr)
  optimizer_results.append(train_epochs(net, bs, criterion, optimizer, epochs, show_progress = False))
A, B, C = optimizer_results

In [None]:
# Per-batch training top-1 accuracy (first epoch) for each optimizer.
# The labels describe the optimizers as configured for runs A-C: none of
# them is created with weight decay, so the labels no longer claim it.
# NOTE(review): if weight decay was intended, add `weight_decay=` to the
# optimizers in the training cell instead.
fig, ax = plt.subplots(figsize = (20,10))
ax.plot(np.arange(1, len(A[1][0]) + 1), A[1][0], label = 'SGD + Momentum')
ax.plot(np.arange(1, len(B[1][0]) + 1), B[1][0], label = 'Adagrad')
ax.plot(np.arange(1, len(C[1][0]) + 1), C[1][0], label = 'Adam')
ax.set_xlabel('Iteration')
ax.set_ylabel('Accuracy')
ax.legend()
fig.savefig('Optimizer.png')

## Network Width

In [None]:
epochs = 1
lr = 0.1
bs = 128
criterion = nn.CrossEntropyLoss().to(dev)

# Train one SimpleCNN per hidden size (the number of conv1 output channels);
# the results are bound to A-D in the same order as the sizes below.
width_results = []
for hidden_size in (5, 10, 50, 100):
  net = SimpleCNN(1, 28, hidden_size, 10).to(dev)
  optimizer = torch.optim.SGD(net.parameters(), lr)
  width_results.append(train_epochs(net, bs, criterion, optimizer, epochs, show_progress = False))
A, B, C, D = width_results

In [None]:
# Per-batch training top-1 accuracy (first epoch) for each network width.
fig, ax = plt.subplots(figsize = (20,10))
for run, label in ((A, 'Conv1 Channels = 5'), (B, 'Conv1 Channels = 10'), (C, 'Conv1 Channels = 50'), (D, 'Conv1 Channels = 100')):
  series = run[1][0]
  ax.plot(np.arange(1, len(series) + 1), series, label = label)
ax.set(xlabel = 'Iteration', ylabel = 'Accuracy')
ax.legend()
fig.savefig('Network Width')

## Network Depth by Convolutional Layers

In [None]:
class Depth1(nn.Module):
  """One conv + ReLU + 2x2 max-pool stage followed by one linear classifier.

  Args:
    in_channels: number of channels of the input images.
    input_size: side length of the input (assumed square); the linear layer is
      sized for a side of input_size / 2 after the single pooling step.
    hidden_size: output channels of conv1.
    output_size: number of output values of the linear layer.
    kernel_size: convolution kernel size, also used to derive the padding.
    stride: accepted but currently unused; the convolution uses stride 1.
  """

  def __init__(self, in_channels, input_size, hidden_size, output_size, kernel_size = 3, stride = 1):
    super().__init__()
    # The padding follows the actual kernel size so the convolution keeps the
    # spatial size the linear layer is sized for (it was previously always
    # computed for a 3x3 kernel).
    self.conv1 = nn.Conv2d(in_channels = in_channels, out_channels = hidden_size, kernel_size = kernel_size, stride = 1, padding = calculate_padding(input_size, kernel_size))
    self.linear = nn.Linear(in_features = int(((input_size/2)**2) * hidden_size), out_features = output_size)
    self.relu = nn.ReLU()
    self.pool2d = nn.MaxPool2d(kernel_size = 2, stride = 2)

  def forward(self, x):
    """Apply the conv stage, flatten, and return the linear layer's output."""
    x = self.pool2d(self.relu(self.conv1(x)))
    x = x.view(-1, self.linear.in_features)
    return self.linear(x)

class Depth3(nn.Module):
  """Three conv + ReLU + 2x2 max-pool stages followed by one linear classifier.

  NOTE: a later cell of this notebook defines another class that is also named
  Depth3 (three fully connected layers) and rebinds the name.

  Args:
    in_channels: number of channels of the input images.
    input_size: side length of the input (assumed square); the linear layer is
      sized for a side of floor(input_size / 8) after the three pooling steps.
    hidden_size: output channels of conv1; conv2 and conv3 output 2x and 4x as many.
    output_size: number of output values of the linear layer.
    kernel_size: convolution kernel size, also used to derive the padding.
    stride: accepted but currently unused; all convolutions use stride 1.
  """

  def __init__(self, in_channels, input_size, hidden_size, output_size, kernel_size = 3, stride = 1):
    super().__init__()
    # The padding follows the actual kernel size so each convolution keeps the
    # spatial size the linear layer is sized for (it was previously always
    # computed for a 3x3 kernel).
    self.conv1 = nn.Conv2d(in_channels = in_channels, out_channels = hidden_size, kernel_size = kernel_size, stride = 1, padding = calculate_padding(input_size, kernel_size))
    self.conv2 = nn.Conv2d(in_channels = hidden_size, out_channels = hidden_size * 2, kernel_size = kernel_size, stride = 1, padding = calculate_padding(input_size / 2, kernel_size))
    self.conv3 = nn.Conv2d(in_channels = hidden_size * 2, out_channels = hidden_size * 4, kernel_size = kernel_size, stride = 1, padding = calculate_padding(input_size / 4, kernel_size))
    # Floor division mirrors the rounding-down of the last pooling step (e.g. 7 -> 3).
    self.linear = nn.Linear(in_features = int(((input_size // 8) ** 2) * (hidden_size * 4)), out_features = output_size)
    self.relu = nn.ReLU()
    self.pool2d = nn.MaxPool2d(kernel_size = 2, stride = 2)

  def forward(self, x):
    """Apply the three conv stages, flatten, and return the linear layer's output."""
    x = self.pool2d(self.relu(self.conv1(x)))
    x = self.pool2d(self.relu(self.conv2(x)))
    x = self.pool2d(self.relu(self.conv3(x)))
    x = x.view(-1, self.linear.in_features)
    return self.linear(x)

In [None]:
epochs = 1
lr = 0.1
bs = 128
criterion = nn.CrossEntropyLoss().to(dev)

# Train one model per architecture, each built and trained in turn: the
# models end up in net1-net3 and their results in A-C, in the order below.
depth_nets, depth_results = [], []
for model_cls in (Depth1, SimpleCNN, Depth3):
  model = model_cls(1, 28, 50, 10).to(dev)
  optimizer = torch.optim.SGD(model.parameters(), lr)
  depth_results.append(train_epochs(model, bs, criterion, optimizer, epochs, show_progress = False))
  depth_nets.append(model)
net1, net2, net3 = depth_nets
A, B, C = depth_results

In [None]:
# Per-batch training top-1 accuracy (first epoch) for each number of conv layers.
fig, ax = plt.subplots(figsize = (20,10))
for run, label in ((A, 'Conv Layers = 1'), (B, 'Conv Layers = 2'), (C, 'Conv Layers = 3')):
  series = run[1][0]
  ax.plot(np.arange(1, len(series) + 1), series, label = label)
ax.set(xlabel = 'Iteration', ylabel = 'Accuracy')
ax.legend()
fig.savefig('Network Depth by Convolutional Layers')

## Network Depth by Fully Connected Layers

In [None]:
class Depth2(nn.Module):
  """Two conv + ReLU + 2x2 max-pool stages followed by two linear layers.

  Args:
    in_channels: number of channels of the input images.
    input_size: side length of the input (assumed square); the first linear
      layer is sized for a side of input_size / 4 after the two pooling steps.
    hidden_size: output channels of conv1; conv2 outputs twice as many.
    output_size: number of output values of the last linear layer.
    kernel_size: convolution kernel size, also used to derive the padding.
    stride: accepted but currently unused; both convolutions use stride 1.
  """

  def __init__(self, in_channels, input_size, hidden_size, output_size, kernel_size = 3, stride = 1):
    super().__init__()
    # The padding follows the actual kernel size so each convolution keeps the
    # spatial size the linear layers are sized for (it was previously always
    # computed for a 3x3 kernel).
    self.conv1 = nn.Conv2d(in_channels = in_channels, out_channels = hidden_size, kernel_size = kernel_size, stride = 1, padding = calculate_padding(input_size, kernel_size))
    self.conv2 = nn.Conv2d(in_channels = hidden_size, out_channels = hidden_size * 2, kernel_size = kernel_size, stride = 1, padding = calculate_padding(input_size / 2, kernel_size))
    self.linear1 = nn.Linear(in_features = int(((input_size/4)**2) * (hidden_size*2)), out_features = int(((input_size/4)**2) * hidden_size))
    self.linear2 = nn.Linear(in_features = self.linear1.out_features, out_features = output_size)
    self.relu = nn.ReLU()
    self.pool2d = nn.MaxPool2d(kernel_size = 2, stride = 2)

  def forward(self, x):
    """Apply both conv stages, flatten, and return the output of the two linear layers."""
    x = self.pool2d(self.relu(self.conv1(x)))
    x = self.pool2d(self.relu(self.conv2(x)))
    x = x.view(-1, self.linear1.in_features)
    # NOTE(review): no activation between the linear layers, so together they
    # remain a single affine map — confirm this is intended.
    x = self.linear1(x)
    return self.linear2(x)

class Depth3(nn.Module):
  """Two conv + ReLU + 2x2 max-pool stages followed by three linear layers.

  NOTE: this rebinds the name Depth3, which an earlier cell of this notebook
  uses for a model with three convolutional layers.

  Args:
    in_channels: number of channels of the input images.
    input_size: side length of the input (assumed square); the first linear
      layer is sized for a side of input_size / 4 after the two pooling steps.
    hidden_size: output channels of conv1; conv2 outputs twice as many.
    output_size: number of output values of the last linear layer.
    kernel_size: convolution kernel size, also used to derive the padding.
    stride: accepted but currently unused; both convolutions use stride 1.
  """

  def __init__(self, in_channels, input_size, hidden_size, output_size, kernel_size = 3, stride = 1):
    super().__init__()
    # The padding follows the actual kernel size so each convolution keeps the
    # spatial size the linear layers are sized for (it was previously always
    # computed for a 3x3 kernel).
    self.conv1 = nn.Conv2d(in_channels = in_channels, out_channels = hidden_size, kernel_size = kernel_size, stride = 1, padding = calculate_padding(input_size, kernel_size))
    self.conv2 = nn.Conv2d(in_channels = hidden_size, out_channels = hidden_size * 2, kernel_size = kernel_size, stride = 1, padding = calculate_padding(input_size / 2, kernel_size))
    self.linear1 = nn.Linear(in_features = int(((input_size/4)**2) * (hidden_size*2)), out_features = int(((input_size/4)**2) * hidden_size))
    self.linear2 = nn.Linear(in_features = self.linear1.out_features, out_features = int(self.linear1.out_features/2))
    self.linear3 = nn.Linear(in_features = self.linear2.out_features, out_features = output_size)
    self.relu = nn.ReLU()
    self.pool2d = nn.MaxPool2d(kernel_size = 2, stride = 2)

  def forward(self, x):
    """Apply both conv stages, flatten, and return the output of the three linear layers."""
    x = self.pool2d(self.relu(self.conv1(x)))
    x = self.pool2d(self.relu(self.conv2(x)))
    x = x.view(-1, self.linear1.in_features)
    # NOTE(review): no activation between the linear layers, so together they
    # remain a single affine map — confirm this is intended.
    x = self.linear1(x)
    x = self.linear2(x)
    return self.linear3(x)

In [None]:
epochs = 1
lr = 0.1
bs = 128
criterion = nn.CrossEntropyLoss().to(dev)

# Train one model per architecture, each built and trained in turn: the
# models end up in net1-net3 and their results in A-C, in the order below.
fc_nets, fc_results = [], []
for model_cls in (SimpleCNN, Depth2, Depth3):
  model = model_cls(1, 28, 50, 10).to(dev)
  optimizer = torch.optim.SGD(model.parameters(), lr)
  fc_results.append(train_epochs(model, bs, criterion, optimizer, epochs, show_progress = False))
  fc_nets.append(model)
net1, net2, net3 = fc_nets
A, B, C = fc_results

In [None]:
# Per-batch training top-1 accuracy (first epoch) for each number of fully connected layers.
fig, ax = plt.subplots(figsize = (20,10))
for run, label in ((A, 'FC Layers = 1'), (B, 'FC Layers = 2'), (C, 'FC Layers = 3')):
  series = run[1][0]
  ax.plot(np.arange(1, len(series) + 1), series, label = label)
ax.set(xlabel = 'Iteration', ylabel = 'Accuracy')
ax.legend()
fig.savefig('Network Depth by Fully Connected Layers')

## ResNet

In [None]:
epochs = 1
lr = 0.1
bs = 128
criterion = nn.CrossEntropyLoss().to(dev)

# Each variable now holds the architecture its name says; previously all four
# were built with torchvision.models.resnet18, so the comparison trained the
# same network four times.
# In every model conv1 is replaced by a new 1-input-channel convolution so the
# network accepts the single-channel images.
# NOTE(review): the final fc layer is left as loaded — presumably still the
# ImageNet classification head; consider replacing it with a 10-way layer.
resnet18 = torchvision.models.resnet18(pretrained = True)
resnet18.conv1 = nn.Conv2d(1, 64, kernel_size = 7, stride = 2, padding = 3, bias = False)
resnet18.to(dev)
optimizer = torch.optim.SGD(resnet18.parameters(), lr)
A = train_epochs(resnet18, bs, criterion, optimizer, epochs, show_progress = False)

resnet34 = torchvision.models.resnet34(pretrained = True)
resnet34.conv1 = nn.Conv2d(1, 64, kernel_size = 7, stride = 2, padding = 3, bias = False)
resnet34.to(dev)
optimizer = torch.optim.SGD(resnet34.parameters(), lr)
B = train_epochs(resnet34, bs, criterion, optimizer, epochs, show_progress = False)

resnet50 = torchvision.models.resnet50(pretrained = True)
resnet50.conv1 = nn.Conv2d(1, 64, kernel_size = 7, stride = 2, padding = 3, bias = False)
resnet50.to(dev)
optimizer = torch.optim.SGD(resnet50.parameters(), lr)
C = train_epochs(resnet50, bs, criterion, optimizer, epochs, show_progress = False)

resnet101 = torchvision.models.resnet101(pretrained = True)
resnet101.conv1 = nn.Conv2d(1, 64, kernel_size = 7, stride = 2, padding = 3, bias = False)
resnet101.to(dev)
optimizer = torch.optim.SGD(resnet101.parameters(), lr)
D = train_epochs(resnet101, bs, criterion, optimizer, epochs, show_progress = False)

In [None]:
# Per-batch training top-1 accuracy (first epoch) for each ResNet run.
fig, ax = plt.subplots(figsize = (20,10))
for run, label in ((A, 'ResNet18'), (B, 'ResNet34'), (C, 'ResNet50'), (D, 'ResNet101')):
  series = run[1][0]
  ax.plot(np.arange(1, len(series) + 1), series, label = label)
ax.set(xlabel = 'Iteration', ylabel = 'Accuracy')
ax.legend()
fig.savefig('ResNet')

## Possible Improvements: DropOut Layers

In [None]:
class DropOut(nn.Module):
  """Two conv + ReLU + 2x2 max-pool stages followed by two linear layers, with dropout before each linear layer.

  Args:
    in_channels: number of channels of the input images.
    input_size: side length of the input (assumed square); the first linear
      layer is sized for a side of input_size / 4 after the two pooling steps.
    hidden_size: output channels of conv1; conv2 outputs twice as many.
    output_size: number of output values of the last linear layer.
    kernel_size: convolution kernel size, also used to derive the padding.
    stride: accepted but currently unused; both convolutions use stride 1.
  """

  def __init__(self, in_channels, input_size, hidden_size, output_size, kernel_size = 3, stride = 1):
    super().__init__()
    # The padding follows the actual kernel size so each convolution keeps the
    # spatial size the linear layers are sized for (it was previously always
    # computed for a 3x3 kernel).
    self.conv1 = nn.Conv2d(in_channels = in_channels, out_channels = hidden_size, kernel_size = kernel_size, stride = 1, padding = calculate_padding(input_size, kernel_size))
    self.conv2 = nn.Conv2d(in_channels = hidden_size, out_channels = hidden_size * 2, kernel_size = kernel_size, stride = 1, padding = calculate_padding(input_size / 2, kernel_size))
    self.linear1 = nn.Linear(in_features = int(((input_size/4)**2) * (hidden_size*2)), out_features = int(((input_size/4)**2) * hidden_size))
    self.linear2 = nn.Linear(in_features = self.linear1.out_features, out_features = output_size)
    self.relu = nn.ReLU()
    self.pool2d = nn.MaxPool2d(kernel_size = 2, stride = 2)
    # nn.Dropout with its default settings; the same module is applied twice in forward().
    self.dropout = nn.Dropout()

  def forward(self, x):
    """Apply both conv stages, flatten, and return the output of the dropout/linear stack."""
    x = self.pool2d(self.relu(self.conv1(x)))
    x = self.pool2d(self.relu(self.conv2(x)))
    x = x.view(-1, self.linear1.in_features)
    # NOTE(review): there is no activation between the two linear layers —
    # confirm this is intended.
    x = self.linear1(self.dropout(x))
    return self.linear2(self.dropout(x))

In [None]:
lr = 0.1
bs = 128
criterion = nn.CrossEntropyLoss().to(dev)

# One row per run: (model class, hidden size, extra SGD arguments, epochs).
# Each model is built and trained in turn; the models end up in net1-net4
# and their results in A-D, in the order below.
improvement_setups = (
  (SimpleCNN, 50, {}, 1),
  (DropOut, 50, {}, 1),
  (DropOut, 50, {'momentum': 0.9, 'weight_decay': 0.01}, 1),
  (DropOut, 100, {'momentum': 0.9}, 10),
)
improvement_nets, improvement_results = [], []
for model_cls, hidden_size, sgd_kwargs, epochs in improvement_setups:
  model = model_cls(1, 28, hidden_size, 10).to(dev)
  optimizer = torch.optim.SGD(model.parameters(), lr, **sgd_kwargs)
  improvement_results.append(train_epochs(model, bs, criterion, optimizer, epochs, show_progress = False))
  improvement_nets.append(model)
net1, net2, net3, net4 = improvement_nets
A, B, C, D = improvement_results

In [None]:
# Per-batch training top-1 accuracy: the first epoch of runs A-C and the
# tenth epoch (index 9) of run D.
fig, ax = plt.subplots(figsize = (20,10))
improvement_curves = (
  (A[1][0], 'Vanilla'),
  (B[1][0], 'Dropout'),
  (C[1][0], 'Dropout + Momentum + Weight Decay'),
  (D[1][9], '100 Channels + Momentum + 10 Epochs'),
)
for series, label in improvement_curves:
  ax.plot(np.arange(1, len(series) + 1), series, label = label)
ax.set(xlabel = 'Iteration', ylabel = 'Accuracy')
ax.legend()
fig.savefig('Improvements')