In [12]:
%matplotlib inline 

import numpy as np
import torch as th
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim

import torch.nn as nn
import torch.nn.functional as F

import matplotlib.pyplot as plt

In [13]:
# Use the GPU for computation if one is available, otherwise fall back to the CPU.
# Note: "gpu" is not a valid torch device type; the only valid fallback is "cpu".
device = th.device("cuda" if th.cuda.is_available() else "cpu")
device

device(type='cuda')

In [14]:
# Global Constants
# NOTE(review): presumably the number of training epochs -- confirm against the training loop
NUM_EPOCHS = 10
# NOTE(review): presumably the number of DataLoader worker processes -- confirm where loaders are built
NUM_WORKERS = 5
# Class labels; 5 entries, matching the 5 outputs hard-coded in the models' final layers
CLASSES = ["airplanes", "cars", "dog", "faces", "keyboard"]
# Default learning rate handed to the optimizer by lossAndOptimizer
LEARNING_RATE = 0.01

# Convolutional Neural Network Properties

* **First hidden layer**: conv layer with filter size 7x7, stride 2, padding 3, 64 channels, followed by Batch Normalization and ReLU
* **Second hidden layer**: max pooling with filter size 3x3, stride 2, padding 0
* **Third hidden layer**: conv layer with filter size 3x3, stride 1, padding 1, 64 channels, followed by Batch Normalization and ReLU
* **Fourth hidden layer**: max pooling with filter size 3x3, stride 2, padding 0

In [15]:
class ConvolutionNeuralNetwork(nn.Module):
  """Small CNN mapping a batch of 3x250x250 images to 5 class scores.

  Layers (spatial size shown for a 250x250 input):
    1. Conv 7x7, stride 2, padding 3, 64 channels -> BatchNorm -> ReLU  (125x125)
    2. Max pool 3x3, stride 2, padding 0                                (62x62)
    3. Conv 3x3, stride 1, padding 1, 64 channels -> BatchNorm -> ReLU  (62x62)
    4. Max pool 3x3, stride 2, padding 0                                (30x30)
    5. Fully connected layer: 64 * 30 * 30 features -> 5 outputs
  """

  def __init__(self):
    super().__init__()
    # Conv2d names its channel count `out_channels` (there is no `filter` keyword).
    self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3)
    # BatchNorm layers hold learnable parameters and running statistics, so they
    # must be registered once here rather than re-created on every forward pass.
    self.bn1 = nn.BatchNorm2d(64)
    self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)
    # conv2 consumes the 64 channels produced by conv1.
    self.conv2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1)
    self.bn2 = nn.BatchNorm2d(64)
    self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)

    # Fully connected layer with output of 5 channels (classes).
    # Input size: 250 -> conv1 -> 125 -> pool1 -> 62 -> conv2 -> 62 -> pool2 -> 30,
    # with 64 channels, i.e. 64 * 30 * 30 features per image.
    self.fc = nn.Linear(64 * 30 * 30, 5)

  def forward(self, x):
    """Compute class scores for a batch of images.

    Args:
      x: float tensor of shape (batch, 3, 250, 250).

    Returns:
      Tensor of shape (batch, 5) holding unnormalized class scores (logits).
    """
    x = F.relu(self.bn1(self.conv1(x)))
    x = self.pool1(x)
    x = F.relu(self.bn2(self.conv2(x)))
    x = self.pool2(x)
    # Collapse (channels, height, width) into one feature axis for the linear layer.
    x = x.flatten(start_dim=1)
    return self.fc(x)

In [16]:
def lossAndOptimizer(net, learning_rate=LEARNING_RATE):
  """Create the loss function and optimizer used to train ``net``.

  Args:
    net: module whose ``parameters()`` the optimizer will update.
    learning_rate: step size passed to Adam as ``lr``.

  Returns:
    A ``(criterion, optimizer)`` tuple: a cross-entropy loss and an Adam
    optimizer over the parameters of ``net``.
  """
  return nn.CrossEntropyLoss(), optim.Adam(net.parameters(), lr=learning_rate)

In [17]:
class LinearClassifier(nn.Module):
  """Single linear layer applied to the flattened image.

  Maps each 3x250x250 input (187500 values) directly to 5 class scores.
  """

  def __init__(self):
    super().__init__()
    self.linear = nn.Linear(250 * 250 * 3, 5)

  def forward(self, x):
    """Compute class scores for a batch of images.

    Args:
      x: tensor of shape (batch, 3, 250, 250).

    Returns:
      Tensor of shape (batch, 5).
    """
    # Flatten input 3x250x250 -> 187500.
    # flatten() is used instead of view() because view() raises on
    # non-contiguous tensors (e.g. an NHWC batch permuted to NCHW) and on
    # empty batches; flatten() copies only when it has to.
    x = x.flatten(start_dim=1)
    return self.linear(x)