### Import statements and data extraction


In [34]:
import h5py as h5py
import torch
import numpy as np
from torch.nn.parameter import Parameter
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

filename = 'MNIST_synthetic.h5'
f = h5py.File(filename, 'r')

train_dataset = f['train_dataset'][...]
train_labels = f['train_labels'][...]
test_dataset = f['test_dataset'][...]
f.close()

# if you have less cores or not a lot of RAM change the num_workers value
trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=4,
                                          shuffle=True, num_workers=6)

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset2 = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader2 = torch.utils.data.DataLoader(trainset2, batch_size=4,
                                          shuffle=True, num_workers=2)

### Pre processing data


In [35]:
# TODO: pre process data

# Convolutional neural network


In [36]:
# NOT DONE

# For building CNN layers, we have the following hyperparameters :
# Kernel size : Sets the size of the filter inside the layer. Kernel = Filter
# Out channels : Sets the number of filters. One filter produces one output channel.
# Out features : Sets the size of the output tensor
# Inside a convolutional layer, the input channels are paired with a convolutional filter
# to perform the convolutional operation. The filter convolves the input channel and
# the result of this operation is an output channel.

# Each layer in NN has 2 primary components : transformation and collection of weights
# transformation represented as code
# collection of weights represented as data
# pytorch layers are defined by classes
# Every pytorch nn.Module has a forward method that needs to be implemented
# When we're building layers, we need to implement forward. Forward is the transformation
# pytorch neural network

# CNN requires linear and convolution layers
# We should have different configs to determine which config is better
# e.g. 1 config with more conv layers, or different kernel sizes, etc
# Convolutional layers reduce memory usage and compute faster.
# Convolutional layers work better than fully connected ones because
# they are lighter and more efficient at learning spatial features.
# https://www.sicara.ai/blog/2019-10-31-convolutional-layer-convolution-kernel
# https://icecreamlabs.com/2018/08/19/3x3-convolution-filters%E2%80%8A-%E2%80%8Aa-popular-choice/

# we will use 3x3 for kernel size, also its faster to train than 5x5 kernel size
class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        # in_channels 1 because our data is grayscale? it's 56000x64x64x1
        # our input image is 1x64x64
        # kernel_size being 3 means that we're doing 3x3 square convolution
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3) # image dimension is 3xNxN
        # output has dimensions 6x62x62
        # with max_pool it's 16*31x31
        # second convolutional layer, i.e. the output of the first layer will be the input of this layer
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3)
        # output has dimensions 16 * 29 * 29
        # with max pool it's 16 * 14 * 14

        # now that we've done some convolution, we want to flatten the output
        # and use it as input for linear layers

        # after we run our image through the first 2 convolutional layers
        self.fc1 = nn.Linear(in_features=16 * 14 * 14 , out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)

        # output layer
        self.fc3 = nn.Linear(in_features=84, out_features=11) # 11 because we have 11 labels

    # Takes in tensor t and transform it using layer, new tensor is then returned
    def forward(self, x):
        # Picking our activation function for our cnn :
        # We decided to use ReLu instead of a sigmoid function.
        # Training a neural network is computationally expensive, ReLu is less expensive to compute
        # because it does not need to compute exponential operations like Sigmoid.
        # From stackoverflow:
        # The biggest advantage of ReLu is indeed non-saturation of its gradient,
        # which greatly accelerates the convergence of stochastic gradient descent compared to the sigmoid / tanh functions
        # http://www.cs.toronto.edu/~fritz/absps/imagenet.pdf

        # max pooling over a (2,2) window (window of 4 elements)
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)

        # before we can pass it into the fully connected layer, i.e. fc1,f2,fc3
        # we need to flatten it down to a single tensor
        x = x.view(-1, self.num_flat_features(x)) # output shape is (1, 16 * 14 * 14)

        # We run it through our linear layers now
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)

        return x

    def num_flat_features(self, x):
        """
        Helper function
        :param x:
        :return: the number of flat features that we have
        """
        size = x.size()[1:] # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


In [37]:
network = Network()
print(network)

Network(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=3136, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=11, bias=True)
)


### Loss function and Optimizer


In [38]:
# cross entropy is pretty good for classification problems
# discuss pros of cross entropy in the report
criterion = nn.CrossEntropyLoss()
# might need to play a bit with learning rate and momentum if needed
# SGD is computationally efficient
# GD is terrible, uses too much memory
optimizer = optim.SGD(network.parameters(), lr=0.001, momentum=0.9)

### Training CNN


In [40]:
# loop over dataset multiple times
for epoch in range(2):
    running_loss = 0 # counter
    # get inputs, data is a list of [inputs, labels]
    for i, data in enumerate(trainloader, 0):
        print(data)
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = Network(inputs)
        loss = criterion(outputs, labels)
        loss.backward() # computing gradients wrt model's weights
        optimizer.step() # update using learning rate

        # print statistics
        running_loss += loss.item()
        # print every 2000 minibatches
        if i % 2000 == 1999:
            print('[%d, %5d loss: %.3f' %
                  (epoch + 1, i + 1, running_loss/2000))
            running_loss = 0

print('Done')

tensor([[[[0],
          [0],
          [0],
          ...,
          [0],
          [0],
          [0]],

         [[0],
          [0],
          [0],
          ...,
          [0],
          [0],
          [0]],

         [[0],
          [0],
          [0],
          ...,
          [0],
          [0],
          [0]],

         ...,

         [[0],
          [0],
          [0],
          ...,
          [0],
          [0],
          [0]],

         [[0],
          [0],
          [0],
          ...,
          [0],
          [0],
          [0]],

         [[0],
          [0],
          [0],
          ...,
          [0],
          [0],
          [0]]],


        [[[0],
          [0],
          [0],
          ...,
          [0],
          [0],
          [0]],

         [[0],
          [0],
          [0],
          ...,
          [0],
          [0],
          [0]],

         [[0],
          [0],
          [0],
          ...,
          [0],
          [0],
          [0]],

         ...,

     

ValueError: too many values to unpack (expected 2)