In [134]:
from collections import OrderedDict
from collections import namedtuple
from itertools import product

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim # For optimizer to update weights

import torchvision
from torchvision import transforms, datasets
from torch.utils.tensorboard import SummaryWriter

import numpy as np
import matplotlib.pyplot as plt
import PIL

# Widen tensor reprs so printed tensors wrap less often in cell output.
torch.set_printoptions(linewidth=120)
# Limit intra-op CPU parallelism to 16 threads. This must be a *call*:
# assigning `torch.set_num_threads = 16` only rebinds the attribute to an int
# and leaves the thread count unchanged.
torch.set_num_threads(16)

import time
from IPython.display import clear_output
import pandas as pd
import json

from tqdm.notebook import tqdm

In [212]:
class Network(nn.Module):
    """Small CNN classifier: two conv/ReLU/max-pool stages and three linear layers.

    The first linear layer takes 12 * 22 * 22 flattened features, which is what
    the two 5x5 convolutions and 2x2 poolings produce for a 1x100x100 input.
    The network returns 3 raw scores per sample (no softmax is applied).
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: single-channel input -> 6 -> 12 feature maps.
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)

        # Classifier head: flattened features -> 200 -> 60 -> 3 scores.
        self.fc1 = nn.Linear(12 * 22 * 22, 200)
        self.fc2 = nn.Linear(200, 60)
        self.out = nn.Linear(60, 3)

    def forward(self, t):
        """Run the forward pass on tensor `t` and return the raw class scores."""
        # Two conv -> ReLU -> 2x2 max-pool stages.
        pooled = F.max_pool2d(F.relu(self.conv1(t)), kernel_size=2, stride=2)
        pooled = F.max_pool2d(F.relu(self.conv2(pooled)), kernel_size=2, stride=2)

        # Flatten to rows of fc1's input width (12 * 22 * 22).
        flat = pooled.reshape(-1, self.fc1.in_features)

        # Two hidden linear layers with ReLU.
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))

        # Output layer: scores are returned as-is, softmax is intentionally not applied.
        return self.out(hidden)

In [None]:
def get_num_correct(preds, labels):
    """Return, as a Python number, how many rows of `preds` have their largest
    score (along dim 1) at the index given by the matching entry of `labels`."""
    predicted = torch.argmax(preds, dim=1)
    hits = predicted == labels
    return hits.sum().item()


In [216]:
class Unit(nn.Module):
    """Building block: 3x3 convolution (stride 1, padding 1) -> batch norm -> ReLU."""

    def __init__(self,in_channels,out_channels):
        super().__init__()

        # Kernel 3 with padding 1 and stride 1 keeps the spatial size unchanged.
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()

    def forward(self,input):
        """Apply conv, batch norm and ReLU to `input`, in that order."""
        return self.relu(self.bn(self.conv(input)))

class SimpleNet(nn.Module):
    """Stack of 14 `Unit` blocks with three 2x2 max-pools, a 4x4 average pool
    and a final linear classifier.

    The linear layer takes exactly 128 features per sample, so the feature map
    coming out of the average pool must be 1x1 (as it is for 32x32 inputs:
    32 -> 16 -> 8 -> 4 -> 1). Other spatial sizes are rejected in `forward`
    instead of being silently folded into the batch dimension.

    Parameters
    ----------
    num_classes : int
        Number of output scores produced per sample.
    """

    def __init__(self,num_classes=3):
        super(SimpleNet,self).__init__()

        #Create 14 layers of the unit with max pooling in between
        self.unit1 = Unit(in_channels=1,out_channels=32)
        self.unit2 = Unit(in_channels=32, out_channels=32)
        self.unit3 = Unit(in_channels=32, out_channels=32)

        self.pool1 = nn.MaxPool2d(kernel_size=2)

        self.unit4 = Unit(in_channels=32, out_channels=64)
        self.unit5 = Unit(in_channels=64, out_channels=64)
        self.unit6 = Unit(in_channels=64, out_channels=64)
        self.unit7 = Unit(in_channels=64, out_channels=64)

        self.pool2 = nn.MaxPool2d(kernel_size=2)

        self.unit8 = Unit(in_channels=64, out_channels=128)
        self.unit9 = Unit(in_channels=128, out_channels=128)
        self.unit10 = Unit(in_channels=128, out_channels=128)
        self.unit11 = Unit(in_channels=128, out_channels=128)

        self.pool3 = nn.MaxPool2d(kernel_size=2)

        self.unit12 = Unit(in_channels=128, out_channels=128)
        self.unit13 = Unit(in_channels=128, out_channels=128)
        self.unit14 = Unit(in_channels=128, out_channels=128)

        self.avgpool = nn.AvgPool2d(kernel_size=4)

        #Add all the units into the Sequential layer in exact order
        self.net = nn.Sequential(
            self.unit1, self.unit2, self.unit3, self.pool1,
            self.unit4, self.unit5, self.unit6, self.unit7, self.pool2,
            self.unit8, self.unit9, self.unit10, self.unit11, self.pool3,
            self.unit12, self.unit13, self.unit14, self.avgpool,
        )

        self.fc = nn.Linear(in_features=128,out_features=num_classes)

    def forward(self, input):
        """Return one row of `num_classes` scores per sample in `input`.

        Raises
        ------
        ValueError
            If the pooled feature map is not 1x1, i.e. the input spatial size
            does not match what the 128-feature linear layer expects.
        """
        output = self.net(input)

        # The previous `view(-1, 128)` turned any leftover spatial positions into
        # extra batch rows (e.g. a 3x3 map gave 9 rows per sample), so predictions
        # no longer lined up with labels. Fail loudly instead.
        spatial = tuple(output.shape[2:])
        if spatial != (1, 1):
            raise ValueError(
                f"SimpleNet expects a 1x1 feature map after pooling (e.g. 32x32 inputs), "
                f"got {spatial} for input of shape {tuple(input.shape)}"
            )

        # Flatten per sample so the batch dimension is always preserved.
        output = output.flatten(start_dim=1)
        output = self.fc(output)
        return output

In [213]:
# NOTE(review): no cell visible in this notebook defines a global `t` (it only appears as the
# parameter of Network.forward), so unless it is defined elsewhere this cell fails with
# NameError on a fresh kernel. Looks like a leftover scratch cell; confirm and remove.
t

## Optimizing a single batch

In [215]:
# Train a fresh Network repeatedly on one fixed batch taken from `loader`.
# NOTE(review): `loader` is not created in any cell visible here; confirm it is defined
# before this cell when running on a fresh kernel.
network = Network()

optimizer = optim.Adam(network.parameters(), lr=0.01)

batch = next(iter(loader))
images, labels = batch

# The same (images, labels) pair is reused on every iteration, so `epoch` here counts
# optimisation steps on that single batch rather than passes over a dataset.
for epoch in range(10):

    preds = network(images) # Forward pass on the batch
    loss = F.cross_entropy(preds, labels) # Loss between the network's scores and the labels

    optimizer.zero_grad() # Gradients accumulate across backward() calls, so clear them before computing new ones.
    loss.backward() # Compute gradients of the loss with respect to the network parameters.
    optimizer.step() # Update the weights; the optimizer holds them because network.parameters() was passed to optim.Adam above.

    # `preds` was computed before this step's weight update, so the count reflects the pre-update weights.
    print('Epoch:', epoch, 'num correct:', get_num_correct(preds, labels), 'Batch loss:', loss.item())

Epoch: 0 num correct: 414 Batch loss: 1.0951335430145264
Epoch: 1 num correct: 414 Batch loss: 3.022399663925171
Epoch: 2 num correct: 321 Batch loss: 1.2766258716583252
Epoch: 3 num correct: 265 Batch loss: 1.1170005798339844
Epoch: 4 num correct: 414 Batch loss: 1.097657561302185
Epoch: 5 num correct: 414 Batch loss: 1.0929067134857178
Epoch: 6 num correct: 414 Batch loss: 1.0926114320755005
Epoch: 7 num correct: 414 Batch loss: 1.0920337438583374
Epoch: 8 num correct: 414 Batch loss: 1.09086012840271
Epoch: 9 num correct: 414 Batch loss: 1.0894432067871094


## Running several epochs on all batches 

In [196]:
# Train a fresh Network for 10 passes over every batch yielded by `loader`.
# NOTE(review): `loader` is not created in any cell visible here; confirm it is defined
# before this cell when running on a fresh kernel.
network = Network()

optimizer = optim.Adam(network.parameters(), lr=0.01)

for epoch in range(10):

    # Per-epoch accumulators, reset at the start of each pass.
    total_loss = 0
    total_correct = 0
    for i, batch in enumerate(loader): # Get Batch
        # NOTE(review): the message hard-codes 1000 images per batch; confirm this matches the loader's batch size.
        print(f"Trained on {i*1000} images. Now on batch {i+1}")
        images, labels = batch

        preds = network(images) # Forward pass on the batch
        loss = F.cross_entropy(preds, labels) # Loss between the network's scores and the labels
        print('batch loss is:', loss.item())

        optimizer.zero_grad() # Gradients accumulate across backward() calls, so clear them before computing new ones.
        loss.backward() # Compute gradients of the loss with respect to the network parameters.
        optimizer.step() # Update the weights; the optimizer holds them because network.parameters() was passed to optim.Adam above.

        # total_loss is the sum of the per-batch loss values, not an average over the epoch.
        total_loss += loss.item()
        total_correct += get_num_correct(preds, labels)

    print('Epoch:', epoch, 'Total correct:', total_correct, 'Total loss:', total_loss)

Trained on 0 images. Now on batch 1
batch loss is: 1.0988587141036987
Trained on 1000 images. Now on batch 2
batch loss is: 1.0741850137710571
Trained on 2000 images. Now on batch 3
batch loss is: 1.2512097358703613
Trained on 3000 images. Now on batch 4
batch loss is: 1.0716195106506348
Trained on 4000 images. Now on batch 5
batch loss is: 1.0847065448760986
Trained on 5000 images. Now on batch 6
batch loss is: 1.0906085968017578
Trained on 6000 images. Now on batch 7
batch loss is: 1.0900161266326904
Trained on 7000 images. Now on batch 8
batch loss is: 1.0859262943267822
Trained on 8000 images. Now on batch 9
batch loss is: 1.0851609706878662
Trained on 9000 images. Now on batch 10
batch loss is: 1.0796763896942139
Trained on 10000 images. Now on batch 11
batch loss is: 1.0855610370635986
Trained on 11000 images. Now on batch 12
batch loss is: 1.076778531074524


KeyboardInterrupt: 

In [194]:
# Pull the first batch from a fresh iterator over `loader` for inspection.
# NOTE(review): `loader` is not created in any cell visible here; confirm where it is defined.
batch = next(iter(loader))
images, labels = batch # unpacking images and labels

In [195]:
# Show the distinct label values present in the batch fetched above.
labels.unique()

tensor([0, 1, 2])

In [155]:

# Train for 3 epochs over `loader`, reporting each batch and epoch to `m`.
# NOTE(review): `m`, `network`, `optimizer` and `loader` are not created in this cell, and `m`
# is not defined in any cell visible here (presumably a run/metrics tracker; confirm). This
# cell depends on state left over from earlier cells.
for epoch in tqdm(range(3)):

    m.begin_epoch()
    for i, batch in enumerate(loader): # Get Batch
        # NOTE(review): the message hard-codes 1000 images per batch; confirm this matches the loader's batch size.
        print(f"Trained on {i*1000} images. Now on batch {i+1}")
        images, labels = batch

        preds = network(images) # Forward pass on the batch
        # Debug dump of the raw scores and labels for every batch (produces very long output).
        # NOTE(review): in the recorded output the visible labels of each printed batch are all 0
        # and the scores grow into the hundreds; this may mean batches contain a single class
        # (e.g. an unshuffled loader). TODO confirm.
        print(preds, labels)
        loss = F.cross_entropy(preds, labels) # Loss between the network's scores and the labels
        print(f"Loss is: {loss}")
        optimizer.zero_grad() # Gradients accumulate across backward() calls, so clear them before computing new ones.
        loss.backward() # Compute gradients of the loss with respect to the network parameters.
        optimizer.step() # Update the weights held by `optimizer` (created in an earlier cell).

        # Hand the batch loss tensor and the predictions/labels to `m`.
        m.track_loss(loss)
        m.track_num_correct(preds, labels)


    m.end_epoch()
m.end_run()
# m.save('results')

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))

Trained on 0 images. Now on batch 1
tensor([[-0.0212, -0.0864,  0.0767],
        [-0.0208, -0.0864,  0.0761],
        [-0.0205, -0.0895,  0.0757],
        ...,
        [-0.0215, -0.0913,  0.0840],
        [-0.0214, -0.0918,  0.0835],
        [-0.0216, -0.0913,  0.0835]], grad_fn=<AddmmBackward>) tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

Trained on 3000 images. Now on batch 4
tensor([[ 18.5918,  -8.4982,  -8.6772],
        [ 18.6221,  -8.5121,  -8.6909],
        [ 18.7720,  -8.5792,  -8.7613],
        ...,
        [ 23.2400, -10.5737, -10.8648],
        [ 23.2265, -10.5656, -10.8592],
        [ 23.1874, -10.5496, -10.8413]], grad_fn=<AddmmBackward>) tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

Trained on 6000 images. Now on batch 7
tensor([[137.0536, -70.5448, -69.5150],
        [137.8104, -70.9336, -69.8986],
        [130.8834, -67.3670, -66.3806],
        ...,
        [110.6932, -57.0448, -56.1430],
        [109.2981, -56.3296, -55.4356],
        [105.6522, -54.4501, -53.5807]], grad_fn=<AddmmBackward>) tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

Trained on 9000 images. Now on batch 10
tensor([[ 286.1292, -157.9450, -154.4220],
        [ 275.7314, -152.2051, -148.8143],
        [ 274.1805, -151.3493, -147.9765],
        ...,
        [ 303.7617, -167.7678, -163.9117],
        [ 300.4006, -165.9187, -162.0990],
        [ 299.0574, -165.1836, -161.3765]], grad_fn=<AddmmBackward>) tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

Trained on 12000 images. Now on batch 13
tensor([[ 487.1906, -280.5033, -273.6254],
        [ 484.4077, -278.9038, -272.0615],
        [ 443.3365, -255.2384, -249.0113],
        ...,
        [ 543.4390, -312.9735, -305.2512],
        [ 526.5491, -303.2473, -295.7651],
        [ 515.7699, -297.0383, -289.7124]], grad_fn=<AddmmBackward>) tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

KeyboardInterrupt: 

In [None]:
class StratifiedSampler(torch.utils.data.Sampler):
    """Stratified sampling over a vector of class labels.

    `gen_sample_array` returns every index of `class_vector` exactly once,
    arranged as two halves produced by a stratified 50/50 shuffle split.
    """
    def __init__(self, class_vector, batch_size):
        """
        Arguments
        ---------
        class_vector : torch tensor
            a vector of class labels
        batch_size : integer
            batch_size
        """
        # Number of whole batches that fit into the label vector.
        self.n_splits = class_vector.size(0) // batch_size
        self.class_vector = class_vector

    def gen_sample_array(self):
        """Return a numpy array of sample indices: the stratified "train" half
        followed by the "test" half of a single StratifiedShuffleSplit.

        Raises
        ------
        ImportError
            If scikit-learn is not installed.
        """
        # scikit-learn is an optional dependency, so it is imported lazily. Raise a
        # clear error instead of printing and then failing later with a NameError.
        try:
            from sklearn.model_selection import StratifiedShuffleSplit
        except ImportError as exc:
            raise ImportError('Need scikit-learn for this functionality') from exc

        y = self.class_vector.numpy()
        # The splitter needs a feature array of matching length alongside the labels.
        X = torch.randn(self.class_vector.size(0), 2).numpy()

        # Only the first split is consumed, so at least one must be requested even
        # when batch_size exceeds the number of samples (n_splits == 0).
        splitter = StratifiedShuffleSplit(n_splits=max(1, self.n_splits), test_size=0.5)

        train_index, test_index = next(splitter.split(X, y))
        return np.hstack([train_index, test_index])
