In [1]:
from helpers import get_cpu, get_gpu, get_dataloader, train, test, make_Net

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import matplotlib.pyplot as plt

In [2]:
gpu = get_gpu()
trainloader = get_dataloader(256)
testloader = get_dataloader(256, train=False)

# Test 1 - Different Conv layer count (1 to 6)

In [3]:
# Test 1 - Different neural network depths
# c1 = [32, 5]
c1 = {"kernel_size": 5, "out_channels": 32, "padding": 3}
# c2 = [64, 3]
c2 = {"kernel_size": 3, "out_channels": 64, "padding": 2}

nets1 = []
accs1 = []
for i in range(6):
    print(f"Now training for {i} conv. layers...")
    cnn = [c1]
    for _ in range(i):
        cnn.append(c2)
    Net = make_Net(cnn, [1200, 84*5], pool=nn.MaxPool2d(2, 2))
    nets1.append(train(Net, trainloader, optim.SGD, device=gpu, lr=0.007, momentum=0.9))
    accs1.append(test(nets1[-1], testloader, device=gpu))
    print(f"Acc: {accs1[-1]: .0f}")

Now training for 0 conv. layers...


  return F.softmax(self.last_layer(x))


Acc:  58
Now training for 1 conv. layers...
Acc:  62
Now training for 2 conv. layers...
Acc:  60
Now training for 3 conv. layers...
Acc:  59
Now training for 4 conv. layers...
Acc:  60
Now training for 5 conv. layers...
Acc:  60


# Test 2 - Different Lin layer count (1 to 6)

In [10]:
conv = [{"kernel_size": 5, "out_channels": 32, "padding": 3},
        {"kernel_size": 3, "out_channels": 64, "padding": 2}]

nets2 = []
accs2 = []
for i in range(6):
    print(f"Now training for {i+1} lin. layers")
    lins = []
    for _ in range(i):
        lins.append(1200)
    Net = make_Net(conv, lins, pool=nn.MaxPool2d(2, 2))
    nets2.append(train(Net, trainloader, optim.SGD, device=gpu, lr=0.007, momentum=0.9))
    accs2.append(test(nets2[-1], testloader, device=gpu))
    print(f"Acc: {accs2[-1]: .0f}")

Now training for 0 lin. layers
Acc:  58
Now training for 1 lin. layers
Acc:  60
Now training for 2 lin. layers
Acc:  63
Now training for 3 lin. layers
Acc:  63
Now training for 4 lin. layers
Acc:  63
Now training for 5 lin. layers
Acc:  62


# Test 3 - Different permutations of 2 hidden layer sizes

In [26]:
from itertools import permutations, product

conv = [{"kernel_size": 5, "out_channels": 32, "padding": 3},
        {"kernel_size": 3, "out_channels": 64, "padding": 2}]

sizes = [10, 200, 400, 1200, 2400, 6000]

nets3 = []
accs3 = [] 

for lins in list(product(sizes, repeat=2)):
    print(f"Now training for linear layers of size: {lins[0]} & {lins[1]}")
    Net = make_Net(conv, lins, pool=nn.MaxPool2d(2, 2))
    nets3.append(train(Net, trainloader, optim.SGD, device=gpu, lr=0.007, momentum=0.9))
    accs3.append(test(nets3[-1], testloader, device=gpu))
    print(f"Acc: {accs3[-1]: .0f}") 

Now training for linear layers of size: 10 & 10
Acc:  38
Now training for linear layers of size: 10 & 200
Acc:  54
Now training for linear layers of size: 10 & 400
Acc:  57
Now training for linear layers of size: 10 & 1200
Acc:  57
Now training for linear layers of size: 10 & 2400
Acc:  54
Now training for linear layers of size: 10 & 6000
Acc:  58
Now training for linear layers of size: 200 & 10
Acc:  41
Now training for linear layers of size: 200 & 200
Acc:  61
Now training for linear layers of size: 200 & 400
Acc:  60
Now training for linear layers of size: 200 & 1200
Acc:  63
Now training for linear layers of size: 200 & 2400
Acc:  62
Now training for linear layers of size: 200 & 6000
Acc:  64
Now training for linear layers of size: 400 & 10
Acc:  38
Now training for linear layers of size: 400 & 200
Acc:  60
Now training for linear layers of size: 400 & 400
Acc:  60
Now training for linear layers of size: 400 & 1200
Acc:  62
Now training for linear layers of size: 400 & 2400
Acc:  6

# Test 4 - Dropping Softmax on Output

In [3]:
from itertools import permutations, product

conv = [{"kernel_size": 5, "out_channels": 32, "padding": 3},
        {"kernel_size": 3, "out_channels": 64, "padding": 2}]

sizes = [10, 200, 400, 1200, 2400, 6000]

nets4 = []
accs4 = [] 

for lins in list(product(sizes, repeat=2)):
    print(f"Now training for linear layers of size: {lins[0]} & {lins[1]}")
    Net = make_Net(conv, lins, pool=nn.MaxPool2d(2, 2), drop_softmax=True)
    nets4.append(train(Net, trainloader, optim.SGD, device=gpu, lr=0.007, momentum=0.9))
    accs4.append(test(nets4[-1], testloader, device=gpu))
    print(f"Acc: {accs4[-1]: .0f}") 

Now training for linear layers of size: 10 & 10
Acc:  60
Now training for linear layers of size: 10 & 200
Acc:  63
Now training for linear layers of size: 10 & 400
Acc:  65
Now training for linear layers of size: 10 & 1200
Acc:  65
Now training for linear layers of size: 10 & 2400
Acc:  65
Now training for linear layers of size: 10 & 6000
Acc:  63
Now training for linear layers of size: 200 & 10
Acc:  65
Now training for linear layers of size: 200 & 200
Acc:  69
Now training for linear layers of size: 200 & 400
Acc:  68
Now training for linear layers of size: 200 & 1200
Acc:  70
Now training for linear layers of size: 200 & 2400
Acc:  69
Now training for linear layers of size: 200 & 6000
Acc:  70
Now training for linear layers of size: 400 & 10
Acc:  64
Now training for linear layers of size: 400 & 200
Acc:  69
Now training for linear layers of size: 400 & 400
Acc:  69
Now training for linear layers of size: 400 & 1200
Acc:  68
Now training for linear layers of size: 400 & 2400
Acc:  6

# Note on Softmax
This is trained using F.CrossEntropyLoss(), which applies softmax internally - probably explaining why dropping the model's softmax improves perfromance.

# Test 5 - Lin Layer Combos pt. 2

In [8]:
from itertools import permutations, product

conv = [{"kernel_size": 5, "out_channels": 32, "padding": 3},
        {"kernel_size": 3, "out_channels": 64, "padding": 2}]

sizes = [200, 400, 1200, 2400]

nets5 = []
accs5 = [] 

for lins in list(product(sizes, repeat=3)):
    print(f"Now training for linear layers of size: {lins}")
    Net = make_Net(conv, lins, pool=nn.MaxPool2d(2, 2), drop_softmax=True)
    nets5.append(train(Net, trainloader, optim.SGD, device=gpu, lr=0.007, momentum=0.9))
    accs5.append(test(nets5[-1], testloader, device=gpu))
    print(f"Acc: {accs5[-1]: .0f}") 

Now training for linear layers of size: (200, 200, 200)
Acc:  69
Now training for linear layers of size: (200, 200, 400)
Acc:  68
Now training for linear layers of size: (200, 200, 1200)
Acc:  68
Now training for linear layers of size: (200, 200, 2400)
Acc:  70
Now training for linear layers of size: (200, 400, 200)
Acc:  68
Now training for linear layers of size: (200, 400, 400)
Acc:  68
Now training for linear layers of size: (200, 400, 1200)
Acc:  69
Now training for linear layers of size: (200, 400, 2400)
Acc:  68
Now training for linear layers of size: (200, 1200, 200)
Acc:  68
Now training for linear layers of size: (200, 1200, 400)
Acc:  69
Now training for linear layers of size: (200, 1200, 1200)
Acc:  69
Now training for linear layers of size: (200, 1200, 2400)
Acc:  69
Now training for linear layers of size: (200, 2400, 200)
Acc:  69
Now training for linear layers of size: (200, 2400, 400)
Acc:  68
Now training for linear layers of size: (200, 2400, 1200)
Acc:  70
Now trainin

# Trying ResNet

In [22]:
# https://www.digitalocean.com/community/tutorials/writing-resnet-from-scratch-in-pytorch
class ResBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super().__init__()
        self.conv1 = nn.Sequential(nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1),
                                   nn.BatchNorm2d(out_channels),
                                   nn.ReLU())
        self.conv2 = nn.Sequential(nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1),
                                   nn.BatchNorm2d(out_channels))
        self.downsample = downsample
        self.relu = nn.ReLU()
        self.out_channels = out_channels

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.conv2(out)
        if self.downsample:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out

class ResNet(nn.Module):
    def __init__(self, num_classes = 10):
        # layers = [3, 4, 6, 3]
        layers = [2, 2, 2, 2]
        super(ResNet, self).__init__()
        self.inplanes = 64
        self.conv1 = nn.Sequential(
                        nn.Conv2d(3, 64, kernel_size = 7, stride = 2, padding = 3),
                        nn.BatchNorm2d(64),
                        nn.ReLU())
        self.maxpool = nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)
        self.layer0 = self._make_layer(ResBlock, 64, layers[0], stride = 1)
        self.layer1 = self._make_layer(ResBlock, 128, layers[1], stride = 2)
        self.layer2 = self._make_layer(ResBlock, 256, layers[2], stride = 2)
        self.layer3 = self._make_layer(ResBlock, 512, layers[3], stride = 2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, ResBlock, planes, blocks, stride=1):
        print(f"Making layer with {planes} planes")
        downsample = None
        if stride != 1 or self.inplanes != planes:

            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes, kernel_size=1, stride=stride),
                nn.BatchNorm2d(planes),
            )
        layers = []
        layers.append(ResBlock(self.inplanes, planes, stride, downsample))
        self.inplanes = planes
        for i in range(1, blocks):
            layers.append(ResBlock(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.maxpool(x)
        x = self.layer0(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x

In [23]:
res_net = train(ResNet, trainloader, optim.SGD, device=gpu, lr=0.007, momentum=0.9)
res_net_acc = test(nets5[-1], testloader, device=gpu)
print(f"Acc: {res_net_acc: .0f}")

Making layer with 64 planes
Making layer with 128 planes
Making layer with 256 planes
Making layer with 512 planes
Acc:  68


# Misc

In [10]:
from itertools import permutations, product

conv = [{"kernel_size": 5, "out_channels": 32, "padding": 3},
        {"kernel_size": 3, "out_channels": 64, "padding": 2}]

sizes = (2400, 400, 400)

net = make_Net(conv, sizes, pool=nn.MaxPool2d(2, 2), drop_softmax=True)()

print(net.convs[0].weight.data)

tensor([[[[ 3.1600e-02,  4.5054e-02,  1.1591e-02,  1.0247e-01, -1.1023e-01],
          [-1.5258e-02,  6.4661e-02,  1.1234e-02, -1.6596e-02, -3.8726e-03],
          [ 5.4763e-02, -1.7057e-02, -7.3655e-02,  1.0306e-01,  1.6145e-02],
          [ 1.4092e-02, -3.1699e-02, -3.2882e-02,  3.3918e-02, -5.5739e-02],
          [-6.8762e-02, -1.3512e-02,  2.4894e-02, -5.8581e-03,  2.1964e-02]],

         [[-7.0406e-02,  6.5489e-02,  3.6116e-02, -1.1181e-01,  1.4533e-02],
          [ 7.7778e-02, -1.0411e-01, -6.4189e-02, -1.0278e-01,  3.1882e-03],
          [ 5.9914e-02,  4.4581e-02,  5.8595e-02, -8.0919e-02, -5.1720e-02],
          [-5.6214e-02,  3.8414e-02,  9.7419e-02, -7.2217e-02, -1.0158e-01],
          [-5.2495e-02,  2.3259e-02, -5.7799e-02,  2.6712e-03, -5.7352e-02]],

         [[ 8.9496e-02,  5.7826e-02,  4.6927e-03,  4.1311e-02,  1.4761e-02],
          [-1.0366e-01, -5.2102e-02, -7.2886e-02, -6.4115e-02,  7.8429e-02],
          [-3.0173e-02, -9.6887e-03, -2.4090e-02, -1.1329e-01, -2.0763e-

In [None]:
Net2 = make_Net([(16, 5), (16, 5)], [1200, 84*5], pool=nn.AvgPool2d(2, 2))

In [None]:
net2, losses = train(Net2, trainloader, optim.SGD, device=gpu, lr=0.007, momentum=0.9, get_losses=True)

In [None]:
plt.ylim(0, max(losses)*1.02)
plt.plot(losses)