# Convolutional Network Architectures

In [1]:
import torch
from torch import nn
import torchvision as tv
from torchsummary import summary

from func import train

In [21]:
# Pick the compute device once: use the GPU when PyTorch can see one,
# otherwise fall back to the CPU. The value is kept as a plain string.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cuda'

## Dataset

In [3]:
BATCH_SIZE = 256

# Resize every image to 224x224 (the input size the networks in this notebook
# are summarised and trained with) and convert it to a tensor.
transforms = tv.transforms.Compose([
    tv.transforms.Resize((224, 224)),
    tv.transforms.ToTensor(),
])

# MNIST train/test splits, downloaded into ./datas on first use.
train_dataset = tv.datasets.MNIST('./datas', train=True, transform=transforms, download=True)
test_dataset = tv.datasets.MNIST('./datas', train=False, transform=transforms, download=True)

# Reshuffle the training samples at every epoch so mini-batches are not always
# drawn in the same fixed dataset order; the evaluation loader keeps a
# deterministic order.
train_iter = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_iter = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

## AlexNet

In [4]:
# AlexNet-style network for single-channel 224x224 inputs and 10 output classes.
# Layer order (and therefore every index inside the Sequential) is fixed.
net = nn.Sequential(
    # Stage 1: large-kernel, strided convolution followed by overlapping pooling.
    nn.Conv2d(in_channels=1, out_channels=96, kernel_size=11, stride=4),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),

    # Stage 2: 5x5 convolution (padding keeps the spatial size) + pooling.
    nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, padding=2),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),

    # Stage 3: three size-preserving 3x3 convolutions, then a final pooling.
    nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),

    # Classifier head: 6400 = 256 channels * 5 * 5 spatial positions
    # left after the last pooling layer for a 224x224 input.
    nn.Flatten(),
    nn.Linear(in_features=6400, out_features=4096),
    nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=4096, out_features=4096),
    nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=4096, out_features=10),
)

In [5]:
# Move the model to the selected device, then print the per-layer table of
# output shapes and parameter counts for a single 1x224x224 input.
net = net.to(device)
summary(net, input_size=(1, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 96, 54, 54]          11,712
              ReLU-2           [-1, 96, 54, 54]               0
         MaxPool2d-3           [-1, 96, 26, 26]               0
            Conv2d-4          [-1, 256, 26, 26]         614,656
              ReLU-5          [-1, 256, 26, 26]               0
         MaxPool2d-6          [-1, 256, 12, 12]               0
            Conv2d-7          [-1, 384, 12, 12]         885,120
              ReLU-8          [-1, 384, 12, 12]               0
            Conv2d-9          [-1, 384, 12, 12]       1,327,488
             ReLU-10          [-1, 384, 12, 12]               0
           Conv2d-11          [-1, 256, 12, 12]         884,992
             ReLU-12          [-1, 256, 12, 12]               0
        MaxPool2d-13            [-1, 256, 5, 5]               0
          Flatten-14                 [-

In [6]:
# Optimisation settings for this run: Adam with a fixed learning rate, one epoch.
lr = 0.001
num_epochs = 1
optimizer = torch.optim.Adam(params=net.parameters(), lr=lr)

# `train` is the helper imported from the local `func` module.
train(net, train_iter, test_iter, optimizer, num_epochs, device)

Step 0, time since epoch: 0.808. Train acc: 0.121. Train Loss: 2.302
Step 10, time since epoch: 2.719. Train acc: 0.121. Train Loss: 2.317
Step 20, time since epoch: 4.730. Train acc: 0.263. Train Loss: 2.086
Step 30, time since epoch: 6.649. Train acc: 0.376. Train Loss: 1.782
Step 40, time since epoch: 8.508. Train acc: 0.474. Train Loss: 1.518
Step 50, time since epoch: 10.336. Train acc: 0.536. Train Loss: 1.347
Step 60, time since epoch: 12.210. Train acc: 0.584. Train Loss: 1.212
Step 70, time since epoch: 14.057. Train acc: 0.624. Train Loss: 1.101
Step 80, time since epoch: 15.867. Train acc: 0.658. Train Loss: 1.001
Step 90, time since epoch: 17.676. Train acc: 0.686. Train Loss: 0.922
Step 100, time since epoch: 19.541. Train acc: 0.709. Train Loss: 0.855
Step 110, time since epoch: 21.328. Train acc: 0.730. Train Loss: 0.798
Step 120, time since epoch: 23.127. Train acc: 0.747. Train Loss: 0.749
Step 130, time since epoch: 24.975. Train acc: 0.762. Train Loss: 0.705
Step 140

## VGG

In [23]:
def vgg_block(num_convs, input_channels, num_channels):
    """Build one VGG block: 3x3 conv + ReLU pairs followed by a 2x2 max-pool.

    Args:
        num_convs: Total number of convolutions in the block. The first one is
            always created; ``num_convs - 1`` further ones are appended.
        input_channels: Number of channels of the block's input.
        num_channels: Number of output channels of every convolution.

    Returns:
        nn.Sequential whose children are named ``"0"``/``"1"`` for the first
        conv/ReLU pair, ``"conv{i}"``/``"relu{i}"`` for each additional pair
        (``i`` starting at 0), and ``"pool"`` for the final max-pool.
    """
    def conv3x3(in_channels):
        # 3x3 kernel with padding 1 keeps the spatial size unchanged.
        return nn.Conv2d(in_channels, num_channels, kernel_size=3, padding=1)

    block = nn.Sequential()
    # First pair maps input_channels -> num_channels.
    block.add_module("0", conv3x3(input_channels))
    block.add_module("1", nn.ReLU())
    # Remaining pairs keep the channel count at num_channels.
    for extra in range(num_convs - 1):
        block.add_module(f"conv{extra}", conv3x3(num_channels))
        block.add_module(f"relu{extra}", nn.ReLU())
    # Halve height and width.
    block.add_module("pool", nn.MaxPool2d(2, stride=2))
    return block


In [24]:
def vgg(conv_arch, input_size=224, num_classes=10):
    """Assemble a VGG-style classifier from a sequence of block specifications.

    The size of the first fully connected layer is derived from ``conv_arch``
    and ``input_size`` instead of being hard-coded, so architectures with a
    different width or depth (e.g. the unreduced ``conv_arch``) also work.

    Args:
        conv_arch: Non-empty iterable of
            ``(num_convs, input_channels, num_channels)`` tuples, one per
            block; each tuple is passed to ``vgg_block``.
        input_size: Side length of the (square) input images. Only used to
            size the first ``nn.Linear`` layer. Defaults to 224.
        num_classes: Number of output units of the last layer. Defaults to 10.

    Returns:
        nn.Sequential with children ``block0`` ... ``block{n-1}`` followed by
        ``classifier``.

    Raises:
        ValueError: If ``conv_arch`` is empty, or if ``input_size`` is too
            small to survive the pooling of every block.
    """
    conv_arch = list(conv_arch)  # allow generators; we need len() and [-1]
    if not conv_arch:
        raise ValueError("conv_arch must contain at least one block specification")

    net = nn.Sequential()

    for i, (num_convs, input_ch, num_channels) in enumerate(conv_arch):
        net.add_module("block{}".format(i), vgg_block(num_convs, input_ch, num_channels))

    # Inside a block the 3x3/padding-1 convolutions keep the spatial size and
    # the 2x2/stride-2 max-pool halves it (rounding down), so after all blocks
    # the feature map side is input_size // 2**len(conv_arch).
    feature_map_side = input_size // 2 ** len(conv_arch)
    if feature_map_side < 1:
        raise ValueError(
            "input_size={} is too small for {} pooling stages".format(input_size, len(conv_arch))
        )
    # channels of the last block * height * width
    # (128 * 7 * 7 = 6272 for the width-reduced architecture at 224x224).
    flat_features = conv_arch[-1][2] * feature_map_side ** 2

    classifier = nn.Sequential(
        nn.Flatten(),
        nn.Linear(flat_features, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, num_classes)
    )

    net.add_module('classifier', classifier)
    return net

In [25]:
# One entry per VGG block: (num_convs, input_channels, num_channels).
# Each block's input_channels equals the previous block's num_channels.
conv_arch = (
    (1, 1, 64),
    (1, 64, 128),
    (2, 128, 256),
    (2, 256, 512),
    (2, 512, 512),
)

In [26]:
# Shrink every block's channel counts by `ratio` to get a lighter network.
# The input channel count is clamped to at least 1 so the single-channel
# input of the first block survives the integer division.
ratio = 4
small_conv_arch = [
    (num_convs, max(in_channels // ratio, 1), out_channels // ratio)
    for num_convs, in_channels, out_channels in conv_arch
]
net = vgg(small_conv_arch)

In [27]:
# Move the VGG model to the selected device, then print the per-layer table of
# output shapes and parameter counts for a single 1x224x224 input.
net = net.to(device)
summary(net, input_size=(1, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 16, 224, 224]             160
              ReLU-2         [-1, 16, 224, 224]               0
         MaxPool2d-3         [-1, 16, 112, 112]               0
            Conv2d-4         [-1, 32, 112, 112]           4,640
              ReLU-5         [-1, 32, 112, 112]               0
         MaxPool2d-6           [-1, 32, 56, 56]               0
            Conv2d-7           [-1, 64, 56, 56]          18,496
              ReLU-8           [-1, 64, 56, 56]               0
            Conv2d-9           [-1, 64, 56, 56]          36,928
             ReLU-10           [-1, 64, 56, 56]               0
        MaxPool2d-11           [-1, 64, 28, 28]               0
           Conv2d-12          [-1, 128, 28, 28]          73,856
             ReLU-13          [-1, 128, 28, 28]               0
           Conv2d-14          [-1, 128,

In [28]:
# Optimisation settings for this run: Adam with a fixed learning rate, one epoch.
lr = 0.001
num_epochs = 1
optimizer = torch.optim.Adam(params=net.parameters(), lr=lr)

# `train` is the helper imported from the local `func` module.
train(net, train_iter, test_iter, optimizer, num_epochs, device)

Step 0, time since epoch: 0.201. Train acc: 0.094. Train Loss: 2.302
Step 10, time since epoch: 2.206. Train acc: 0.103. Train Loss: 2.303
Step 20, time since epoch: 4.247. Train acc: 0.105. Train Loss: 2.302
Step 30, time since epoch: 6.277. Train acc: 0.108. Train Loss: 2.302
Step 40, time since epoch: 8.275. Train acc: 0.124. Train Loss: 2.292
Step 50, time since epoch: 10.289. Train acc: 0.185. Train Loss: 2.158
Step 60, time since epoch: 12.294. Train acc: 0.277. Train Loss: 1.931
Step 70, time since epoch: 14.298. Train acc: 0.358. Train Loss: 1.726
Step 80, time since epoch: 16.347. Train acc: 0.427. Train Loss: 1.544
Step 90, time since epoch: 18.398. Train acc: 0.483. Train Loss: 1.399
Step 100, time since epoch: 20.391. Train acc: 0.529. Train Loss: 1.279
Step 110, time since epoch: 22.377. Train acc: 0.566. Train Loss: 1.181
Step 120, time since epoch: 24.364. Train acc: 0.599. Train Loss: 1.095
Step 130, time since epoch: 26.345. Train acc: 0.626. Train Loss: 1.021
Step 140