In [1]:
import torch
import torchvision
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime

import torch.nn as nn
import torch.nn.functional as F

from utils import *

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(device, device.type)

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=torchvision.transforms.ToTensor())
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=torchvision.transforms.ToTensor())

cuda cuda
Files already downloaded and verified
Files already downloaded and verified


In [2]:
class BatchNormConv2d(nn.Module):
    def __init__(self, in_ch, out_ch, kernel_size=3, stride=1, padding=1):
        super(BatchNormConv2d, self).__init__()
        self.convs = []
        self.bns = []
        self.gelu = nn.GELU()
        self.conv = nn.Conv2d(in_channels=in_ch, out_channels=out_ch, kernel_size=kernel_size, stride=stride, padding=padding)
        self.bn = nn.BatchNorm2d(out_ch)
        
    def forward(self, x):
        return self.gelu(self.bn(self.conv(x)))

In [3]:
class BottleneckBlock(nn.Module):
    def __init__(self, in_ch, mid_ch, out_ch, kernel_sizes=(1, 3, 1), stride=1):
        super(BottleneckBlock, self).__init__()
        self.reduce = BatchNormConv2d(in_ch, mid_ch, kernel_size=kernel_sizes[0], stride=stride, padding=kernel_sizes[0] // 2)
        self.maintain = BatchNormConv2d(mid_ch, mid_ch, kernel_size=kernel_sizes[1], stride=1, padding=kernel_sizes[1] // 2)
        self.expand = BatchNormConv2d(mid_ch, out_ch, kernel_size=kernel_sizes[2], stride=1, padding=kernel_sizes[2] // 2)
        
        self.residual = None
        if in_ch != out_ch:
            self.residual = nn.Conv2d(in_ch, out_ch, kernel_size=1, stride=stride)
    def forward(self, x):
        res = self.residual(x) if self.residual else x
        x = self.reduce(x)
        x = self.maintain(x)
        x = self.expand(x)
        return F.gelu(x + res)

In [10]:
class BottleneckModel(nn.Module):
    def __init__(self, in_ch=3, num_classes=10, bottleneck_configs=None):
        super(BottleneckModel, self).__init__()
        if bottleneck_configs is None:
            bottleneck_configs = [
                (3, 16, 32, (1, 3, 1)),
                (32, 32, 32, (1, 3, 1)),
                (32, 64, 64, (1, 5, 1))
            ]

        #we have to use ModuleList here since a normal list wouldn't be able to be moved onto the GPU (or through 'dirty" explicit move of the list to the GPU)
        self.bottlenecks = nn.ModuleList([BottleneckBlock(*config) for config in bottleneck_configs])        
        self.pool = nn.MaxPool2d((2, 2))
        self.mlp = nn.Linear(bottleneck_configs[-1][2], num_classes)

    def forward(self, x):
        for block in self.bottlenecks:
            x = block(x)
            x = self.pool(x)
        
        x = torch.flatten(x, 1)
        x = self.mlp(x)
        
        return F.softmax(x, dim=1)

In [11]:
class BatchNormResidualConvolutionalModel(nn.Module):
    def __init__(self, stride=1, num_classes=10):
        super().__init__()
        
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv1 = BatchNormConv2d(3, 6, stride=stride)
        self.conv2 = BatchNormConv2d(6, 16, stride=stride)
        self.conv3 = BatchNormConv2d(16, 32, stride=stride)
        
        self.mlp = nn.Linear(in_features=(32 * 4 * 4), out_features=num_classes)

        self.proj1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=1, stride=stride)
        self.proj2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=1, stride=stride)
        self.proj3 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=1, stride=stride)

    def forward(self, x):
        res1 = self.proj1(x)
        x = self.conv1(x)
        x = x + res1
        x = self.pool(x)
        
        res2 = self.proj2(x)
        x = self.conv2(x)
        x = x + res2
        x = self.pool(x)
        
        res3 = self.proj3(x)
        x = self.conv3(x)
        x = x + res3
        x = self.pool(x)
        
        x = torch.flatten(x, 1)
        x = self.mlp(x)
        
        return F.softmax(x, dim=1)

In [12]:
conv_model = BottleneckModel()
if device.type == "cuda":
    conv_model = conv_model.to('cuda')
learning_rate = 0.001
BATCH_SIZE = 128
epochs = 20

train_loader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
test_loader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, pin_memory=True)

opt = torch.optim.Adam(conv_model.parameters(), lr=learning_rate)
print(conv_model, opt, device)

timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
writer = SummaryWriter('runs/fashion_trainer_{}'.format(timestamp))

for epoch in range(epochs):
    print(f"Epoch {epoch+1}")
    avg_loss = fit_one_cycle(conv_model, train_loader, opt, epoch, writer, device, flatten=False)
    acc = prediction_accuracy(conv_model, test_loader, device, flatten=False)
    print(f"Accuracy: {acc * 100}% ({acc})\nLoss: {avg_loss}")

model is moded to gpu
BottleneckModel(
  (bottlenecks): ModuleList(
    (0): BottleneckBlock(
      (reduce): BatchNormConv2d(
        (gelu): GELU(approximate='none')
        (conv): Conv2d(3, 16, kernel_size=(1, 1), stride=(1, 1))
        (bn): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (maintain): BatchNormConv2d(
        (gelu): GELU(approximate='none')
        (conv): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (bn): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (expand): BatchNormConv2d(
        (gelu): GELU(approximate='none')
        (conv): Conv2d(16, 32, kernel_size=(1, 1), stride=(1, 1))
        (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (residual): Conv2d(3, 32, kernel_size=(1, 1), stride=(1, 1))
    )
    (1): BottleneckBlock(
      (reduce): BatchNormConv2d(
        (gelu): GELU(approximat

Training:   0%|                                                                                                                       | 0/391 [00:00<?, ?it/s]

moved batch to  cuda


Training:   0%|                                                                                                                       | 0/391 [00:00<?, ?it/s]


RuntimeError: mat1 and mat2 shapes cannot be multiplied (128x1024 and 64x10)