In [2]:
import torch
import torchvision
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime

import torch.nn as nn
import torch.nn.functional as F

from utils import *

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(device, device.type)

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=torchvision.transforms.ToTensor())
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=torchvision.transforms.ToTensor())

cuda cuda
Files already downloaded and verified
Files already downloaded and verified


In [3]:
%%bash
nvidia-smi

Sun Dec  8 15:06:09 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.120                Driver Version: 550.120        CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA RTX A1000 6GB Lap...    Off |   00000000:01:00.0 Off |                  N/A |
| N/A   46C    P8              4W /   45W |    1360MiB /   6144MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [4]:
class BatchNormConv2d(nn.Module):
    def __init__(self, in_ch, out_ch, kernel_size=3, stride=1, padding=1, groups=1):
        super(BatchNormConv2d, self).__init__()
        self.gelu = nn.GELU()
        self.conv = nn.Conv2d(in_channels=in_ch, out_channels=out_ch, kernel_size=kernel_size, stride=stride, padding=padding)
        self.bn = nn.BatchNorm2d(out_ch)
        
    def forward(self, x):
        return self.gelu(self.bn(self.conv(x)))

In [5]:
class InvertedBottleneckBlock(nn.Module):
    def __init__(self, in_ch, mid_ch, out_ch, ks=3, stride=1):
        super(InvertedBottleneckBlock, self).__init__()

        self.expand = BatchNormConv2d(in_ch, mid_ch, kernel_size=1, stride=stride, padding=0)
        if ks < 3:
            raise ValueError("The kernel for the depthwise convolution needs to be at least 3x3")
        #the groups should be the input channel here 'mid_ch' when using bigger sized kernels
        self.padding = 0 if ks % 2 == 0 else (ks // 2) #handle asymmetric padding since the division works for uneven number here
        self.maintain = BatchNormConv2d(mid_ch, mid_ch, kernel_size=ks, groups=mid_ch, padding=self.padding)
        self.reduce = BatchNormConv2d(mid_ch, out_ch, kernel_size=1, stride=stride, padding=0)
        
        self.residual = None
        #added dimension projection layer for differeing in and out sizes
        if in_ch != out_ch:
            self.residual = nn.Conv2d(in_ch, out_ch, kernel_size=1, stride=stride)
    def forward(self, x):
        res = self.residual(x) if self.residual else x
        #print("start", x.shape)
        x = self.expand(x)
        #print("after expand", x.shape)
        x = self.maintain(x)
        #print("after maintain", x.shape)
        x = self.reduce(x)
        #print("after reduce", x.shape)
        if x.shape != res.shape:
            raise RuntimeError(f"Shape mismatch: x={x.shape}, res={res.shape}")
        return F.gelu(x + res)

In [6]:
class InvertedBottleneckModel(nn.Module):
    def __init__(self, in_ch=3, num_classes=10, bottleneck_configs=None):
        super(InvertedBottleneckModel, self).__init__()
        if bottleneck_configs is None:
            bottleneck_configs = [
                (3, 16, 32, 3),
                (32, 64, 128, 5),
                (128, 256, 512, 3),
            ]

        self.blocks = nn.ModuleList([InvertedBottleneckBlock(*config) for config in bottleneck_configs])
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.mlp = nn.Linear(bottleneck_configs[-1][2]*4*4, num_classes)

    def forward(self, x):
        for block in self.blocks:
            x = block(x)
            x = self.pool(x)
        
        x = torch.flatten(x, 1)
        x = self.mlp(x)
        
        return F.softmax(x, dim=1)

In [9]:
conv_model = InvertedBottleneckModel()
if device.type == "cuda":
    conv_model = conv_model.to('cuda')
learning_rate = 0.0001
BATCH_SIZE = 128
epochs = 20

train_loader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
test_loader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, pin_memory=True)

#move dataset loader to gpu for each batch all at once to avoid moving memory during training
#train_loader = move_dataloader_to_device(train_loader, device)
#test_loader = move_dataloader_to_device(test_loader, device)

opt = torch.optim.Adam(conv_model.parameters(), lr=learning_rate)
print(conv_model, opt, device)

InvertedBottleneckModel(
  (blocks): ModuleList(
    (0): InvertedBottleneckBlock(
      (expand): BatchNormConv2d(
        (gelu): GELU(approximate='none')
        (conv): Conv2d(3, 16, kernel_size=(1, 1), stride=(1, 1))
        (bn): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (maintain): BatchNormConv2d(
        (gelu): GELU(approximate='none')
        (conv): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (bn): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (reduce): BatchNormConv2d(
        (gelu): GELU(approximate='none')
        (conv): Conv2d(16, 32, kernel_size=(1, 1), stride=(1, 1))
        (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (residual): Conv2d(3, 32, kernel_size=(1, 1), stride=(1, 1))
    )
    (1): InvertedBottleneckBlock(
      (expand): BatchNormConv2d(
        (gelu): GELU(approximate='

In [None]:
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
writer = SummaryWriter('runs/fashion_trainer_{}'.format(timestamp))
for epoch in range(epochs):
    print(f"Epoch {epoch+1}")
    avg_loss = fit_one_cycle(conv_model, train_loader, opt, epoch, writer, device, flatten=False)
    acc = prediction_accuracy(conv_model, test_loader, device, flatten=False)
    print(f"Accuracy: {acc * 100}% ({acc})\nLoss: {avg_loss}")

In [10]:
#testing multiple configs
model_configs = [
    [(3, 16, 32, 3), (32, 64, 128, 5), (128, 256, 512, 3)],  # Default config
    [(3, 16, 32, 3), (32, 32, 64, 3), (64, 128, 128, 3)],    # Smaller mid-channels
    [(3, 32, 64, 5), (64, 128, 256, 5), (256, 512, 1024, 5)], # Larger kernel sizes
]

best_accuracy = 0
best_config = None

epochs = 20

for idx, bottleneck_config in enumerate(model_configs):
    print(f"Training configuration {idx + 1}/{len(model_configs)}: {bottleneck_config}")
    
    conv_model = InvertedBottleneckModel(in_ch=3, num_classes=10, bottleneck_configs=bottleneck_config).to(device)
    if device.type == "cuda":
        conv_model = conv_model.to('cuda')
    
    opt = torch.optim.Adam(conv_model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()
    BATCH_SIZE = 128

    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    writer = SummaryWriter('runs/inverted_bottleneck_trainer_{}_{}'.format(idx, timestamp))

    acc = 0
    for epoch in range(epochs):
        print(f"Epoch {epoch+1}")
        avg_loss = fit_one_cycle(conv_model, train_loader, opt, epoch, writer, device, flatten=False)
        acc = prediction_accuracy(conv_model, test_loader, device, flatten=False)
        print(f"Accuracy: {acc * 100}% ({acc})\nLoss: {avg_loss}")

    conv_model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = conv_model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    
    accuracy = correct / total
    print(f"Configuration {idx + 1}: Accuracy = {accuracy:.4f}")
    
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_config = bottleneck_config

print("\nBest Configuration:", best_config)
print("Best Accuracy:", best_accuracy)


Training configuration 1/3: [(3, 16, 32, 3), (32, 64, 128, 5), (128, 256, 512, 3)]
Epoch 1



raining: 100%|███████████████████████████████| 391/391 [00:13<00:00, 28.49it/s]

Correct: 4260, Total: 10000, Accuracy: 0.43
Accuracy: 42.6% (0.426)
Loss: 2.044842086340252
Epoch 2



raining: 100%|███████████████████████████████| 391/391 [00:13<00:00, 29.43it/s]

Correct: 4190, Total: 10000, Accuracy: 0.42
Accuracy: 41.9% (0.419)
Loss: 2.011384650280601
Epoch 3



raining: 100%|███████████████████████████████| 391/391 [00:13<00:00, 29.06it/s]

Correct: 4690, Total: 10000, Accuracy: 0.47
Accuracy: 46.9% (0.469)
Loss: 1.960647934361508
Epoch 4



raining: 100%|███████████████████████████████| 391/391 [00:13<00:00, 29.13it/s]

Correct: 5262, Total: 10000, Accuracy: 0.53
Accuracy: 52.62% (0.5262)
Loss: 1.9263364766773425
Epoch 5



raining: 100%|███████████████████████████████| 391/391 [00:13<00:00, 28.97it/s]

Correct: 5335, Total: 10000, Accuracy: 0.53
Accuracy: 53.349999999999994% (0.5335)
Loss: 1.9120170944615413
Epoch 6



raining: 100%|███████████████████████████████| 391/391 [00:13<00:00, 29.04it/s]

Correct: 5580, Total: 10000, Accuracy: 0.56
Accuracy: 55.800000000000004% (0.558)
Loss: 1.888701275775307
Epoch 7



raining: 100%|███████████████████████████████| 391/391 [00:13<00:00, 29.31it/s]

Correct: 5664, Total: 10000, Accuracy: 0.57
Accuracy: 56.64% (0.5664)
Loss: 1.8960458102979159
Epoch 8



raining: 100%|███████████████████████████████| 391/391 [00:13<00:00, 29.14it/s]

Correct: 5939, Total: 10000, Accuracy: 0.59
Accuracy: 59.39% (0.5939)
Loss: 1.8398846701571816
Epoch 9



raining: 100%|███████████████████████████████| 391/391 [00:13<00:00, 29.27it/s]

Correct: 6570, Total: 10000, Accuracy: 0.66
Accuracy: 65.7% (0.657)
Loss: 1.7988638877868652
Epoch 10



raining: 100%|███████████████████████████████| 391/391 [00:13<00:00, 28.86it/s]

Correct: 6696, Total: 10000, Accuracy: 0.67
Accuracy: 66.96% (0.6696)
Loss: 1.7626289882157977
Epoch 11



raining: 100%|███████████████████████████████| 391/391 [00:13<00:00, 29.01it/s]

Correct: 6908, Total: 10000, Accuracy: 0.69
Accuracy: 69.08% (0.6908)
Loss: 1.752696771370737
Epoch 12



raining: 100%|███████████████████████████████| 391/391 [00:13<00:00, 29.30it/s]

Correct: 7041, Total: 10000, Accuracy: 0.70
Accuracy: 70.41% (0.7041)
Loss: 1.7160840473676984
Epoch 13



raining: 100%|███████████████████████████████| 391/391 [00:13<00:00, 29.20it/s]

Correct: 7102, Total: 10000, Accuracy: 0.71
Accuracy: 71.02000000000001% (0.7102)
Loss: 1.7199852529324984
Epoch 14



raining: 100%|███████████████████████████████| 391/391 [00:13<00:00, 28.72it/s]

Correct: 7204, Total: 10000, Accuracy: 0.72
Accuracy: 72.04% (0.7204)
Loss: 1.6876453223981356
Epoch 15



raining: 100%|███████████████████████████████| 391/391 [00:13<00:00, 28.84it/s]

Correct: 7256, Total: 10000, Accuracy: 0.73
Accuracy: 72.56% (0.7256)
Loss: 1.6879805389203524
Epoch 16



raining: 100%|███████████████████████████████| 391/391 [00:13<00:00, 28.81it/s]

Correct: 7279, Total: 10000, Accuracy: 0.73
Accuracy: 72.78999999999999% (0.7279)
Loss: 1.6783240468878495
Epoch 17



raining: 100%|███████████████████████████████| 391/391 [00:13<00:00, 29.05it/s]

Correct: 7423, Total: 10000, Accuracy: 0.74
Accuracy: 74.22999999999999% (0.7423)
Loss: 1.6412700477399325
Epoch 18



raining: 100%|███████████████████████████████| 391/391 [00:13<00:00, 29.01it/s]

Correct: 7501, Total: 10000, Accuracy: 0.75
Accuracy: 75.01% (0.7501)
Loss: 1.631757849141171
Epoch 19



raining: 100%|███████████████████████████████| 391/391 [00:13<00:00, 29.05it/s]

Correct: 7443, Total: 10000, Accuracy: 0.74
Accuracy: 74.42999999999999% (0.7443)
Loss: 1.6493856718665676
Epoch 20



raining: 100%|███████████████████████████████| 391/391 [00:13<00:00, 29.17it/s]

Correct: 7544, Total: 10000, Accuracy: 0.75
Accuracy: 75.44% (0.7544)
Loss: 1.610388900104322
Configuration 1: Accuracy = 0.7561
Training configuration 2/3: [(3, 16, 32, 3), (32, 32, 64, 3), (64, 128, 128, 3)]
Epoch 1



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 41.25it/s]

Correct: 5817, Total: 10000, Accuracy: 0.58
Accuracy: 58.17% (0.5817)
Loss: 1.8739488124847412
Epoch 2



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 42.67it/s]

Correct: 6375, Total: 10000, Accuracy: 0.64
Accuracy: 63.74999999999999% (0.6375)
Loss: 1.815960300596137
Epoch 3



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 42.33it/s]

Correct: 6987, Total: 10000, Accuracy: 0.70
Accuracy: 69.87% (0.6987)
Loss: 1.7530378981640464
Epoch 4



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 41.73it/s]

Correct: 7140, Total: 10000, Accuracy: 0.71
Accuracy: 71.39999999999999% (0.714)
Loss: 1.735082701632851
Epoch 5



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 42.72it/s]

Correct: 7220, Total: 10000, Accuracy: 0.72
Accuracy: 72.2% (0.722)
Loss: 1.7132525193063837
Epoch 6



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 42.85it/s]

Correct: 7384, Total: 10000, Accuracy: 0.74
Accuracy: 73.83999999999999% (0.7384)
Loss: 1.6915536867944818
Epoch 7



raining: 100%|███████████████████████████████| 391/391 [00:08<00:00, 43.46it/s]

Correct: 7512, Total: 10000, Accuracy: 0.75
Accuracy: 75.12% (0.7512)
Loss: 1.6617448831859387
Epoch 8



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 41.43it/s]

Correct: 7546, Total: 10000, Accuracy: 0.75
Accuracy: 75.46000000000001% (0.7546)
Loss: 1.6347864991740177
Epoch 9



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 42.83it/s]

Correct: 7532, Total: 10000, Accuracy: 0.75
Accuracy: 75.32% (0.7532)
Loss: 1.633476589855395
Epoch 10



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 43.20it/s]

Correct: 7659, Total: 10000, Accuracy: 0.77
Accuracy: 76.59% (0.7659)
Loss: 1.6176657237504657
Epoch 11



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 43.31it/s]

Correct: 7699, Total: 10000, Accuracy: 0.77
Accuracy: 76.99000000000001% (0.7699)
Loss: 1.6001524486039813
Epoch 12



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 42.80it/s]

Correct: 7720, Total: 10000, Accuracy: 0.77
Accuracy: 77.2% (0.772)
Loss: 1.6038385817879124
Epoch 13



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 42.52it/s]

Correct: 7750, Total: 10000, Accuracy: 0.78
Accuracy: 77.5% (0.775)
Loss: 1.5895333039133173
Epoch 14



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 43.03it/s]

Correct: 7660, Total: 10000, Accuracy: 0.77
Accuracy: 76.6% (0.766)
Loss: 1.568912524926035
Epoch 15



raining: 100%|███████████████████████████████| 391/391 [00:08<00:00, 44.06it/s]

Correct: 7709, Total: 10000, Accuracy: 0.77
Accuracy: 77.09% (0.7709)
Loss: 1.5696760541514347
Epoch 16



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 43.24it/s]

Correct: 7712, Total: 10000, Accuracy: 0.77
Accuracy: 77.12% (0.7712)
Loss: 1.570615906464426
Epoch 17



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 43.36it/s]

Correct: 7752, Total: 10000, Accuracy: 0.78
Accuracy: 77.52% (0.7752)
Loss: 1.551848618607772
Epoch 18



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 43.12it/s]

Correct: 7772, Total: 10000, Accuracy: 0.78
Accuracy: 77.72% (0.7772)
Loss: 1.5401316755696346
Epoch 19



raining: 100%|███████████████████████████████| 391/391 [00:08<00:00, 43.61it/s]

Correct: 7615, Total: 10000, Accuracy: 0.76
Accuracy: 76.14999999999999% (0.7615)
Loss: 1.5491471102363186
Epoch 20



raining: 100%|███████████████████████████████| 391/391 [00:08<00:00, 43.89it/s]

Correct: 7734, Total: 10000, Accuracy: 0.77
Accuracy: 77.34% (0.7734)
Loss: 1.5454176287902028
Configuration 2: Accuracy = 0.7769
Training configuration 3/3: [(3, 32, 64, 5), (64, 128, 256, 5), (256, 512, 1024, 5)]
Epoch 1



raining: 100%|███████████████████████████████| 391/391 [00:37<00:00, 10.41it/s]

Correct: 3484, Total: 10000, Accuracy: 0.35
Accuracy: 34.839999999999996% (0.3484)
Loss: 2.122241133137753
Epoch 2



raining: 100%|███████████████████████████████| 391/391 [00:37<00:00, 10.35it/s]

Correct: 3483, Total: 10000, Accuracy: 0.35
Accuracy: 34.83% (0.3483)
Loss: 2.060584457297074
Epoch 3



raining: 100%|███████████████████████████████| 391/391 [00:37<00:00, 10.33it/s]

Correct: 3837, Total: 10000, Accuracy: 0.38
Accuracy: 38.37% (0.3837)
Loss: 2.066728253113596
Epoch 4



raining: 100%|███████████████████████████████| 391/391 [00:37<00:00, 10.31it/s]

Correct: 3924, Total: 10000, Accuracy: 0.39
Accuracy: 39.24% (0.3924)
Loss: 2.067378326466209
Epoch 5



raining: 100%|███████████████████████████████| 391/391 [00:37<00:00, 10.33it/s]

Correct: 3501, Total: 10000, Accuracy: 0.35
Accuracy: 35.010000000000005% (0.3501)
Loss: 2.076095982601768
Epoch 6



raining: 100%|███████████████████████████████| 391/391 [00:37<00:00, 10.31it/s]

Correct: 3769, Total: 10000, Accuracy: 0.38
Accuracy: 37.69% (0.3769)
Loss: 2.0706961280421208
Epoch 7



raining: 100%|███████████████████████████████| 391/391 [00:37<00:00, 10.34it/s]

Correct: 3653, Total: 10000, Accuracy: 0.37
Accuracy: 36.53% (0.3653)
Loss: 2.0660409927368164
Epoch 8



raining: 100%|███████████████████████████████| 391/391 [00:37<00:00, 10.36it/s]

Correct: 3692, Total: 10000, Accuracy: 0.37
Accuracy: 36.919999999999995% (0.3692)
Loss: 2.085370791585822
Epoch 9



raining: 100%|███████████████████████████████| 391/391 [00:37<00:00, 10.41it/s]

Correct: 4107, Total: 10000, Accuracy: 0.41
Accuracy: 41.07% (0.4107)
Loss: 2.0611336858649003
Epoch 10



raining: 100%|███████████████████████████████| 391/391 [00:37<00:00, 10.48it/s]

Correct: 4020, Total: 10000, Accuracy: 0.40
Accuracy: 40.2% (0.402)
Loss: 2.0691215866490413
Epoch 11



raining: 100%|███████████████████████████████| 391/391 [00:37<00:00, 10.48it/s]

Correct: 3932, Total: 10000, Accuracy: 0.39
Accuracy: 39.32% (0.3932)
Loss: 2.0782691177568937
Epoch 12



raining: 100%|███████████████████████████████| 391/391 [00:37<00:00, 10.48it/s]

Correct: 4111, Total: 10000, Accuracy: 0.41
Accuracy: 41.11% (0.4111)
Loss: 2.049823020633898
Epoch 13



raining: 100%|███████████████████████████████| 391/391 [00:37<00:00, 10.47it/s]

Correct: 3867, Total: 10000, Accuracy: 0.39
Accuracy: 38.67% (0.3867)
Loss: 2.0791739288129305
Epoch 14



raining: 100%|███████████████████████████████| 391/391 [00:37<00:00, 10.46it/s]

Correct: 3994, Total: 10000, Accuracy: 0.40
Accuracy: 39.94% (0.3994)
Loss: 2.0423301521100496
Epoch 15



raining: 100%|███████████████████████████████| 391/391 [00:37<00:00, 10.48it/s]

Correct: 3845, Total: 10000, Accuracy: 0.38
Accuracy: 38.45% (0.3845)
Loss: 2.087024713817396
Epoch 16



raining: 100%|███████████████████████████████| 391/391 [00:37<00:00, 10.49it/s]

Correct: 4107, Total: 10000, Accuracy: 0.41
Accuracy: 41.07% (0.4107)
Loss: 2.066435588033576
Epoch 17



raining: 100%|███████████████████████████████| 391/391 [00:37<00:00, 10.49it/s]

Correct: 3814, Total: 10000, Accuracy: 0.38
Accuracy: 38.14% (0.3814)
Loss: 2.073916849337126
Epoch 18



raining: 100%|███████████████████████████████| 391/391 [00:37<00:00, 10.48it/s]

Correct: 3743, Total: 10000, Accuracy: 0.37
Accuracy: 37.43% (0.3743)
Loss: 2.0774422319311845
Epoch 19



raining: 100%|███████████████████████████████| 391/391 [00:37<00:00, 10.49it/s]

Correct: 3350, Total: 10000, Accuracy: 0.34
Accuracy: 33.5% (0.335)
Loss: 2.0966429208454334
Epoch 20



raining: 100%|███████████████████████████████| 391/391 [00:37<00:00, 10.48it/s]

Correct: 4200, Total: 10000, Accuracy: 0.42
Accuracy: 42.0% (0.42)
Loss: 2.0372430901778373
Configuration 3: Accuracy = 0.4243

Best Configuration: [(3, 16, 32, 3), (32, 32, 64, 3), (64, 128, 128, 3)]
Best Accuracy: 0.7769


In [None]:
best_config = [(3, 16, 32, 3), (32, 32, 64, 3), (64, 128, 128, 3)]
conv_model = InvertedBottleneckModel(in_ch=3, num_classes=10, bottleneck_configs=best_config).to(device)

opt = torch.optim.Adam(conv_model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
BATCH_SIZE = 128
epochs = 30

timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
writer = SummaryWriter('runs/inverted_bottleneck_trainer_validation_{}'.format(timestamp))
training_and_validation_loop(conv_model, train_loader, test_loader, epochs, writer, device, opt, timestamp, flatten=False)

Epoch 1



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 39.33it/s]

Correct: 4670, Total: 10000, Accuracy: 0.47
Accuracy: 46.7% (0.467)
Loss: 2.0116328628439653



alidation: 100%|███████████████████████████████| 79/79 [00:01<00:00, 77.51it/s]

LOSS train 2.0116328628439653 valid 1.9843546152114868
Saving model
Epoch 2



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 39.73it/s]

Correct: 5313, Total: 10000, Accuracy: 0.53
Accuracy: 53.13% (0.5313)
Loss: 1.9522223974529065



alidation: 100%|███████████████████████████████| 79/79 [00:00<00:00, 79.93it/s]

LOSS train 1.9522223974529065 valid 1.9229406118392944
Saving model
Epoch 3



raining: 100%|███████████████████████████████| 391/391 [00:10<00:00, 39.06it/s]

Correct: 5581, Total: 10000, Accuracy: 0.56
Accuracy: 55.81% (0.5581)
Loss: 1.9036559744885093



alidation: 100%|███████████████████████████████| 79/79 [00:01<00:00, 75.57it/s]

LOSS train 1.9036559744885093 valid 1.8916205167770386
Saving model
Epoch 4



raining: 100%|███████████████████████████████| 391/391 [00:10<00:00, 38.98it/s]

Correct: 5693, Total: 10000, Accuracy: 0.57
Accuracy: 56.93% (0.5693)
Loss: 1.8688307373147262



alidation: 100%|███████████████████████████████| 79/79 [00:00<00:00, 84.66it/s]

LOSS train 1.8688307373147262 valid 1.8766107559204102
Saving model
Epoch 5



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 42.00it/s]

Correct: 6286, Total: 10000, Accuracy: 0.63
Accuracy: 62.86000000000001% (0.6286)
Loss: 1.8216626455909328



alidation: 100%|███████████████████████████████| 79/79 [00:01<00:00, 76.10it/s]

LOSS train 1.8216626455909328 valid 1.823302149772644
Saving model
Epoch 6



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 40.05it/s]

Correct: 6546, Total: 10000, Accuracy: 0.65
Accuracy: 65.46% (0.6546)
Loss: 1.800830740677683



alidation: 100%|███████████████████████████████| 79/79 [00:00<00:00, 88.33it/s]

LOSS train 1.800830740677683 valid 1.8005326986312866
Saving model
Epoch 7



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 42.28it/s]

Correct: 6676, Total: 10000, Accuracy: 0.67
Accuracy: 66.75999999999999% (0.6676)
Loss: 1.7642273024508828



alidation: 100%|███████████████████████████████| 79/79 [00:00<00:00, 84.49it/s]

LOSS train 1.7642273024508828 valid 1.786367654800415
Saving model
Epoch 8



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 42.52it/s]

Correct: 6714, Total: 10000, Accuracy: 0.67
Accuracy: 67.14% (0.6714)
Loss: 1.7350438833236694



alidation: 100%|███████████████████████████████| 79/79 [00:01<00:00, 70.02it/s]

LOSS train 1.7350438833236694 valid 1.779754638671875
Saving model
Epoch 9



raining: 100%|███████████████████████████████| 391/391 [00:10<00:00, 38.75it/s]

Correct: 6790, Total: 10000, Accuracy: 0.68
Accuracy: 67.9% (0.679)
Loss: 1.727854571844402



alidation: 100%|███████████████████████████████| 79/79 [00:00<00:00, 84.51it/s]

LOSS train 1.727854571844402 valid 1.7709789276123047
Saving model
Epoch 10



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 39.35it/s]

Correct: 6839, Total: 10000, Accuracy: 0.68
Accuracy: 68.39% (0.6839)
Loss: 1.71670537245901



alidation: 100%|███████████████████████████████| 79/79 [00:00<00:00, 84.88it/s]

LOSS train 1.71670537245901 valid 1.7663599252700806
Saving model
Epoch 11



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 39.92it/s]

Correct: 6863, Total: 10000, Accuracy: 0.69
Accuracy: 68.63% (0.6863)
Loss: 1.7021617199245251



alidation: 100%|███████████████████████████████| 79/79 [00:00<00:00, 83.92it/s]

LOSS train 1.7021617199245251 valid 1.7630140781402588
Saving model
Epoch 12



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 39.55it/s]

Correct: 6898, Total: 10000, Accuracy: 0.69
Accuracy: 68.97999999999999% (0.6898)
Loss: 1.6941731854488975



alidation: 100%|███████████████████████████████| 79/79 [00:00<00:00, 87.06it/s]

LOSS train 1.6941731854488975 valid 1.7608768939971924
Saving model
Epoch 13



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 40.34it/s]

Correct: 6952, Total: 10000, Accuracy: 0.70
Accuracy: 69.52000000000001% (0.6952)
Loss: 1.6766176349238346



alidation: 100%|███████████████████████████████| 79/79 [00:01<00:00, 68.26it/s]

LOSS train 1.6766176349238346 valid 1.7546366453170776
Saving model
Epoch 14



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 40.84it/s]

Correct: 6959, Total: 10000, Accuracy: 0.70
Accuracy: 69.59% (0.6959)
Loss: 1.6784398994947736



alidation: 100%|███████████████████████████████| 79/79 [00:00<00:00, 84.31it/s]

LOSS train 1.6784398994947736 valid 1.753688097000122
Epoch 15



raining: 100%|███████████████████████████████| 391/391 [00:10<00:00, 38.86it/s]

Correct: 6972, Total: 10000, Accuracy: 0.70
Accuracy: 69.72% (0.6972)
Loss: 1.6551515177676552



alidation: 100%|███████████████████████████████| 79/79 [00:00<00:00, 86.95it/s]

LOSS train 1.6551515177676552 valid 1.749606966972351
Saving model
Epoch 16



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 42.21it/s]

Correct: 6979, Total: 10000, Accuracy: 0.70
Accuracy: 69.78999999999999% (0.6979)
Loss: 1.6464685201644897



alidation: 100%|███████████████████████████████| 79/79 [00:00<00:00, 85.61it/s]

LOSS train 1.6464685201644897 valid 1.7464460134506226
Saving model
Epoch 17



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 41.64it/s]

Correct: 6975, Total: 10000, Accuracy: 0.70
Accuracy: 69.75% (0.6975)
Loss: 1.6366635623731112



alidation: 100%|███████████████████████████████| 79/79 [00:01<00:00, 71.81it/s]

LOSS train 1.6366635623731112 valid 1.7474440336227417
Saving model
Epoch 18



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 41.38it/s]

Correct: 7025, Total: 10000, Accuracy: 0.70
Accuracy: 70.25% (0.7025)
Loss: 1.6311832352688438



alidation: 100%|███████████████████████████████| 79/79 [00:01<00:00, 74.43it/s]

LOSS train 1.6311832352688438 valid 1.743212103843689
Saving model
Epoch 19



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 41.06it/s]

Correct: 7042, Total: 10000, Accuracy: 0.70
Accuracy: 70.42% (0.7042)
Loss: 1.6223034356769763



alidation: 100%|███████████████████████████████| 79/79 [00:01<00:00, 76.55it/s]

LOSS train 1.6223034356769763 valid 1.7445515394210815
Saving model
Epoch 20



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 41.28it/s]

Correct: 7055, Total: 10000, Accuracy: 0.71
Accuracy: 70.55% (0.7055)
Loss: 1.6138092279434204



alidation: 100%|███████████████████████████████| 79/79 [00:01<00:00, 76.17it/s]

LOSS train 1.6138092279434204 valid 1.7398490905761719
Saving model
Epoch 21



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 40.87it/s]

Correct: 7036, Total: 10000, Accuracy: 0.70
Accuracy: 70.36% (0.7036)
Loss: 1.605806777351781



alidation: 100%|███████████████████████████████| 79/79 [00:01<00:00, 77.25it/s]

LOSS train 1.605806777351781 valid 1.7413885593414307
Saving model
Epoch 22



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 41.34it/s]

Correct: 7049, Total: 10000, Accuracy: 0.70
Accuracy: 70.49% (0.7049)
Loss: 1.6045298011679399



alidation: 100%|███████████████████████████████| 79/79 [00:00<00:00, 79.98it/s]

LOSS train 1.6045298011679399 valid 1.7402938604354858
Saving model
Epoch 23



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 42.12it/s]

Correct: 7000, Total: 10000, Accuracy: 0.70
Accuracy: 70.0% (0.7)
Loss: 1.5903868361523277



alidation: 100%|███████████████████████████████| 79/79 [00:01<00:00, 76.09it/s]

LOSS train 1.5903868361523277 valid 1.743006706237793
Saving model
Epoch 24



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 40.42it/s]

Correct: 7051, Total: 10000, Accuracy: 0.71
Accuracy: 70.50999999999999% (0.7051)
Loss: 1.596183049051385



alidation: 100%|███████████████████████████████| 79/79 [00:01<00:00, 77.61it/s]

LOSS train 1.596183049051385 valid 1.739232063293457
Epoch 25



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 41.01it/s]

Correct: 7049, Total: 10000, Accuracy: 0.70
Accuracy: 70.49% (0.7049)
Loss: 1.5817251644636456



alidation: 100%|███████████████████████████████| 79/79 [00:00<00:00, 87.42it/s]

LOSS train 1.5817251644636456 valid 1.7389764785766602
Saving model
Epoch 26



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 41.01it/s]

Correct: 7065, Total: 10000, Accuracy: 0.71
Accuracy: 70.65% (0.7065)
Loss: 1.5681042106528031



alidation: 100%|███████████████████████████████| 79/79 [00:01<00:00, 70.60it/s]

LOSS train 1.5681042106528031 valid 1.7357405424118042
Saving model
Epoch 27



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 40.46it/s]

Correct: 7081, Total: 10000, Accuracy: 0.71
Accuracy: 70.81% (0.7081)
Loss: 1.5696278622275905



alidation: 100%|███████████████████████████████| 79/79 [00:01<00:00, 74.80it/s]

LOSS train 1.5696278622275905 valid 1.7381706237792969
Epoch 28



raining: 100%|███████████████████████████████| 391/391 [00:09<00:00, 40.11it/s]

Correct: 7082, Total: 10000, Accuracy: 0.71
Accuracy: 70.82000000000001% (0.7082)
Loss: 1.5635075631894564



alidation: 100%|███████████████████████████████| 79/79 [00:01<00:00, 77.21it/s]

LOSS train 1.5635075631894564 valid 1.7355470657348633
Saving model
Epoch 29



raining: 100%|███████████████████████████████| 391/391 [00:10<00:00, 38.19it/s]

Correct: 7067, Total: 10000, Accuracy: 0.71
Accuracy: 70.67% (0.7067)
Loss: 1.5660095842261064



alidation: 100%|███████████████████████████████| 79/79 [00:01<00:00, 77.21it/s]

LOSS train 1.5660095842261064 valid 1.7385624647140503
Epoch 30



raining: 100%|███████████████████████████████| 391/391 [00:10<00:00, 38.20it/s]

Correct: 7052, Total: 10000, Accuracy: 0.71
Accuracy: 70.52000000000001% (0.7052)
Loss: 1.5585252485777203


Validation:   0%|                                        | 0/79 [00:00<?, ?it/s]

In [11]:
%load_ext tensorboard
%tensorboard --logdir runs

Reusing TensorBoard on port 6006 (pid 7111), started 12:14:39 ago. (Use '!kill 7111' to kill it.)