In [1]:
import torch
from torch import nn, optim
from torch.utils.data import random_split, DataLoader
from torchinfo import summary
from torchvision import datasets, transforms, models

# Prefer the GPU whenever PyTorch can see one; otherwise run on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print(DEVICE)

cuda


## Implementation

In [2]:
class ConvBlock(nn.Module):
    """Convolution followed by batch normalisation and a ReLU activation.

    Args:
        in_channels: Number of channels of the incoming feature map.
        out_channels: Number of channels produced by the convolution.
        **kwargs: Passed through unchanged to ``nn.Conv2d`` (for example
            ``kernel_size``, ``stride`` or ``padding``).
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()
        # The convolution is created without a bias term; the BatchNorm2d
        # layer that follows it has its own learnable shift.
        layers = [
            nn.Conv2d(in_channels, out_channels, bias=False, **kwargs),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        ]
        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        """Apply conv -> batch norm -> ReLU to ``x`` and return the result."""
        features = self.conv(x)
        return features


class InceptionBlock(nn.Module):
    """Four parallel branches whose outputs are concatenated along channels.

    Every branch keeps the spatial size of its input (stride 1 with matching
    padding), so the output has ``ch1 + ch3 + ch5 + ch1_p`` channels.

    Args:
        in_channels: Channels of the incoming feature map.
        ch1: Output channels of the 1x1 branch.
        ch3_r: Channels of the 1x1 reduction in front of the 3x3 convolution.
        ch3: Output channels of the 3x3 branch.
        ch5_r: Channels of the 1x1 reduction in front of the 5x5 convolution.
        ch5: Output channels of the 5x5 branch.
        ch1_p: Output channels of the 1x1 convolution after max pooling.
    """

    def __init__(self, in_channels, ch1, ch3_r, ch3, ch5_r, ch5, ch1_p):
        super().__init__()

        # Branch 1: a single 1x1 convolution.
        self.branch1 = ConvBlock(in_channels, ch1, kernel_size=1)

        # Branch 2: 1x1 reduction, then a 3x3 convolution.
        reduce_3x3 = ConvBlock(in_channels, ch3_r, kernel_size=1)
        conv_3x3 = ConvBlock(ch3_r, ch3, kernel_size=3, padding=1)
        self.branch2 = nn.Sequential(reduce_3x3, conv_3x3)

        # Branch 3: 1x1 reduction, then a 5x5 convolution.
        reduce_5x5 = ConvBlock(in_channels, ch5_r, kernel_size=1)
        conv_5x5 = ConvBlock(ch5_r, ch5, kernel_size=5, padding=2)
        self.branch3 = nn.Sequential(reduce_5x5, conv_5x5)

        # Branch 4: 3x3 max pooling (stride 1), then a 1x1 projection.
        pool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        pool_proj = ConvBlock(in_channels, ch1_p, kernel_size=1)
        self.branch4 = nn.Sequential(pool, pool_proj)

    def forward(self, x):
        """Feed ``x`` to every branch and concatenate the results on dim 1."""
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        return torch.cat([branch(x) for branch in branches], dim=1)


class AuxiliaryClassifier(nn.Module):
    """Auxiliary classification head attached to an intermediate feature map.

    Pipeline: 5x5 average pooling with stride 3 -> 1x1 ``ConvBlock`` to 128
    channels -> flatten -> ``Linear(2048, 1024)`` -> ReLU -> ``Dropout(0.7)``
    -> ``Linear(1024, n_classes)``.

    The first linear layer expects 2048 = 128 * 4 * 4 flattened features, so
    the pooled map must be 4x4 (e.g. a 14x14 input feature map).

    Args:
        in_channels: Channels of the feature map the head is attached to.
        n_classes: Number of output logits.
    """

    def __init__(self, in_channels, n_classes):
        super().__init__()
        self.avg_pool = nn.AvgPool2d(5, stride=3)
        self.conv1 = ConvBlock(in_channels, 128, kernel_size=1)
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(2048, 1024),
            nn.ReLU(),
            nn.Dropout(0.7),
            nn.Linear(1024, n_classes)
        )

    def forward(self, x):
        """Return class logits of shape ``(batch, n_classes)`` for ``x``.

        Previously this method returned ``x`` untouched, so none of the
        layers built in ``__init__`` were ever applied.
        """
        x = self.avg_pool(x)
        x = self.conv1(x)
        return self.classifier(x)


# Sanity check: with the inception (3a) configuration the block should turn
# 192 input channels into 64 + 128 + 32 + 32 = 256 output channels.
sanity_block = InceptionBlock(192, 64, 96, 128, 16, 32, 32)
sanity_input = torch.rand(1, 192, 28, 28)
sanity_block(sanity_input).shape

torch.Size([1, 256, 28, 28])

In [3]:
class InceptionNetV1(nn.Module):
    """Inception v1 (GoogLeNet-style) image classifier.

    Layout: a convolutional stem, nine ``InceptionBlock`` stages (3a-3b,
    4a-4e, 5a-5b) separated by stride-2 max pooling, global average pooling
    and a dropout + linear classifier. Two optional ``AuxiliaryClassifier``
    heads tap the outputs of stages 4a and 4d.

    Args:
        n_channels: Number of channels of the input images.
        n_classes: Number of output logits.
        use_aux: Whether to build the auxiliary heads. When truthy,
            ``forward`` returns a 3-tuple instead of a single tensor.
    """

    def __init__(self, n_channels, n_classes, use_aux):
        super().__init__()

        self.use_aux = use_aux

        def halve():
            # Every down-sampling step uses the same 3x3 / stride-2 pooling.
            return nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Stem
        self.conv1 = ConvBlock(n_channels, 64, kernel_size=7, stride=2, padding=3)
        self.maxpool1 = halve()
        self.conv2 = ConvBlock(64, 64, kernel_size=1)
        self.conv3 = ConvBlock(64, 192, kernel_size=3, padding=1)
        self.maxpool2 = halve()

        # Stage 3
        self.inception_3a = InceptionBlock(192, 64, 96, 128, 16, 32, 32)
        self.inception_3b = InceptionBlock(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = halve()

        # Stage 4, with the optional auxiliary heads after 4a and 4d
        self.inception_4a = InceptionBlock(480, 192, 96, 208, 16, 48, 64)
        if self.use_aux:
            self.aux_classifier1 = AuxiliaryClassifier(512, n_classes)
        else:
            self.aux_classifier1 = None
        self.inception_4b = InceptionBlock(512, 160, 112, 224, 24, 64, 64)
        self.inception_4c = InceptionBlock(512, 128, 128, 256, 24, 64, 64)
        self.inception_4d = InceptionBlock(512, 112, 144, 288, 32, 64, 64)
        if self.use_aux:
            self.aux_classifier2 = AuxiliaryClassifier(528, n_classes)
        else:
            self.aux_classifier2 = None
        self.inception_4e = InceptionBlock(528, 256, 160, 320, 32, 128, 128)
        self.maxpool4 = halve()

        # Stage 5
        self.inception_5a = InceptionBlock(832, 256, 160, 320, 32, 128, 128)
        self.inception_5b = InceptionBlock(832, 384, 192, 384, 48, 128, 128)

        # Classification head
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        head_layers = [
            nn.Flatten(),
            nn.Dropout(0.5),
            nn.Linear(1024, n_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the network on ``x``.

        Returns:
            The main classifier output when ``use_aux`` is falsy. Otherwise a
            tuple ``(main, aux1, aux2)``; the auxiliary entries are ``None``
            unless the module is in training mode.
        """
        run_aux = self.use_aux and self.training

        # Stem, stage 3 and the first block of stage 4.
        for layer in (
            self.conv1, self.maxpool1, self.conv2, self.conv3, self.maxpool2,
            self.inception_3a, self.inception_3b, self.maxpool3,
            self.inception_4a,
        ):
            x = layer(x)
        aux_out1 = self.aux_classifier1(x) if run_aux else None

        for layer in (self.inception_4b, self.inception_4c, self.inception_4d):
            x = layer(x)
        aux_out2 = self.aux_classifier2(x) if run_aux else None

        # Remainder of stage 4, stage 5 and the classification head.
        for layer in (
            self.inception_4e, self.maxpool4,
            self.inception_5a, self.inception_5b,
            self.avgpool, self.classifier,
        ):
            x = layer(x)

        if self.use_aux:
            return (x, aux_out1, aux_out2)
        return x

In [4]:
# Build the hand-written network (auxiliary heads enabled) on the CPU and
# show a per-layer summary for a single 3x224x224 input.
inception_v1_model = InceptionNetV1(3, 1000, use_aux=True)
inception_v1_model = inception_v1_model.to('cpu')
summary(
    inception_v1_model,
    input_size=(1, 3, 224, 224),
    col_names=['input_size', 'output_size', 'num_params'],
    device='cpu',
    depth=2,
)

Layer (type:depth-idx)                        Input Shape               Output Shape              Param #
InceptionNetV1                                [1, 3, 224, 224]          [1, 1000]                 6,379,984
├─ConvBlock: 1-1                              [1, 3, 224, 224]          [1, 64, 112, 112]         --
│    └─Sequential: 2-1                        [1, 3, 224, 224]          [1, 64, 112, 112]         9,536
├─MaxPool2d: 1-2                              [1, 64, 112, 112]         [1, 64, 56, 56]           --
├─ConvBlock: 1-3                              [1, 64, 56, 56]           [1, 64, 56, 56]           --
│    └─Sequential: 2-2                        [1, 64, 56, 56]           [1, 64, 56, 56]           4,224
├─ConvBlock: 1-4                              [1, 64, 56, 56]           [1, 192, 56, 56]          --
│    └─Sequential: 2-3                        [1, 64, 56, 56]           [1, 192, 56, 56]          110,976
├─MaxPool2d: 1-5                              [1, 192, 56, 56]      

In [5]:
# Same summary for torchvision's GoogLeNet, for side-by-side comparison
# with the hand-written implementation.
inception_v1_torch_model = models.googlenet()
inception_v1_torch_model = inception_v1_torch_model.to('cpu')
summary(
    inception_v1_torch_model,
    input_size=(1, 3, 224, 224),
    col_names=['input_size', 'output_size', 'num_params'],
    device='cpu',
    depth=2,
)



Layer (type:depth-idx)                   Input Shape               Output Shape              Param #
GoogLeNet                                [1, 3, 224, 224]          [1, 1000]                 6,379,984
├─BasicConv2d: 1-1                       [1, 3, 224, 224]          [1, 64, 112, 112]         --
│    └─Conv2d: 2-1                       [1, 3, 224, 224]          [1, 64, 112, 112]         9,408
│    └─BatchNorm2d: 2-2                  [1, 64, 112, 112]         [1, 64, 112, 112]         128
├─MaxPool2d: 1-2                         [1, 64, 112, 112]         [1, 64, 56, 56]           --
├─BasicConv2d: 1-3                       [1, 64, 56, 56]           [1, 64, 56, 56]           --
│    └─Conv2d: 2-3                       [1, 64, 56, 56]           [1, 64, 56, 56]           4,096
│    └─BatchNorm2d: 2-4                  [1, 64, 56, 56]           [1, 64, 56, 56]           128
├─BasicConv2d: 1-4                       [1, 64, 56, 56]           [1, 192, 56, 56]          --
│    └─Conv2d: 2-5  

## Training

In [6]:
from pathlib import Path

TRAIN_RATIO = 0.8
SPLIT_SEED = 42  # fixed seed so the train/validation split is the same on every run
data_dir = Path('./data/')

# Resize each image so its shorter side is 224 pixels, then convert it to a tensor.
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
])

# Keep the full training set under its own name so re-running this cell
# always splits the original dataset rather than an earlier subset.
full_train_ds = datasets.CIFAR100(data_dir, train=True, download=True, transform=transform)

# Both subsets read their samples through `full_train_ds`, so they already
# share `transform`; a Subset has no transform hook of its own, which is why
# none is assigned to `val_ds` here.
train_ds, val_ds = random_split(
    full_train_ds,
    (TRAIN_RATIO, 1 - TRAIN_RATIO),
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
test_ds = datasets.CIFAR100(data_dir, train=False, download=True, transform=transform)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
import wandb
from src.engine import *

# Hyper-parameters for this run; they are also logged to Weights & Biases.
config = dict(batch_size=128, lr=5e-4, epochs=20, dataset='CIFAR100')
with wandb.init(project='pytorch-study', name='InceptionV1', config=config) as run:
    w_config = run.config
    train_dl = DataLoader(train_ds, batch_size=w_config.batch_size, shuffle=True)
    # Validation batches do not need to be shuffled.
    val_dl = DataLoader(val_ds, batch_size=w_config.batch_size, shuffle=False)

    # `train_ds` is a Subset, so the class list lives on the wrapped dataset.
    n_classes = len(train_ds.dataset.classes)
    # Size the output layer from the dataset instead of a hard-coded 1000.
    # NOTE: the variable is named `vgg_model` although it holds an
    # InceptionNetV1; the name is kept in case later cells refer to it.
    vgg_model = InceptionNetV1(3, n_classes, use_aux=False).to(DEVICE)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(vgg_model.parameters(), lr=w_config.lr)

    # `train` comes from the wildcard import of `src.engine`.
    loss_history, acc_history = train(vgg_model, train_dl, val_dl, criterion, optimizer, w_config.epochs, DEVICE, run)

Epoch=1:   0%|          | 0/20 [00:00<?, ?it/s]