### Imports

In [1]:
import torch
from torch import nn
import torch.nn.functional as F
from torchinfo import summary as Model_Summary
import torch.optim as optim
from typing import Optional

PyTorch Version:  2.0.0+cu117
Torchvision Version:  0.15.1+cu117


### Inception Model

Convolutional block: 1 convolutional layer + batch normalisation, with ReLU activation

- include kernel size and stride here?
- is environment.yaml file .yaml or .yml?

In [None]:
class ConvBlock(nn.Module):
    """Convolution followed by batch normalisation and a ReLU activation.

    Args:
        in_channels: Number of channels in the input tensor.
        out_channels: Number of channels produced by the convolution.
        kernel_size: Kernel size forwarded to ``nn.Conv2d``.
        stride: Stride forwarded to ``nn.Conv2d``.
        padding: Padding forwarded to ``nn.Conv2d``. PyTorch rejects
            ``'same'`` for strided convolutions, so pass an explicit value
            (e.g. ``0``) whenever ``stride != 1``.
    """

    def __init__(self, in_channels: int, out_channels: int, kernel_size, stride=1, padding='same'):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding)
        # PyTorch updates running statistics as
        #     running = (1 - momentum) * running + momentum * batch_stat
        # so a TensorFlow-style moving-average decay of 0.9997 corresponds to
        # momentum = 1 - 0.9997 = 0.0003. Passing 0.9997 directly would replace
        # the running statistics with the latest batch almost entirely.
        self.batch_norm = nn.BatchNorm2d(out_channels, eps=0.001, momentum=0.0003)

    def forward(self, x):
        """Apply conv -> batch norm -> ReLU to ``x``."""
        return F.relu(self.batch_norm(self.conv(x)))

Inception block: contains several parallel branches; the outputs of these branches are concatenated in the `forward()` function to produce the final output.

In [None]:
class InceptionBlockA(nn.Module):
    """Skeleton of an Inception block whose branch outputs are concatenated.

    No branches are defined yet: ``in_channels`` is accepted but unused, and
    ``forward`` passes an empty list to ``torch.cat``.
    """

    def __init__(self, in_channels):
        super().__init__()

    def forward(self, x):
        """Concatenate the (currently empty) list of branch outputs along dim 1."""
        # TODO: append each branch's output here once the branches exist.
        branch_outputs = []
        return torch.cat(branch_outputs, dim=1)

Auxiliary classifier (TODO: check against the setup used in Inception v3)

In [None]:
class InceptionAux(nn.Module):
    """Auxiliary classification head.

    Pipeline: 5x5 average pool (stride 3) -> 1x1 ConvBlock (128 channels)
    -> 5x5 ConvBlock (768 channels) -> global average pool -> linear layer.

    Args:
        in_channels: Number of channels in the incoming feature map.
        num_classes: Size of the output logits vector.
        stride: Accepted but currently unused.
        padding: Accepted but currently unused.
    """

    def __init__(self, in_channels: int, num_classes: int, stride=1, padding='same'):
        super().__init__()
        # Pooling has no learnable weights, so it is applied functionally in
        # forward() instead of being registered as a layer.
        self.conv0 = ConvBlock(in_channels, 128, kernel_size=1)
        self.conv1 = ConvBlock(128, 768, kernel_size=5)
        self.fc = nn.Linear(768, num_classes)

    def forward(self, x):
        """Return the auxiliary logits for feature map ``x``."""
        pooled = F.avg_pool2d(x, kernel_size=5, stride=3)
        features = self.conv1(self.conv0(pooled))
        # Collapse the spatial dimensions to 1x1 before the linear layer.
        features = F.adaptive_avg_pool2d(features, (1, 1))
        return self.fc(torch.flatten(features, 1))

Full model

In [None]:
class InceptionV3(nn.Module):
    """Inception v3 classifier with an optional auxiliary head.

    Args:
        aux_logits: If True, attach an ``InceptionAux`` head to the output of
            ``inception_mixed_7``.
        num_classes: Number of output classes for the main (and auxiliary) head.

    ``forward`` returns a tuple ``(logits, aux)``. ``aux`` is ``None`` unless
    the auxiliary head exists and the module is in training mode.
    """

    def __init__(self, aux_logits=True, num_classes=2):
        super().__init__()

        self.aux_logits = aux_logits
        in_channels = 3

        # Initial convolutional and pooling layers. ConvBlock is used so that
        # every convolution is followed by batch norm and a ReLU; a stack of
        # bare nn.Conv2d layers would be purely linear. padding=0 is passed
        # explicitly where the plain convolutions used the nn.Conv2d default
        # (ConvBlock defaults to 'same', which PyTorch rejects for stride 2).
        self.conv0 = ConvBlock(in_channels, 32, kernel_size=3, stride=2, padding=0)
        self.conv1 = ConvBlock(32, 32, kernel_size=3, stride=1, padding=0)
        self.conv2 = ConvBlock(32, 64, kernel_size=3, stride=1, padding='same')
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.conv3 = ConvBlock(64, 80, kernel_size=1, stride=1, padding=0)
        self.conv4 = ConvBlock(80, 192, kernel_size=3, stride=1, padding=0)
        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2)

        # Inception blocks.
        # Pooling keeps the channel count, so the first block receives the 192
        # channels produced by conv4 (not the 3 image channels).
        # NOTE(review): the remaining widths (256, 288, 768, 1280, 2048) assume
        # InceptionBlock concatenates the standard Inception v3 branch widths
        # -- confirm against the InceptionBlock implementation.
        self.inception_mixed_0 = InceptionBlock(
            192, b1_out_channels=64, avg_pool_channels=32,
            include1x1=True, include5x5=True, include3x3dbl=True, include_pool_avg=True)
        self.inception_mixed_1 = InceptionBlock(
            256, b1_out_channels=64, avg_pool_channels=64,
            include1x1=True, include5x5=True, include3x3dbl=True, include_pool_avg=True)
        self.inception_mixed_2 = InceptionBlock(
            288, b1_out_channels=64, avg_pool_channels=64,
            include1x1=True, include5x5=True, include3x3dbl=True, include_pool_avg=True)
        self.inception_mixed_3 = InceptionBlock(
            288, b3_out_channels=384, avg_pool_channels=64,
            include3x3=True, include3x3dbl_1=True, include_pool_max=True)
        self.inception_mixed_4 = InceptionBlock(
            768, b1_out_channels=192, b7_channels=128, avg_pool_channels=192,
            include1x1=True, include7x7=True, include7x7dbl=True, include_pool_avg=True)
        self.inception_mixed_5 = InceptionBlock(
            768, b1_out_channels=192, b7_channels=160, avg_pool_channels=192,
            include1x1=True, include7x7=True, include7x7dbl=True, include_pool_avg=True)
        self.inception_mixed_6 = InceptionBlock(
            768, b1_out_channels=192, b7_channels=160, avg_pool_channels=192,
            include1x1=True, include7x7=True, include7x7dbl=True, include_pool_avg=True)
        self.inception_mixed_7 = InceptionBlock(
            768, b1_out_channels=192, b7_channels=192, avg_pool_channels=192,
            include1x1=True, include7x7=True, include7x7dbl=True, include_pool_avg=True)
        self.inception_mixed_8 = InceptionBlock(
            768, include3x3_1=True, include7x7x3=True, include_pool_max=True)
        self.inception_mixed_9 = InceptionBlock(
            1280, b1_out_channels=320, avg_pool_channels=192,
            include1x1=True, include3x3_2=True, include3x3dbl_2=True, include_pool_avg=True)
        self.inception_mixed_10 = InceptionBlock(
            2048, b1_out_channels=320, avg_pool_channels=192,
            include1x1=True, include3x3_2=True, include3x3dbl_2=True, include_pool_avg=True)

        # Always define the attribute so forward() can test it even when the
        # auxiliary head is disabled. The head consumes the output of
        # inception_mixed_7 (768 channels under the assumption noted above).
        self.AuxLogits = InceptionAux(768, num_classes) if aux_logits else None

        # Final layers.
        # nn.AvgPool2d needs a kernel_size and has no 'valid' padding mode;
        # adaptive pooling collapses any spatial size to 1x1.
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        # NOTE(review): 0.8 looks like a TensorFlow keep-probability; PyTorch's
        # `p` is the probability of *dropping* a unit, hence 0.2 -- confirm.
        self.dropout = nn.Dropout(p=0.2)
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(in_features=2048, out_features=num_classes)

    def forward(self, x):
        """Return ``(logits, aux)`` for the input batch ``x``."""
        # Initial layers
        x = self.conv0(x)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.pool1(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.pool2(x)
        # Inception blocks
        x = self.inception_mixed_0(x)
        x = self.inception_mixed_1(x)
        x = self.inception_mixed_2(x)
        x = self.inception_mixed_3(x)
        x = self.inception_mixed_4(x)
        x = self.inception_mixed_5(x)
        x = self.inception_mixed_6(x)
        x = self.inception_mixed_7(x)
        # Auxiliary head: only evaluated while training.
        aux: Optional[torch.Tensor] = None
        if self.AuxLogits is not None and self.training:
            aux = self.AuxLogits(x)
        # Remaining Inception blocks
        x = self.inception_mixed_8(x)
        x = self.inception_mixed_9(x)
        x = self.inception_mixed_10(x)

        x = self.avg_pool(x)
        x = self.dropout(x)
        x = self.flatten(x)
        x = self.fc(x)

        return x, aux

Check model

In [None]:
# Instantiate the network with its default arguments and print its module tree.
model = InceptionV3()
print(model)

# Optional layer-by-layer summary via torchinfo. `test_input` is only defined in
# the sanity-check cell further down, so that cell has to run first.
# Model_Summary(model, test_input.shape)

In [None]:
# Test model: sanity check with dummy input
test_input = torch.randn(2, 3, 224, 224)
# InceptionV3.forward returns a 2-tuple (logits, aux); aux is None when the
# auxiliary head is not evaluated.
output, aux = model(test_input)
print(output.shape)

### Helper functions

### Parameters

In [2]:
# Number of target classes in the dataset.
num_classes = 2
# Mini-batch size used for training; lower it if memory is limited.
batch_size = 32
# Number of training epochs.
# NOTE: the reference paper trains for 100 epochs.
num_epochs = 20

### Initialise model

In [None]:
# move function to `initalise_models.py`
def inception(num_classes):
    """Build the Inception v3 model together with its optimiser and loss.

    Args:
        num_classes: Number of output classes passed to ``InceptionV3``.

    Returns:
        Tuple ``(model, optimiser, criterion, parameters)`` where
        ``parameters`` is a dict of the hyperparameters defined below.
    """
    # Hyperparameters
    RMSPROP_DECAY = 0.9                 # Decay term for RMSProp (PyTorch's `alpha`).
    # NOTE(review): 0.00004 is taken from the notebook's own
    # "weight_decay = 0.00004?" note -- confirm. The RMSProp decay term (0.9)
    # is a smoothing constant and must not be passed as the L2 `weight_decay`.
    WEIGHT_DECAY = 0.00004              # L2 penalty on the weights.
    MOMENTUM = 0.9                      # Momentum in RMSProp.
    EPSILON = 1.0                       # Epsilon term for RMSProp.
    INITIAL_LEARNING_RATE = 0.1         # Initial learning rate.
    NUM_EPOCHS_PER_DECAY = 30.0         # Epochs after which learning rate decays.
    LEARNING_RATE_DECAY_FACTOR = 0.16   # Learning rate decay factor.

    model = InceptionV3(num_classes=num_classes)
    # The optimiser needs the iterable of parameters it should update.
    optimiser = optim.RMSprop(
        model.parameters(),
        lr=INITIAL_LEARNING_RATE,
        alpha=RMSPROP_DECAY,
        eps=EPSILON,
        weight_decay=WEIGHT_DECAY,
        momentum=MOMENTUM,
    )
    criterion = nn.CrossEntropyLoss()
    parameters = {
        "learning_rate": INITIAL_LEARNING_RATE,
        "momentum": MOMENTUM,
        "rmsprop_decay": RMSPROP_DECAY,
        "epsilon": EPSILON,
        "weight_decay": WEIGHT_DECAY,
        # Not consumed here; recorded so a learning-rate scheduler can use them.
        "num_epochs_per_decay": NUM_EPOCHS_PER_DECAY,
        "learning_rate_decay_factor": LEARNING_RATE_DECAY_FACTOR,
    }

    return model, optimiser, criterion, parameters

In [None]:
# Define the loss function with weight decay
loss_fn = nn.CrossEntropyLoss()
weight_decay = 0.00004


def regularised_loss(module, output, target, weight_decay=weight_decay):
    """Cross-entropy loss plus a weighted penalty on ``module``'s parameters.

    Args:
        module: ``nn.Module`` whose parameters are penalised.
        output: Logits passed to the cross-entropy criterion.
        target: Target class indices passed to the cross-entropy criterion.
        weight_decay: Multiplier applied to the penalty term.

    Returns:
        Scalar tensor ``loss_fn(output, target) + weight_decay * penalty``.
    """
    # NOTE(review): the penalty sums the (un-squared) norm of each parameter
    # tensor; conventional L2 weight decay uses squared norms -- confirm which
    # form is intended.
    norms = [torch.norm(param) for param in module.parameters()]
    # A module without parameters contributes no penalty.
    l2_reg = torch.stack(norms).sum() if norms else output.new_zeros(())
    return loss_fn(output, target) + weight_decay * l2_reg

### Load data

### Model

In [None]:
# Optimiser and learning-rate schedule hyperparameters.
RMSPROP_DECAY = 0.9                # Decay term for RMSProp (smoothing constant, not an L2 weight decay).
MOMENTUM = 0.9                     # Momentum in RMSProp.
RMSPROP_EPSILON = 1.0              # Epsilon term for RMSProp.
INITIAL_LEARNING_RATE = 0.1        # Initial learning rate.
NUM_EPOCHS_PER_DECAY = 30.0        # Epochs after which learning rate decays.
LEARNING_RATE_DECAY_FACTOR = 0.16  # Factor applied to the learning rate at each decay.