# Model File

This file's purpose is to develop the ResNet50 CNN based on the following architecture: https://medium.com/@sharma.tanish096/detailed-explanation-of-residual-network-resnet50-cnn-model-106e0ab9fa9e

In [1]:
from importnb import Notebook
with Notebook():
    import dataloader

../data/intel-image-classification.zip already exists. Skipping download.
../data already exists and is not empty. Skipping extraction.
There are 3 directories and 0 images in '../data/intel-image-classification'.
There are 1 directories and 0 images in '../data/intel-image-classification/seg_test'.
There are 6 directories and 0 images in '../data/intel-image-classification/seg_test/seg_test'.
There are 0 directories and 474 images in '../data/intel-image-classification/seg_test/seg_test/forest'.
There are 0 directories and 437 images in '../data/intel-image-classification/seg_test/seg_test/buildings'.
There are 0 directories and 553 images in '../data/intel-image-classification/seg_test/seg_test/glacier'.
There are 0 directories and 501 images in '../data/intel-image-classification/seg_test/seg_test/street'.
There are 0 directories and 525 images in '../data/intel-image-classification/seg_test/seg_test/mountain'.
There are 0 directories and 510 images in '../data/intel-image-classific

In [4]:
# Re-export the paths, transforms, datasets and loaders prepared by the
# dataloader notebook so the cells below can use them as plain names.
from dataloader import (
    image_path,
    train_dir,
    test_dir,
    pred_dir,
    train_transform,
    plot_transformed_images,
    train_data,
    test_data,
    BATCH_SIZE,
    train_dataloader,
    test_dataloader,
)

In [7]:
# Fetch the first training sample a single time: every `train_data[0]`
# lookup re-runs the dataset's item access, so indexing twice does the
# work twice just to split one (image, label) pair.
first_sample = train_data[0]
img, label = first_sample[0], first_sample[1]

# Reorder the axes from (0, 1, 2) to (1, 2, 0), i.e. move the first
# (channel) axis last; the cell output shows the resulting shape.
permuted_img = img.permute(1, 2, 0)
permuted_img.shape

torch.Size([224, 224, 3])

To understand the hyperparameters set below, refer to the following website:

https://poloclub.github.io/cnn-explainer/

The model explained on that website is a TinyVGG CNN, not a ResNet50 CNN, but its explanation of the hyperparameters is very well done.

In [None]:
import torch
from torch import nn

class ResidualBlock(nn.Module):
    """
    Bottleneck residual block, the building unit of the ResNet50 architecture.

    The main path is three convolutions, each followed by batch normalization:
    a 1x1 convolution from ``in_channels`` to ``mid_channels`` (this one carries
    the stride), a 3x3 convolution that keeps ``mid_channels``, and a 1x1
    convolution from ``mid_channels`` to ``out_channels``. A shortcut path
    carries the block input around those layers and is added to their output
    before the final ReLU, which is the skip connection residual networks use
    to ease the vanishing gradient problem.

    Args:
        in_channels: Number of channels of the input feature map.
        mid_channels: Number of channels used inside the bottleneck.
        out_channels: Number of channels of the output feature map.
        stride: Stride of the first 1x1 convolution and of the projection
            shortcut. Defaults to 1.
    """

    def __init__(self, in_channels: int, mid_channels: int, out_channels: int, stride: int = 1) -> None:
        super().__init__()

        # Main path: 1x1 -> 3x3 -> 1x1, each followed by batch normalization.
        # The convolutions have no bias because batch norm supplies its own shift.
        self.conv_block_1 = nn.Conv2d(in_channels, mid_channels, kernel_size=1, stride=stride, bias=False)
        self.bn_block_1 = nn.BatchNorm2d(mid_channels)
        self.conv_block_2 = nn.Conv2d(mid_channels, mid_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn_block_2 = nn.BatchNorm2d(mid_channels)
        self.conv_block_3 = nn.Conv2d(mid_channels, out_channels, kernel_size=1, stride=1, bias=False)
        self.bn_block_3 = nn.BatchNorm2d(out_channels)

        # Shortcut connection: an empty Sequential acts as the identity.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            # The input and the main-path output differ in spatial size and/or
            # channel count, so project the input with a strided 1x1 convolution
            # (plus batch norm) to make the two addable.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the bottleneck path, add the shortcut, and apply the final ReLU."""
        # ReLU has no parameters, so use the functional form instead of
        # constructing a fresh nn.ReLU module on every forward call.
        relu = nn.functional.relu

        identity = self.shortcut(x)  # identity or projected input

        out = relu(self.bn_block_1(self.conv_block_1(x)))    # 1x1 conv + batch norm + ReLU
        out = relu(self.bn_block_2(self.conv_block_2(out)))  # 3x3 conv + batch norm + ReLU
        out = self.bn_block_3(self.conv_block_3(out))        # 1x1 conv + batch norm

        # Out-of-place add: never writes into a tensor autograd may still need.
        out = out + identity
        return relu(out)

In [None]:
from torch import nn
import torch

class RestNet(nn.Module):
    """
    ResNet50-style image classifier assembled from bottleneck ``ResidualBlock``s.

    The network is a convolutional stem (7x7 convolution, batch norm, ReLU,
    max pooling), four residual stages of 3, 4, 6 and 3 blocks, and a
    classification head (global average pooling, flatten, linear layer).

    Args:
        input_shape: Number of channels of the input images.
        hidden_units: Currently unused. It is kept so existing call sites keep
            working; the channel widths of the network are fixed in the code.
        output_shape: Number of output classes (size of the final linear layer).
    """

    def __init__(self, input_shape: int, hidden_units: int, output_shape: int) -> None:
        super().__init__()

        # Stem: strided 7x7 convolution followed by strided max pooling.
        self.conv_block_1 = nn.Sequential(
            nn.Conv2d(input_shape, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        )

        # The four residual stages that follow the stem. The first keeps the
        # spatial resolution (stride 1); each later stage opens with a stride-2
        # block. The output width doubles from stage to stage: 256 -> 2048.
        self.conv_block_2 = self._make_stage(64, 64, 256, num_blocks=3, stride=1)
        self.conv_block_3 = self._make_stage(256, 128, 512, num_blocks=4, stride=2)
        self.conv_block_4 = self._make_stage(512, 256, 1024, num_blocks=6, stride=2)
        self.conv_block_5 = self._make_stage(1024, 512, 2048, num_blocks=3, stride=2)

        # Classifier: turns the final 2048-channel feature maps into class scores.
        self.classifier = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(2048, output_shape),
        )

    def _make_stage(self, in_channels: int, mid_channels: int, out_channels: int,
                    num_blocks: int, stride: int) -> nn.Sequential:
        """
        Build one residual stage by stacking ``num_blocks`` residual blocks.

        Only the first block receives ``stride`` and maps ``in_channels`` to
        ``out_channels``; every following block uses stride 1 and takes
        ``out_channels`` as its input width.

        Raises:
            ValueError: If ``num_blocks`` is smaller than 1.
        """
        if num_blocks < 1:
            # Without this guard a request for zero blocks would still build one.
            raise ValueError(f"num_blocks must be at least 1, got {num_blocks}")

        block_strides = [stride] + [1] * (num_blocks - 1)
        layers = []

        # A separate loop name keeps the `stride` argument from being shadowed.
        for block_stride in block_strides:
            layers.append(ResidualBlock(in_channels, mid_channels, out_channels, block_stride))
            in_channels = out_channels  # later blocks consume the stage's output width

        return nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Pass ``x`` through the stem, the four residual stages and the classifier."""
        x = self.conv_block_1(x)
        x = self.conv_block_2(x)
        x = self.conv_block_3(x)
        x = self.conv_block_4(x)
        x = self.conv_block_5(x)
        x = self.classifier(x)
        return x

In [19]:
# The dataset listing printed by the dataloader cell shows 6 class folders
# (e.g. forest, buildings, glacier, street, mountain), so the classifier
# head is sized for 6 classes rather than the 1000 classes of ImageNet.
NUM_CLASSES = 6

# `hidden_units` is accepted by RestNet but not used by it; the value is
# passed only to satisfy the constructor signature.
model = RestNet(input_shape=3, hidden_units=2048, output_shape=NUM_CLASSES)
model

AttributeError: cannot assign module before Module.__init__() call