# PyTorch (torchvision) reference implementation of ResNets
https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py

In [1]:
import numpy as np
import torch
import torch.nn as nn
from torchvision import datasets
from torchvision import transforms
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader

In [27]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Bottleneck(nn.Module):
    """Three-convolution residual block (1x1 -> 3x3 -> 1x1) with a skip connection.

    The block outputs ``out_channels * expansion`` channels. Any spatial
    downsampling is done by the 3x3 convolution through ``stride``.

    Args:
        in_channels: Number of channels of the incoming feature map.
        out_channels: Width of the two inner convolutions; the block's output
            has ``out_channels * expansion`` channels.
        stride: Stride of the 3x3 convolution and of the projection shortcut.
    """

    expansion = 4

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        expanded_channels = out_channels * self.expansion

        # 1x1 point-wise convolution: only the channel count changes.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)

        # 3x3 convolution: padding=1 keeps H and W when stride == 1;
        # a larger stride downsamples here.
        self.conv2 = nn.Conv2d(
            out_channels,
            out_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_channels)

        # 1x1 convolution widening the channels by `expansion`.
        self.conv3 = nn.Conv2d(out_channels, expanded_channels, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(expanded_channels)

        # The skip path is *added* to the main path, so both must have the same
        # shape. When the block changes the resolution or the channel count,
        # project the input with a strided 1x1 conv + batch norm; otherwise an
        # empty Sequential passes the input through untouched.
        shape_is_preserved = stride == 1 and in_channels == expanded_channels
        if shape_is_preserved:
            skip_path = nn.Sequential()
        else:
            skip_path = nn.Sequential(
                nn.Conv2d(
                    in_channels,
                    expanded_channels,
                    kernel_size=1,
                    stride=stride,
                    bias=False,
                ),
                nn.BatchNorm2d(expanded_channels),
            )
        self.shortcut = skip_path

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        features = self.conv1(x)
        features = F.relu(self.bn1(features))

        features = self.conv2(features)
        features = F.relu(self.bn2(features))

        # No ReLU before the residual addition.
        features = self.bn3(self.conv3(features))

        # In-place add of the (possibly projected) input onto the main path.
        features += self.shortcut(x)
        return F.relu(features)

class ResNet(nn.Module):
    """ResNet backbone: stem -> four residual stages -> global pool -> linear head.

    Args:
        block: Residual block factory. It must expose an ``expansion``
            attribute and be callable as ``block(in_channels, out_channels,
            stride)``, returning a module whose output has
            ``out_channels * block.expansion`` channels.
        layers: Four integers, the number of blocks in each stage.
        num_classes: Size of the final linear layer's output.
        image_channels: Number of channels of the input images.
        verbose: When true (the default, matching the original behavior),
            ``forward`` prints intermediate tensor shapes. Pass ``False`` to
            silence the trace, e.g. inside a training loop.
    """

    def __init__(self, block, layers, num_classes=1000, image_channels=3, *, verbose=True):
        super().__init__()

        self.verbose = verbose
        # Running channel count fed to the next block; updated by make_layer.
        self.in_channels = 64

        # Stem. With stride=2 and padding=3 the 7x7 conv halves H and W:
        # (1, 3, 224, 224) -> (1, 64, 112, 112); the max-pool halves them again
        # to (1, 64, 56, 56).
        self.conv1 = nn.Conv2d(image_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Each stage is built with a nominal width w, but its output has
        # w * block.expansion channels (64 -> 256, 128 -> 512, 256 -> 1024,
        # 512 -> 2048 when expansion is 4).
        # Stage 1 keeps the spatial size (stride=1).
        self.layer1 = self.make_layer(block, 64, layers[0], stride=1)
        # Stages 2-4 start with a stride-2 block, halving H and W each time.
        self.layer2 = self.make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self.make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self.make_layer(block, 512, layers[3], stride=2)

        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        # After stage 4, self.in_channels equals 512 * block.expansion.
        self.fc = nn.Linear(self.in_channels, num_classes)

    def make_layer(self, block, out_channels, blocks, stride):
        """Build one stage of residual blocks as an ``nn.Sequential``.

        Only the first block uses ``stride``; the remaining ones use stride 1.
        ``self.in_channels`` is advanced to the stage's output width as a side
        effect.
        """
        block_strides = [stride] + [1] * (blocks - 1)
        stage = []
        for block_stride in block_strides:
            stage.append(block(self.in_channels, out_channels, block_stride))
            self.in_channels = out_channels * block.expansion
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return class scores of shape ``(batch, num_classes)`` for images ``x``."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.maxpool(out)
        if self.verbose:
            print(f"before self.layer 1 {out.shape}")
        out = self.layer1(out)
        if self.verbose:
            print(f"after self.layer 1 {out.shape}")
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        if self.verbose:
            print(f"after self.layer 4 it should be 2048 : {out.shape}")
        out = self.avg_pool(out)
        # Flatten (batch, C, 1, 1) -> (batch, C) for the linear head.
        out = out.view(out.size(0), -1)
        return self.fc(out)

In [31]:
def ResNet18():
    """Build the 10-class, 3-channel-input model with two blocks per stage.

    NOTE(review): the canonical ResNet-18 is built from two-convolution basic
    blocks (expansion 1); this factory uses ``Bottleneck`` blocks, so the final
    feature width is 2048 rather than 512 -- confirm whether that is intended.
    """
    stage_depths = [2, 2, 2, 2]
    return ResNet(Bottleneck, stage_depths, num_classes=10, image_channels=3)

In [32]:
# Smoke test: build the 10-class model and run a random batch of two
# 3x224x224 inputs through it.
model = ResNet18()
y = model(torch.randn(2, 3, 224,224))
# Last expression of the cell: displays the output shape
# (torch.Size([2, 10]) in the recorded run).
y.shape

before self.layer 1 torch.Size([2, 64, 56, 56])
after self.layer 1 torch.Size([2, 256, 56, 56])
after self.layer 4 it should be 2048 : torch.Size([2, 2048, 7, 7])


torch.Size([2, 10])

In [33]:
def ResNet50():
    """Build a ResNet-50: Bottleneck blocks with stage depths 3, 4, 6, 3.

    Uses ``ResNet``'s defaults for the number of classes and input channels.
    """
    stage_depths = [3, 4, 6, 3]
    return ResNet(block=Bottleneck, layers=stage_depths)

In [34]:
# Smoke test: run a random batch of two 3x224x224 inputs through ResNet-50.
model_50 = ResNet50()
y = model_50(torch.randn(2, 3, 224,224))
# Displays the output shape (torch.Size([2, 1000]) in the recorded run).
y.shape

before self.layer 1 torch.Size([2, 64, 56, 56])
after self.layer 1 torch.Size([2, 256, 56, 56])
after self.layer 4 it should be 2048 : torch.Size([2, 2048, 7, 7])


torch.Size([2, 1000])

In [35]:
def ResNet101():
    """Build a ResNet-101: Bottleneck blocks with stage depths 3, 4, 23, 3.

    Uses ``ResNet``'s defaults for the number of classes and input channels.
    """
    stage_depths = [3, 4, 23, 3]
    return ResNet(block=Bottleneck, layers=stage_depths)

In [36]:
# Smoke test: run a random batch of two 3x224x224 inputs through ResNet-101.
model_101 = ResNet101()
y = model_101(torch.randn(2, 3, 224,224))
# Displays the output shape (torch.Size([2, 1000]) in the recorded run).
y.shape

before self.layer 1 torch.Size([2, 64, 56, 56])
after self.layer 1 torch.Size([2, 256, 56, 56])
after self.layer 4 it should be 2048 : torch.Size([2, 2048, 7, 7])


torch.Size([2, 1000])

In [37]:
def ResNet152():
    """Build a ResNet-152: Bottleneck blocks with stage depths 3, 8, 36, 3.

    Uses ``ResNet``'s defaults for the number of classes and input channels.
    """
    # The previous depths [3, 4, 23, 3] were the ResNet-101 configuration,
    # which made this factory an exact duplicate of ResNet101().
    return ResNet(Bottleneck, [3, 8, 36, 3])

In [38]:
# Canonical ResNet-152 stage depths are [3, 8, 36, 3].
# Smoke test: run a random batch of two 3x224x224 inputs through the model.
model_152 = ResNet152()
y = model_152(torch.randn(2, 3, 224,224))
# Last expression of the cell: displays the output shape.
y.shape

before self.layer 1 torch.Size([2, 64, 56, 56])
after self.layer 1 torch.Size([2, 256, 56, 56])
after self.layer 4 it should be 2048 : torch.Size([2, 2048, 7, 7])


torch.Size([2, 1000])