In [None]:
from torchvision.models import resnet34, resnet152

# Instantiate torchvision's ResNet-152 with its pretrained weights
# (the checkpoint is fetched into the local torch hub cache when needed).
resnet = resnet152(pretrained=True)

# Number of elements in each parameter tensor, in `parameters()` order.
numel_list = [param.numel() for param in resnet.parameters()]

# Total parameter count; being the last expression, it is the cell's output.
sum(numel_list)






Downloading: "https://download.pytorch.org/models/resnet152-394f9c45.pth" to /Users/aliabdallah/.cache/torch/hub/checkpoints/resnet152-394f9c45.pth


100.0%


60192808

In [1]:
import torch
import torch.nn as nn

In [None]:
class ResidualBlock(nn.Module):
    """Two-layer 3x3 residual unit: conv -> BN -> ReLU -> conv -> BN, plus a skip path.

    Design notes (the author's reading of He et al.'s ResNet paper):
      * With convolution weights close to zero the residual branch contributes
        almost nothing, so the whole block behaves like an identity mapping.
      * Residual connections protect the network from harmful depth: if
        dF(x)/dx is bad or small, the gradient can still flow through the
        skip path, which keeps learning stable.
      * The only ReLU sits between the two conv/BN stages. No activation
        follows the second BatchNorm or the addition; the stated rationale is
        that the residual should stay as linear as possible because ReLU
        destroys information.

    Args:
        n_input: number of channels of the incoming feature map.
        n_output: number of channels produced by both convolutions.
        stride: stride of the first convolution (the second always uses 1).
        downsample: optional module applied to the block input before the
            addition. The caller is responsible for making its output shape
            compatible with the residual branch; when it is omitted the raw
            input is added instead.
    """

    def __init__(self, n_input, n_output, stride=1, downsample=None):
        super().__init__()

        # Registered first so a provided shortcut module leads the state dict.
        self.downsample = downsample

        # Both convolutions are bias-free 3x3 kernels with "same"-style padding;
        # a BatchNorm follows each of them.
        conv3x3 = dict(kernel_size=3, padding=1, bias=False)

        self.conv1 = nn.Conv2d(n_input, n_output, stride=stride, **conv3x3)
        self.bn1 = nn.BatchNorm2d(n_output)

        self.conv2 = nn.Conv2d(n_output, n_output, stride=1, **conv3x3)
        self.bn2 = nn.BatchNorm2d(n_output)

        # Single activation module, used once between the two conv/BN stages.
        self.relu = nn.ReLU()

    def forward(self, inp):
        """Return ``F(inp) + shortcut(inp)``; the sum is not passed through an activation."""
        residual = self.relu(self.bn1(self.conv1(inp)))
        residual = self.bn2(self.conv2(residual))

        # Use the provided shortcut module when there is one, otherwise the raw input.
        shortcut = self.downsample(inp) if self.downsample else inp
        return residual + shortcut

In [None]:
class ResNet34(nn.Module):
    """ResNet-34-style image classifier built from ``ResidualBlock`` units.

    Layout: a convolutional stem, four residual stages with 3 / 4 / 6 / 3
    blocks and 64 / 128 / 256 / 512 channels, global average pooling and a
    linear classification head.

    Args:
        num_classes: size of the output layer (defaults to 1000).
    """

    def __init__(self, num_classes=1000):
        super().__init__()

        # Stem: 7x7 stride-2 convolution, BatchNorm, ReLU, then a 3x3 stride-2 max-pool.
        self.conv_block1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, padding=3, stride=2, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(3, 2, padding=1)
        )

        # Stage 1 keeps 64 channels and must use stride 1: these blocks have no
        # downsample module, so the block input is added back unchanged and has
        # to match the residual branch's spatial size.
        self.comp1 = nn.Sequential(
            ResidualBlock(64, 64),
            ResidualBlock(64, 64),
            ResidualBlock(64, 64),
        )

        # Stages 2-4 each open with a stride-2 block that also changes the
        # channel count; its skip path is a 1x1 projection with the SAME stride
        # so both operands of the addition have identical shapes.
        self.comp2 = nn.Sequential(
            ResidualBlock(64, 128, 2, downsample=self._projection(64, 128, 2)),
            ResidualBlock(128, 128),
            ResidualBlock(128, 128),
            ResidualBlock(128, 128)
        )

        self.comp3 = nn.Sequential(
            ResidualBlock(128, 256, 2, self._projection(128, 256, 2)),
            *[ResidualBlock(256, 256) for _ in range(5)]
        )

        self.comp4 = nn.Sequential(
            ResidualBlock(256, 512, 2, self._projection(256, 512, 2)),
            ResidualBlock(512, 512),
            ResidualBlock(512, 512)
        )

        # Collapse whatever spatial size is left to 1x1 before the classifier.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        self.fc1 = nn.Linear(512, num_classes)

    @staticmethod
    def _projection(n_input, n_output, stride):
        """Build the skip-path projection: bias-free 1x1 convolution followed by BatchNorm."""
        return nn.Sequential(
            nn.Conv2d(n_input, n_output, kernel_size=1, stride=stride, bias=False),
            nn.BatchNorm2d(n_output)
        )

    def forward(self, inp):
        """Map a batch of 3-channel images to ``(batch, num_classes)`` scores."""
        x = self.conv_block1(inp)
        x = self.comp1(x)
        x = self.comp2(x)
        x = self.comp3(x)
        x = self.comp4(x)
        x = self.avgpool(x)
        # (batch, 512, 1, 1) -> (batch, 512) for the linear head.
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        return x