Name : Maurya Vijayaramchandran 

Email: maurya.mvr@gmail.com 

Project: Implementation of ResNet-50

Import Libraries

In [None]:
import torch 
import torch.nn as nn

First, lay out the skeletal structure of the residual block.

In [None]:
class block(nn.Module):
    """
    Bottleneck residual block with 3 convolutional layers.

    The block applies a 1x1 convolution, a 3x3 convolution and a final 1x1
    convolution that widens the result to ``out_channels * expansion``
    channels. Every convolution is followed by batch normalisation, and the
    (optionally down-sampled) input is added back before the last ReLU.

    Args:
    - in_channels (int): Number of input channels.
    - out_channels (int): Number of channels produced by the first two
        convolutions; the block itself outputs ``out_channels * expansion``
        channels.
    - identity_downsample (nn.Module): Module for down-sampling the identity path,
        if necessary to match dimensions with the output. Default is None.
    - stride (int): Stride value for the second convolutional layer. Default is 1.
    """

    # Channel multiplier applied by the last 1x1 convolution. Kept as a class
    # attribute so it can be read without instantiating the block; it is
    # still reachable as ``self.expansion``.
    expansion = 4

    def __init__(self, in_channels, out_channels, identity_downsample=None, stride=1):
        super().__init__()
        expanded_channels = out_channels * self.expansion

        # 1x1 convolution: changes the channel count to out_channels.
        self.conv1 = nn.Conv2d(in_channels,
                               out_channels,
                               kernel_size=1,
                               stride=1,
                               padding=0,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)

        # 3x3 convolution: the only layer that uses the caller's stride.
        self.conv2 = nn.Conv2d(out_channels,
                               out_channels,
                               kernel_size=3,
                               stride=stride,
                               padding=1,
                               bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # 1x1 convolution: widens the channels by the expansion factor.
        self.conv3 = nn.Conv2d(out_channels,
                               expanded_channels,
                               kernel_size=1,
                               stride=1,
                               padding=0,
                               bias=False)
        self.bn3 = nn.BatchNorm2d(expanded_channels)

        self.relu = nn.ReLU()
        self.identity_downsample = identity_downsample

    def forward(self, x):
        """
        Forward pass through the residual block.

        Args:
        - x (torch.Tensor): Input tensor.

        Returns:
        - Output tensor after passing through the residual block.
        """
        identity = x

        x = self.relu(self.bn1(self.conv1(x)))
        x = self.relu(self.bn2(self.conv2(x)))
        x = self.bn3(self.conv3(x))

        # Bring the skip connection to the same shape as the main path
        # whenever a down-sampling module was supplied.
        if self.identity_downsample is not None:
            identity = self.identity_downsample(identity)

        x += identity
        return self.relu(x)

Next, lay out the skeletal structure of the constructor block (the full network assembled from the residual blocks).

In [None]:
class MyResNet(nn.Module):
    """
    ResNet-style network assembled from a configurable residual block.

    Args:
        block (nn.Module): The block module (class) to be used for constructing the ResNet.
            It is called as ``block(in_channels, out_channels, identity_downsample, stride)``.
            If it declares an ``expansion`` attribute, that value is used as the
            channel multiplier of its output; otherwise 4 is assumed.
        layers (list): A list containing the number of blocks to be used in each
            of the four layers.
        image_channels (int): The number of input channels of the image.
        num_classes (int): The number of classes for the output.

    Attributes:
        in_channels (int): The number of input channels for the current layer.
        conv1 (nn.Conv2d): The convolutional layer for the first layer.
        bn1 (nn.BatchNorm2d): The batch normalization layer for the first layer.
        relu (nn.ReLU): The activation layer for the network.
        maxpool (nn.MaxPool2d): The max pooling layer for the network.
        layer1 (nn.Sequential): The sequence of blocks for the first layer.
        layer2 (nn.Sequential): The sequence of blocks for the second layer.
        layer3 (nn.Sequential): The sequence of blocks for the third layer.
        layer4 (nn.Sequential): The sequence of blocks for the fourth layer.
        avgpool (nn.AdaptiveAvgPool2d): The adaptive average pooling layer for the network.
        linear1 (nn.Linear): The linear layer for the output.

    Methods:
        forward(x): Defines the computation performed at every call, receives an
            input tensor x and returns the output.
        make_layer(block, num_of_residual_blocks, out_channels, stride): Constructs
            a layer consisting of a given number of residual blocks.
    """

    def __init__(self, block, layers, image_channels, num_classes):
        super().__init__()
        # Channel multiplier of the block output (4 for the bottleneck block).
        expansion = getattr(block, "expansion", 4)

        # Stem: 7x7 convolution followed by max pooling.
        self.in_channels = 64
        self.conv1 = nn.Conv2d(
            image_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four residual stages; every stage after the first halves the
        # spatial resolution through its stride of 2.
        self.layer1 = self.make_layer(block, layers[0], out_channels=64, stride=1)
        self.layer2 = self.make_layer(block, layers[1], out_channels=128, stride=2)
        self.layer3 = self.make_layer(block, layers[2], out_channels=256, stride=2)
        self.layer4 = self.make_layer(block, layers[3], out_channels=512, stride=2)

        # Classification head.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.linear1 = nn.Linear(512 * expansion, num_classes)

    def forward(self, x):
        """
        Defines the computation performed at every call, receives an input tensor x and returns the output.

        Args:
            x (torch.Tensor): The input tensor.

        Returns:
            torch.Tensor: The output tensor.
        """
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        # Collapse everything except the batch dimension for the linear layer.
        x = x.reshape(x.shape[0], -1)
        x = self.linear1(x)
        return x

    def make_layer(self, block, num_of_residual_blocks, out_channels, stride):
        """
        Build one layer made of consecutive residual blocks.

        Updates ``self.in_channels`` to the number of channels the returned
        layer outputs, so that the next call starts from the right width.

        Args:
            block (nn.Module): the basic residual block to be repeated.
            num_of_residual_blocks (int): number of residual blocks to create in this layer.
            out_channels (int): number of output channels for this layer
                (before the block's expansion is applied).
            stride (int): stride for the first block in the layer.

        Returns:
            nn.Sequential: a sequence of residual blocks with a downsampling layer
            at the beginning if stride is not 1 or the input and output channels
            don't match.
        """
        expansion = getattr(block, "expansion", 4)
        expanded_channels = out_channels * expansion

        # The skip connection only needs a projection when the first block
        # changes the spatial size or the channel count.
        identity_downsample = None
        if stride != 1 or self.in_channels != expanded_channels:
            identity_downsample = nn.Sequential(
                nn.Conv2d(
                    self.in_channels,
                    expanded_channels,
                    kernel_size=1,
                    stride=stride,
                    bias=False),
                nn.BatchNorm2d(expanded_channels))

        # The first block is always created, with or without a projection;
        # it is the only one that uses the stride and the down-sampling.
        layers = [
            block(self.in_channels,
                  out_channels,
                  identity_downsample,
                  stride)
        ]
        self.in_channels = expanded_channels

        # The remaining blocks keep both resolution and width unchanged.
        for _ in range(num_of_residual_blocks - 1):
            layers.append(block(self.in_channels, out_channels))
        return nn.Sequential(*layers)


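This code can be modified to implement the other versions of ResNet as well. For example, with the same bottleneck block, ResNet-101 uses the layer configuration [3, 4, 23, 3] and ResNet-152 uses [3, 8, 36, 3].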

In [None]:
def My_Resnet_50(img_channels=3, num_classes = 1000):
    """
    Build a ResNet-50 from the bottleneck ``block``.

    Args:
        img_channels (int): Number of channels of the input images. Default is 3.
        num_classes (int): Number of output classes. Default is 1000.

    Returns:
        MyResNet: A network whose four layers hold 3, 4, 6 and 3 blocks.
    """
    blocks_per_layer = [3, 4, 6, 3]
    network = MyResNet(block, blocks_per_layer, img_channels, num_classes)
    return network

In [None]:
# Build the network with the factory defaults (3 input channels, 1000 classes).
model = My_Resnet_50()

In [None]:
# NOTE: without parentheses this expression evaluates to the bound method, so
# the notebook displays its repr (which embeds the module tree) rather than
# the dictionary of weights; call model.state_dict() to get the weights.
model.state_dict

<bound method Module.state_dict of MyResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU()
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): block(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
      (identity_downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1

In [None]:
def test():
    """
    Smoke-test the global ``model`` with a random batch and print the output shape.

    A batch of two random 3x224x224 images is created on the device that
    holds the model's parameters, so the check runs on machines without a
    GPU as well as on CUDA. Moving only the output to CUDA after the forward
    pass (as was done before) did not speed anything up and crashed when no
    GPU was available.
    """
    # Follow the model's device; fall back to CPU for a parameter-less model.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    x = torch.randn(2, 3, 224, 224, device=device)
    # Only the shape is inspected, so no autograd graph is needed.
    with torch.no_grad():
        y = model(x)
    print(y.shape)

In [None]:
# Run the smoke test; it prints the shape of the model output.
test()

torch.Size([2, 1000])
