# **DLIP Tutorial - PyTorch**
# ResNet-50 Model
Y.-K. Kim
(updated 2024. 5. 14) 

Jin Kwak/ 21900031
(edited 24. 05. 24)

The purpose of this tutorial is to build the ResNet-50 model in PyTorch.

https://arxiv.org/pdf/1512.03385

## For CoLab Usage:

1. Download this notebook
2. Then, open in Colab

# Set Up PyTorch, NumPy, and the Device


In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt

In [2]:
# Choose where tensors and the model will live: the GPU when CUDA is usable,
# otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print(f"Using {device} device")
if device == "cuda":
    # Report the name of GPU index 0.
    print(f'Device name: {torch.cuda.get_device_name(0)}')

Using cuda device
Device name: NVIDIA GeForce RTX 4060 Laptop GPU


# Prepare Datasets: Input and Output

We will not use a dataset for training in this tutorial.  


# (Assignment) Define model - ResNet 50

Create a class that inherits from `nn.Module`:


* Define the layers of the network in the `__init__` function
* Specify the forward pass of the network in the **forward function.**
* Activation function: `ReLU`


![ResNet50_architecture](https://github.com/ykkimhgu/DLIP-src/assets/84508106/7bcd1af8-20b8-49f6-85f2-b29bf612286b)

![image](https://github.com/ykkimhgu/DLIP-src/assets/84508106/c4a7350c-cfc9-4bdb-8864-e741f28fea12)

#### Skip Connection
![image](https://github.com/ykkimhgu/DLIP-src/assets/84508106/425af944-dc82-4d8b-b9a9-b7c0344b8e0f)


In [10]:
import torch
import torch.nn as nn

class BasicBlock(nn.Module):
    """Residual building block: 1x1 conv -> 3x3 conv -> 1x1 conv plus a skip connection.

    Each convolution is followed by batch normalisation, and ReLU is applied
    after the first two stages and again after the skip connection is added.
    The block outputs ``out_channels * expansion`` channels.

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Width of the first two convolutions; the last 1x1
            convolution widens this to ``out_channels * expansion``.
        down_sampling: Projection applied to the skip path. An ``nn.Module``
            is used as given. Any other truthy value makes the block build
            its own 1x1 convolution + batch norm projection. ``None`` (or any
            falsy value) leaves the input unchanged on the skip path.
        stride: Stride of the first 1x1 convolution and of the projection
            the block builds for itself.
    """

    # Expansion ratio for ResNet-50, 101, 152.
    expansion = 4

    def __init__(self, in_channels, out_channels, down_sampling=None, stride=1):
        super().__init__()

        expanded_channels = out_channels * self.expansion  # 64 -> 256, 128 -> 512, ...

        # Reuse a projection handed in by the caller rather than discarding it
        # and allocating an identical one.
        self.down_sampling = down_sampling if isinstance(down_sampling, nn.Module) else None
        self.stride = stride
        # Not used by forward(); retained so existing attribute access and the
        # printed module listing stay the same.
        self.flatten = nn.Flatten()

        self.ReLU = nn.ReLU(inplace=True)
        self.conv_layer1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=0, bias=False)
        self.batch_norm1 = nn.BatchNorm2d(out_channels)

        self.conv_layer2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.batch_norm2 = nn.BatchNorm2d(out_channels)

        self.conv_layer3 = nn.Conv2d(out_channels, expanded_channels, kernel_size=1, stride=1, padding=0, bias=False)
        self.batch_norm3 = nn.BatchNorm2d(expanded_channels)

        # A truthy non-module flag (e.g. True) asks the block to build the
        # projection itself so the skip path matches the main path's output.
        if down_sampling and self.down_sampling is None:
            self.down_sampling = nn.Sequential(
                nn.Conv2d(in_channels, expanded_channels, kernel_size=1, stride=stride, padding=0, bias=False),
                nn.BatchNorm2d(expanded_channels),
            )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the three conv stages and add the (optionally projected) input."""
        # No copy is needed: nothing below modifies ``x`` in place.
        identity = x

        out = self.ReLU(self.batch_norm1(self.conv_layer1(x)))
        # The non-linearity after the 3x3 stage was previously missing.
        out = self.ReLU(self.batch_norm2(self.conv_layer2(out)))
        out = self.batch_norm3(self.conv_layer3(out))

        # Project the skip path when its shape differs from the main path.
        if self.down_sampling is not None:
            identity = self.down_sampling(identity)

        out += identity
        return self.ReLU(out)

class ResNet(nn.Module):
    """ResNet classifier: a convolutional stem, four residual stages, and a linear head.

    Args:
        block: Callable (normally the residual-block class) invoked as
            ``block(in_channels, out_channels, down_sampling, stride)`` for
            the first block of a stage and ``block(in_channels, out_channels)``
            for the rest. Its result must output ``out_channels * expansion``
            channels. If ``block`` has an ``expansion`` attribute it is used;
            otherwise the expansion defaults to 4.
        layers: Sequence giving the number of residual blocks in each of the
            four stages, e.g. ``[3, 4, 6, 3]``.
        image_channels: Number of channels of the input image.
        num_classes: Number of output classes of the final linear layer.
    """

    def __init__(self, block, layers, image_channels, num_classes):
        super().__init__()
        self.in_channels = 64  # Channels produced by the stem; updated per stage
        # Expansion ratio is 4 for ResNet-50, 101, 152; honour the block's own
        # value when it declares one.
        self.expansion = getattr(block, "expansion", 4)

        # Stem: conv2d, batch_norm2d, relu, maxpool2d
        self.conv = nn.Conv2d(image_channels, self.in_channels, kernel_size=7, stride=2, padding=3, bias=False)
        self.batch_norm = nn.BatchNorm2d(self.in_channels)
        self.ReLU = nn.ReLU(inplace=True)
        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # The four residual stages; every stage after the first uses stride 2
        self.layer1 = self._make_layer(block, layers[0], 64, stride=1)
        self.layer2 = self._make_layer(block, layers[1], 128, stride=2)
        self.layer3 = self._make_layer(block, layers[2], 256, stride=2)
        self.layer4 = self._make_layer(block, layers[3], 512, stride=2)

        # Adaptive average pooling down to a 1x1 spatial size
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))

        # Fully connected classification layer
        self.fc = nn.Linear(512 * self.expansion, num_classes)

    def forward(self, x):
        """Return the output of the final linear layer for input batch ``x``."""
        # Stem: conv -> bn -> relu -> maxpooling
        x = self.max_pool(self.ReLU(self.batch_norm(self.conv(x))))

        # Layer 1 ~ 4
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        # Pool, flatten everything after the batch dimension, then classify
        x = self.avg_pool(x)
        x = torch.flatten(x, 1)
        return self.fc(x)

    def _make_layer(self, block, num_residual_blocks, out_channels, stride):
        """Build one stage of ``num_residual_blocks`` blocks as an ``nn.Sequential``.

        The first block receives ``stride`` and, when needed, a projection for
        its skip path; the remaining blocks use the block's defaults. Updates
        ``self.in_channels`` to the stage's output channel count.
        """
        stage_channels = out_channels * self.expansion

        # Downsample identity if we change input dimensions or channels
        down_sampling = None
        if stride != 1 or self.in_channels != stage_channels:
            down_sampling = nn.Sequential(
                nn.Conv2d(self.in_channels, stage_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(stage_channels),
            )

        blocks = [block(self.in_channels, out_channels, down_sampling, stride)]

        # Later blocks of the stage take the expanded width as input
        self.in_channels = stage_channels
        blocks.extend(
            block(self.in_channels, out_channels) for _ in range(1, num_residual_blocks)
        )

        return nn.Sequential(*blocks)

def ResNet50(img_channel=3, num_classes=1000):
    """Create a ResNet-50: a ``ResNet`` of ``BasicBlock`` units in stages of 3, 4, 6 and 3.

    Args:
        img_channel: Number of channels of the input image.
        num_classes: Number of classification classes.
    """
    blocks_per_stage = [3, 4, 6, 3]
    return ResNet(
        block=BasicBlock,
        layers=blocks_per_stage,
        image_channels=img_channel,
        num_classes=num_classes,
    )

# Build the network and move it to the device selected in the setup cell, so
# this cell also runs on machines without a CUDA GPU.
model = ResNet50()
model = model.to(device)
print(model)


ResNet(
  (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (batch_norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (ReLU): ReLU(inplace=True)
  (max_pool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (down_sampling): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (flatten): Flatten(start_dim=1, end_dim=-1)
      (ReLU): ReLU(inplace=True)
      (conv_layer1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (batch_norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv_layer2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batch_norm2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affin

Check that your model is valid using the **summary()** function.


In [11]:
from torchsummary import summary
import torchvision.models as models

# Reference ResNet-50 from torchvision, used to compare layer shapes and
# parameter counts against the hand-written model. `weights=` replaces the
# deprecated `pretrained=True` and selects the same ImageNet checkpoint.
# The model is placed on the device chosen in the setup cell.
model_resnet50 = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1).to(device)

summary(model_resnet50, (3,224,224))
summary(model, (3, 224, 224))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]           4,096
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]          16,384
      BatchNorm2d-12          [-1, 256, 56, 56]             512
           Conv2d-13          [-1, 256, 56, 56]          16,384
      BatchNorm2d-14          [-1, 256,