In [1]:
'''
ResNet-50 Architecture
+----------+------------------+--------------------------------+------------------+
| Layer    | Type             | Configuration                  | Output Size      |
+----------+------------------+--------------------------------+------------------+
| Input    | Image            | 224 x 224 x 3 (RGB)            | 224 x 224 x 3    |
|          |                  |                                |                  |
| Conv1    | Convolution      | 64 filters (7x7), Stride 2     | 112 x 112 x 64   |
| BN1      | Batch Norm       | -                              | 112 x 112 x 64   |
| ReLU     | Activation       | -                              | 112 x 112 x 64   |
| MaxPool  | Max Pooling      | 3x3 window, Stride 2           | 56 x 56 x 64     |
|          |                  |                                |                  |
| Stage 1  | Residual Block 1 | [1x1,64] [3x3,64] [1x1,256] ×3 | 56 x 56 x 256    |
|          |                  | + Skip Connection              |                  |
|          |                  |                                |                  |
| Stage 2  | Residual Block 2 | [1x1,128][3x3,128][1x1,512] ×4 | 28 x 28 x 512    |
|          |                  | + Skip Connection (stride 2)   |                  |
|          |                  |                                |                  |
| Stage 3  | Residual Block 3 | [1x1,256][3x3,256][1x1,1024]×6 | 14 x 14 x 1024   |
|          |                  | + Skip Connection (stride 2)   |                  |
|          |                  |                                |                  |
| Stage 4  | Residual Block 4 | [1x1,512][3x3,512][1x1,2048]×3 | 7 x 7 x 2048     |
|          |                  | + Skip Connection (stride 2)   |                  |
|          |                  |                                |                  |
| AvgPool  | Global Avg Pool  | 7x7 window                     | 1 x 1 x 2048     |
| Flatten  | Flatten          | -                              | 2048             |
| FC       | Fully Connected  | 1000 Neurons (Softmax)         | 1000             |
+----------+------------------+--------------------------------+------------------+

Residual Block (Bottleneck) Structure:
┌─────────────────────────────────────────────────┐
│  Input (x)                                      │
│    │                                            │
│    ├──────────────────────────┐                 │
│    │                          │                 │
│    ▼                          │ (Skip/Identity) │
│  1x1 Conv → BN → ReLU         │                 │
│    │                          │                 │
│    ▼                          │                 │
│  3x3 Conv → BN → ReLU         │                 │
│    │                          │                 │
│    ▼                          │                 │
│  1x1 Conv → BN                │                 │
│    │                          │                 │
│    └──────────► ADD ◄─────────┘                 │
│                 │                               │
│                 ▼                               │
│               ReLU                              │
│                 │                               │
│              Output                             │
└─────────────────────────────────────────────────┘

Key Characteristics of ResNet:

- Residual/Skip Connections: The core innovation - the block's input is added
  directly to its output, giving F(x) + x instead of just F(x), which addresses
  the degradation problem
  
- Bottleneck Design: Uses 1x1 convolutions to reduce/restore dimensions, making
  the network more efficient (1x1 reduces → 3x3 processes → 1x1 expands)
  
- Batch Normalization: Applied after every convolutional layer before activation

- No Dropout: ResNet doesn't use dropout; batch normalization already provides
  a regularizing effect
  
- Projection Shortcut: When dimensions change, the skip path uses a 1x1
  convolution (followed by BN) to match dimensions; otherwise the skip
  connection is a plain identity mapping

- Depth Variants: ResNet-18, ResNet-34 (basic blocks), ResNet-50, ResNet-101, 
  ResNet-152 (bottleneck blocks)

- Total Parameters: ~25.6 million (ResNet-50)

The skip connections allow gradients to flow directly through the network during
backpropagation, enabling training of very deep networks (100+ layers) without
degradation. The network learns residual functions F(x) = H(x) - x rather than
directly learning H(x), which is easier to optimize.
'''
# Prints a single empty line.
# NOTE(review): presumably added so the notes string above is not the cell's
# last expression and the notebook does not echo it back - confirm.
print()




In [None]:
#######################
## ResNet50 building ##
#######################

from torch import nn

class BottleneckBlock(nn.Module):
    """
    Bottleneck residual block for ResNet50.

    The block is called "bottleneck" because it first reduces the channel
    count, processes the narrow representation, and then expands it again.
    Architecture: 1x1 -> 3x3 -> 1x1 with a skip connection around all three.

    Args:
        in_channels: channels of the incoming tensor.
        mid_channels: channels used inside the block (after the first 1x1
            convolution and through the 3x3 convolution).
        out_channels: channels produced by the last 1x1 convolution.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to the input on the skip path so
            that it can be added to the main-path output; ``None`` means the
            input is added unchanged.
    """
    def __init__(self, in_channels, mid_channels, out_channels, stride=1, downsample=None):
        super().__init__()

        # Reduce: 1x1 convolution down to the bottleneck width.
        self.conv1 = nn.Conv2d(
            in_channels=in_channels,
            out_channels=mid_channels,
            kernel_size=1,
            stride=1,
            bias=False,
        )
        self.bn1 = nn.BatchNorm2d(num_features=mid_channels)

        # Process: 3x3 convolution; this is the only layer that applies `stride`.
        self.conv2 = nn.Conv2d(
            in_channels=mid_channels,
            out_channels=mid_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias=False,
        )
        self.bn2 = nn.BatchNorm2d(num_features=mid_channels)

        # Expand: 1x1 convolution up to the block's output width.
        self.conv3 = nn.Conv2d(
            in_channels=mid_channels,
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
            bias=False,
        )
        self.bn3 = nn.BatchNorm2d(num_features=out_channels)

        # One in-place ReLU module is reused at every activation point.
        self.relu = nn.ReLU(inplace=True)

        # Skip path: None for identity, otherwise a module that reshapes x.
        self.downsample = downsample

    def forward(self, x):
        # Main path F(x): two conv-BN-ReLU steps, then conv-BN with no ReLU.
        residual = self.relu(self.bn1(self.conv1(x)))
        residual = self.relu(self.bn2(self.conv2(residual)))
        residual = self.bn3(self.conv3(residual))

        # Skip path: the raw input, or its projection when one was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        # Add the skip connection: out = F(x) + x, the key idea of ResNet.
        residual += shortcut
        return self.relu(residual)


class ResNet50(nn.Module):
    """
    ResNet-50 classifier assembled from BottleneckBlock stages.

    Layout: stem (7x7 conv -> BN -> ReLU -> max-pool), four residual stages
    with 3, 4, 6 and 3 bottleneck blocks, adaptive average pooling to 1x1,
    then flatten and a linear layer with ``num_classes`` outputs.

    Args:
        num_classes: size of the final linear layer's output.
    """
    def __init__(self, num_classes=1000):
        super().__init__()

        # Stem: Conv1 -> BN -> ReLU -> MaxPool
        stem = [
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        ]
        self.conv1 = nn.Sequential(*stem)

        # Residual stages; arguments are
        # (in_channels, mid_channels, out_channels, num_blocks, stride).
        self.conv2_x = self._make_layer(64, 64, 256, 3, 1)       # 3 blocks, mid 64, out 256
        self.conv3_x = self._make_layer(256, 128, 512, 4, 2)     # 4 blocks, mid 128, out 512
        self.conv4_x = self._make_layer(512, 256, 1024, 6, 2)    # 6 blocks, mid 256, out 1024
        self.conv5_x = self._make_layer(1024, 512, 2048, 3, 2)   # 3 blocks, mid 512, out 2048

        # Head: AvgPool -> Flatten -> FC
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(2048, num_classes),
        )

    def _make_layer(self, in_channels, mid_channels, out_channels, num_blocks, stride):
        """
        Build one stage as an ``nn.Sequential`` of ``num_blocks`` bottleneck blocks.

        Only the first block receives ``stride`` and the ``in_channels`` input
        width; it gets a 1x1 conv + BN projection on its skip path whenever
        the stride is not 1 or the channel count changes. Every later block
        maps ``out_channels`` to ``out_channels`` with stride 1 and no projection.
        """
        shape_preserved = stride == 1 and in_channels == out_channels

        projection = None
        if not shape_preserved:
            # The skip path must produce the same shape as the main path.
            projection = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )

        first_block = BottleneckBlock(in_channels, mid_channels, out_channels, stride, projection)
        later_blocks = [
            BottleneckBlock(out_channels, mid_channels, out_channels, stride=1, downsample=None)
            for _ in range(num_blocks - 1)
        ]

        return nn.Sequential(first_block, *later_blocks)

    def forward(self, X):
        features = self.conv1(X)

        # Run the four residual stages in order.
        for stage in (self.conv2_x, self.conv3_x, self.conv4_x, self.conv5_x):
            features = stage(features)

        pooled = self.avgpool(features)
        return self.fc(pooled)


# Example usage: build the model and push one random image through it.
if __name__ == "__main__":
    import torch

    # Create model
    model = ResNet50(num_classes=1000)

    # This is an inference-only smoke test, so switch to eval mode: otherwise
    # BatchNorm would normalise with the statistics of this single random
    # sample and update its running buffers as a side effect.
    model.eval()

    # Test with random input; no_grad() avoids building an autograd graph
    # that is never used.
    x = torch.randn(1, 3, 224, 224)
    with torch.no_grad():
        output = model(x)

    print(f"Input shape: {x.shape}")
    print(f"Output shape: {output.shape}")
    print(f"Number of parameters: {sum(p.numel() for p in model.parameters()):,}")