### ResNet

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

In [None]:
# Residual Block: Implements the main building block of ResNet
class ResidualBlock(nn.Module):
    """
    Basic two-convolution residual block.

    The main path is conv3x3 -> BN -> ReLU -> conv3x3 -> BN. Its result is
    added to a shortcut branch and passed through a final ReLU.

    Args:
        in_channels: Number of channels of the incoming feature map
        out_channels: Number of channels produced by both convolutions
        stride: Stride of the first convolution (the second always uses 1)
        downsample: Optional module applied to the input on the shortcut
            branch; when None the input itself is used as the shortcut

    Returns:
        Output tensor of the block (after the final ReLU)
    """
    def __init__(self, in_channels: int, out_channels: int, stride: int = 1, downsample: nn.Module = None):
        super().__init__()
        # Main path, stage 1: the only place where `stride` is applied
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        # Main path, stage 2: keeps both resolution and channel count
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        # Shortcut projection (may be None)
        self.downsample = downsample

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Shortcut branch: pass the input through unchanged unless a projection
        # was supplied. A projection is needed whenever the main path changes
        # the channel count or the spatial resolution, otherwise the addition
        # below would not line up.
        shortcut = x if self.downsample is None else self.downsample(x)

        # Main branch: conv -> BN -> ReLU -> conv -> BN
        residual = self.bn1(self.conv1(x))
        residual = F.relu(residual)
        residual = self.bn2(self.conv2(residual))

        # In-place add on purpose: it never broadcasts the main-path tensor to
        # a larger shape, so a mismatched shortcut raises instead of passing silently.
        residual += shortcut
        return F.relu(residual)

In [3]:
# ResNet Model: Assembles multiple residual blocks
class ResNet(nn.Module):
    """
    ResNet model

    A 7x7 stride-2 stem convolution and a max-pool are followed by four
    stages of residual blocks (64, 128, 256 and 512 channels; stages 2-4
    start with a stride-2 block), global average pooling and a linear
    classifier.

    Args:
        block (nn.Module): Residual block class (or any callable). It is
            invoked as ``block(in_channels, out_channels, stride, downsample)``
            for the first block of a stage and as
            ``block(out_channels, out_channels)`` for the remaining ones.
        layers (list): Number of residual blocks in each of the four stages
        num_classes (int): Number of classes
        input_channels (int): Number of channels of the input images.
            Defaults to 1 (grayscale), which was previously hard-coded.
    """
    def __init__(self, block: nn.Module, layers: list, num_classes: int = 10, input_channels: int = 1):
        super(ResNet, self).__init__()
        # Running channel count; _make_layer reads and updates it stage by stage
        self.in_channels = 64

        # Initial convolutional layer (stem)
        self.conv1 = nn.Conv2d(input_channels, 64, kernel_size = 7, stride = 2, padding = 3, bias = False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace = True)
        self.maxpool = nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)

        # Define ResNet layers (stages); the order matters because each call
        # advances self.in_channels for the next one
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride = 2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride = 2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride = 2)

        # Global average pooling and fully connected layer
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, block: nn.Module, out_channels: int, blocks: int, stride: int = 1):
        """
        Builds one stage of residual blocks

        Args:
            block (nn.Module): Residual block class
            out_channels (int): Number of output channels of every block in the stage
            blocks (int): Number of residual blocks in the stage
            stride (int): Stride of the first block of the stage

        Returns:
            nn.Sequential containing the blocks of the stage
        """
        downsample = None
        if stride != 1 or self.in_channels != out_channels:
            # The first block changes resolution and/or channel count, so its
            # shortcut needs a 1x1 projection to stay addable to the main path
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels, kernel_size = 1, stride = stride, bias = False),
                nn.BatchNorm2d(out_channels)
            )

        # Only the first block applies the stride / projection; the remaining
        # ones keep the shape unchanged
        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels
        for _ in range(1, blocks):
            layers.append(block(out_channels, out_channels))

        return nn.Sequential(*layers) # Unpack list of layers

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Stem
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        # Residual stages
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        # Classifier head: pool to 1x1, flatten everything but the batch dim
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

In [4]:
def resnet18(num_classes: int):
    """
    Build a ResNet-18: four stages of two basic residual blocks each.

    Args:
        num_classes (int): Size of the classifier output

    Returns:
        A freshly constructed ResNet-18 model
    """
    blocks_per_stage = [2, 2, 2, 2]
    return ResNet(ResidualBlock, blocks_per_stage, num_classes)

def resnet34(num_classes: int):
    """
    Build a ResNet-34: four stages of 3, 4, 6 and 3 basic residual blocks.

    Args:
        num_classes (int): Size of the classifier output

    Returns:
        A freshly constructed ResNet-34 model
    """
    blocks_per_stage = [3, 4, 6, 3]
    return ResNet(ResidualBlock, blocks_per_stage, num_classes)

In [None]:
def train_fashion_mnist(num_epochs: int = 10, batch_size: int = 64, learning_rate: float = 0.001):
    """
    Trains a ResNet-18 model on Fashion MNIST dataset

    Downloads the dataset into ./data if needed, trains with Adam and
    cross-entropy loss, and prints the mean training loss after every epoch.
    No evaluation on the test split is performed.

    Args:
        num_epochs (int): Number of passes over the training set
        batch_size (int): Mini-batch size of the training loader
        learning_rate (float): Learning rate passed to Adam

    Returns:
        Trained model
    """

    # Data loading
    transform = transforms.Compose([
        transforms.Grayscale(num_output_channels = 1), # Force a single channel
        transforms.ToTensor(), # Convert to tensor
        transforms.Normalize((0.5,), (0.5,)) # Normalize with mean 0.5 / std 0.5
    ])

    # Only the training split is needed here; the previously constructed
    # test dataset/loader were never used and have been dropped.
    train_dataset = torchvision.datasets.FashionMNIST(root='./data', train = True, transform = transform, download = True)
    train_loader = DataLoader(train_dataset, batch_size = batch_size, shuffle = True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # Check if GPU is available
    model = resnet18(num_classes = 10) # 10 for Fashion MNIST
    model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # .item() detaches the scalar so the autograd graph is not kept alive
            total_loss += loss.item()
        # Mean of the per-batch losses over the epoch
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}')

    print("Training complete!")
    return model

In [6]:
if __name__ == "__main__":
    # Start training only when run as a script / notebook cell, not on import;
    # the trained network is kept in `model`.
    model = train_fashion_mnist()

Epoch [1/10], Loss: 0.4394
Epoch [2/10], Loss: 0.3109
Epoch [3/10], Loss: 0.2728
Epoch [4/10], Loss: 0.2499
Epoch [5/10], Loss: 0.2284
Epoch [6/10], Loss: 0.2122
Epoch [7/10], Loss: 0.1950
Epoch [8/10], Loss: 0.1810
Epoch [9/10], Loss: 0.1696
Epoch [10/10], Loss: 0.1509
Training complete!
