In [None]:
# Install required packages

!pip install tensorflow==2.18.0
!pip install keras==3.7.0
!pip install torch==2.5.1
!pip install torchvision==0.20.1

!pip install numpy==2.0.2
!pip install scipy==1.14.1
!pip install pandas==2.2.3

!pip install scikit-learn==1.5.2

!pip install matplotlib==3.9.2

!pip install joblib==1.4.2
!pip install python-dateutil==2.9.0.post0

!pip install sympy==1.13.1
!pip install opt-einsum==3.4.0

!pip install tensorboard==2.18.0
!pip install protobuf==5.29.0
!pip install threadpoolctl==3.5.0
!pip install packaging==24.2


#1. Import Required Libraries

This section implements a custom ResNet (Residual Network) model with adjustable layers and depths. The code includes:

1. BasicBlock for building residual connections.

2. ResNet Class with configurable depth using dynamic layers.
3. Training and Testing Functions to evaluate model performance on CIFAR-10 dataset.
4. Experiments to compare ResNet variants (Small, Medium, and Large) to observe the effect of increasing model complexity on training and test losses.


In [None]:
# Import Required Libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np

This block imports the essential libraries required for:

torch and torch.nn: Used for defining and training neural network models.

torchvision: Provides the CIFAR-10 dataset and prebuilt transformations.

matplotlib.pyplot: Used for visualizing training and testing losses.

numpy: Utility library for numerical operations.


#2. Define BasicBlock

In [None]:
# Define BasicBlock for ResNet
class BasicBlock(nn.Module):
    """Residual block: two 3x3 conv + batch-norm stages plus a shortcut.

    The main path is conv -> BN -> ReLU -> conv -> BN. Its result is added
    to the shortcut branch and passed through a final ReLU.

    Args:
        in_channels: Number of channels of the incoming feature map.
        out_channels: Number of channels produced by the block.
        stride: Stride of the first convolution (and of the projection
            shortcut, when one is needed).
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()

        # Main path: only the first convolution may change the resolution.
        self.conv1 = nn.Conv2d(
            in_channels, out_channels, kernel_size=3, stride=stride, padding=1
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(
            out_channels, out_channels, kernel_size=3, stride=1, padding=1
        )
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Shortcut: an empty Sequential acts as the identity. A 1x1
        # convolution + BN projection is used instead whenever the main path
        # changes the spatial size or the channel count, so the two branches
        # can be summed.
        shapes_match = stride == 1 and in_channels == out_channels
        if shapes_match:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride),
                nn.BatchNorm2d(out_channels),
            )

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        hidden = torch.relu(self.bn1(self.conv1(x)))
        main_path = self.bn2(self.conv2(hidden))
        return torch.relu(main_path + self.shortcut(x))

The BasicBlock class implements a residual block, the fundamental building block of ResNet. It includes:

* Two convolutional layers with Batch Normalization, which ensures stable training by normalizing activations.
* A shortcut connection directly bypassing the input to the output, allowing gradients to flow unimpeded, which addresses gradient vanishing issues in deep networks.
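* When the stride is not 1 or the input/output channel counts differ, the shortcut applies a 1×1 convolution followed by Batch Normalization so that its output matches the shape of the main path; otherwise the shortcut is the identity.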


#3. Define ResNet Class

In [None]:
# Define ResNet with adjustable layers
class ResNet(nn.Module):
    """Three-stage residual network built from ``BasicBlock``.

    A 3x3 stem convolution produces 16 channels. The three stages then
    produce 16, 32 and 64 channels; stages two and three use stride 2 in
    their first block and therefore halve the spatial resolution. The final
    feature maps are average-pooled to a 2x2 grid, flattened and passed to a
    linear classifier.

    Args:
        num_blocks: Sequence of three ints, the number of ``BasicBlock``s in
            each stage.
        num_classes: Number of output scores (logits) per sample.
    """

    def __init__(self, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(16)
        self.in_channels = 16  # Start with 16 channels

        # Create layers dynamically
        self.layer1 = self._make_layer(16, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(32, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(64, num_blocks[2], stride=2)

        # The classifier is built here, not on the first forward pass, so
        # that its weights are part of model.parameters() (and therefore
        # seen by any optimizer) and of state_dict() from the start.
        # Input size: 64 channels from layer3 times the 2x2 pooled grid.
        self.num_classes = num_classes
        self.fc = nn.Linear(64 * 2 * 2, num_classes)

    def _make_layer(self, out_channels, blocks, stride):
        """Build one stage of ``blocks`` residual blocks.

        Only the first block receives ``stride`` and the channel change; the
        remaining blocks keep ``out_channels`` and stride 1. Updates
        ``self.in_channels`` so the next stage knows its input width.
        """
        layers = []
        layers.append(BasicBlock(self.in_channels, out_channels, stride))  # First block
        self.in_channels = out_channels  # Update the number of input channels
        for _ in range(1, blocks):
            layers.append(BasicBlock(out_channels, out_channels))  # Remaining blocks
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for images ``x``."""
        out = torch.relu(self.bn1(self.conv1(x)))  # Initial conv layer
        out = self.layer1(out)  # Layer 1
        out = self.layer2(out)  # Layer 2
        out = self.layer3(out)  # Layer 3

        # Average-pool to a fixed 2x2 grid. For 32x32 inputs layer3 yields
        # 8x8 maps, so this equals a 4x4 average pool with stride 4, while
        # keeping the classifier input size independent of the image size.
        out = torch.nn.functional.adaptive_avg_pool2d(out, 2)

        out = torch.flatten(out, 1)  # Flatten everything but the batch dim
        out = self.fc(out)  # Fully connected layer
        return out

The ResNet class builds the overall network using BasicBlock. Key components:

* An initial convolutional layer to process the input.
* Layers dynamically built from BasicBlock, where:
   * The number of channels doubles in each subsequent layer.
   * The stride is adjusted to downsample the feature maps.
* An average pooling layer reduces each feature map to a 2×2 grid (for 32×32 CIFAR-10 inputs) before flattening.
* A fully connected layer (fc) computes the final class scores (logits); the softmax is applied inside the Cross-Entropy loss.

#4. Dataset and DataLoader

In [None]:
# Dataset and DataLoader
# Convert images to tensors, then shift/scale every RGB channel with
# mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Training split first, then the held-out test split; both are downloaded
# into ./data on first use and share the same preprocessing.
trainset, testset = (
    torchvision.datasets.CIFAR10(
        root='./data', train=is_train, download=True, transform=transform
    )
    for is_train in (True, False)
)

# Only the training batches are shuffled; evaluation order stays fixed.
trainloader = torch.utils.data.DataLoader(dataset=trainset, batch_size=32, shuffle=True)
testloader = torch.utils.data.DataLoader(dataset=testset, batch_size=32, shuffle=False)

The CIFAR-10 dataset is used for this experiment. This block:

1. Applies data normalization to scale pixel values to a range centered at 0.
2. Creates training and testing datasets using torchvision's datasets.CIFAR10.
3. Wraps the datasets in PyTorch's DataLoader, which facilitates batch processing and shuffling for efficient training and evaluation.


#5. Training and Testing Functions (train, test)

In [None]:
# Training Function
def train(model, device, trainloader, optimizer, criterion, epoch):
    """Run one optimisation pass over ``trainloader``.

    Args:
        model: Network to optimise; switched to training mode.
        device: Device that every batch is moved to before the forward pass.
        trainloader: Iterable of ``(inputs, targets)`` batches supporting ``len()``.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss callable invoked as ``criterion(outputs, targets)``.
        epoch: Current epoch number (accepted for the caller's convenience;
            not used by the computation).

    Returns:
        The mean of the per-batch loss values.
    """
    model.train()
    batch_losses = []
    for batch_inputs, batch_targets in trainloader:
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        # Clear gradients left over from the previous batch before backprop.
        optimizer.zero_grad()
        batch_loss = criterion(model(batch_inputs), batch_targets)
        batch_loss.backward()
        optimizer.step()

        batch_losses.append(batch_loss.item())
    return sum(batch_losses) / len(trainloader)

# Testing Function
def test(model, device, testloader, criterion):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for inputs, targets in testloader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            total_loss += loss.item()
    return total_loss / len(testloader)

The train function runs one epoch of training on the CIFAR-10 training set:

* Switches the model to training mode using model.train().
* Iterates over batches of input data and their labels.
* Computes predictions, calculates the loss using a predefined criterion (e.g., Cross-Entropy Loss), and updates model weights via backpropagation.

The average loss across all batches is returned.

The test function evaluates the model's performance on the CIFAR-10 test set:

* Switches the model to evaluation mode using model.eval(), ensuring no dropout or batch normalization updates occur during inference.
* Computes predictions and calculates the loss for each batch without updating weights (using torch.no_grad() for efficiency).
* Returns the average loss across all test batches.

#6. Train and Evaluate Models

In [None]:
# Train and Evaluate Models
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Single source of truth for the epoch count: used by the training loop and
# by the x-axis of the plot below.
num_epochs = 20

# Each variant lists the number of residual blocks in the three stages.
model_variants = [
    {"name": "Small ResNet", "blocks": [1, 1, 1]},
    {"name": "Medium ResNet", "blocks": [2, 2, 2]},
    {"name": "Large ResNet", "blocks": [3, 3, 3]},
]

criterion = nn.CrossEntropyLoss()
train_losses, test_losses = [], []  # one list of per-epoch losses per variant

for variant in model_variants:
    model = ResNet(variant["blocks"]).to(device)

    # Run one forward pass before building the optimizer so that any layer
    # the model creates lazily inside forward() is already registered and its
    # parameters are handed to Adam. Evaluation mode + no_grad keeps this
    # warm-up from touching batch-norm statistics or accumulating gradients.
    model.eval()
    with torch.no_grad():
        model(trainset[0][0].unsqueeze(0).to(device))

    optimizer = optim.Adam(model.parameters(), lr=0.001)
    model_train_losses, model_test_losses = [], []

    for epoch in range(1, num_epochs + 1):
        train_loss = train(model, device, trainloader, optimizer, criterion, epoch)
        test_loss = test(model, device, testloader, criterion)
        model_train_losses.append(train_loss)
        model_test_losses.append(test_loss)

    train_losses.append(model_train_losses)
    test_losses.append(model_test_losses)

# Plot Results
epochs = range(1, num_epochs + 1)
plt.figure(figsize=(12, 8))
for variant, variant_train, variant_test in zip(model_variants, train_losses, test_losses):
    # Solid line: training loss; dashed line: test loss of the same variant.
    plt.plot(epochs, variant_train, label=f"{variant['name']} - Train Loss")
    plt.plot(epochs, variant_test, linestyle="--", label=f"{variant['name']} - Test Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.title("Training and Testing Loss for ResNet Variants")
plt.show()

This section trains and evaluates multiple ResNet variants (Small, Medium, Large) with varying numbers of blocks:

1. For each model variant:
   * Initializes a ResNet model with the specified block configuration.
   * Trains the model over 20 epochs, recording training and testing losses after each epoch.
2. Plots the training and testing losses for all variants to compare their performance and observe how increasing model complexity impacts the results.