# Step 1: Import Libraries

In [3]:
import torch  
import torch.nn as nn  
import torch.optim as optim  
import torchvision  
import torchvision.transforms as transforms  
from torch.utils.data import DataLoader

# Step 2: Define a Residual Block (Basic Block)

In the context of ResNet (Residual Networks), the terms Basic Block and Residual Block are often used interchangeably, but they are not quite the same thing: a Residual Block is the general concept, while a Basic Block is one specific implementation of it. The sections below explain this in more detail.

# **What is a Residual Block?**
A Residual Block is a neural network layer component introduced in the ResNet architecture to help train very deep networks more effectively. The primary innovation of a Residual Block is the introduction of skip connections (or shortcut connections), which allow gradients to flow more easily during backpropagation.

## **Structure of a Residual Block**
A typical Residual Block consists of:

1. **Two Convolutional Layers**:

    - Each layer is usually followed by a batch normalization layer and a non-linear activation function (ReLU).
    - If the input and output dimensions are the same, the outputs from the convolutional layers are added to the original input (the shortcut connection) before passing through a final activation function.

2. **Shortcut Connection**:

    - This connection skips the two convolutional layers and provides the original input to the output of the block.
    - This can be simply an identity connection, but if the dimensions do not match (for example, when downsampling), a linear projection (a 1x1 convolution, typically followed by batch normalization) may be used to adjust the dimensions before adding it to the output.

### **Mathematical Representation**
For an input x to the block, the output y can be formulated as:

        y = F(x) + x

Where F(x) represents the nonlinear transformation (the two convolutional layers, batch normalization, and activation function).

## **What is a Basic Block?**
The Basic Block is a specific implementation of a residual block used in ResNet architectures. For example, in ResNet-18 and ResNet-34, the Basic Block typically has:

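- 2 convolutional layers, each followed by batch normalization. A ReLU activation follows the first batch normalization; the second ReLU is applied only after the skip connection has been added.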
- A skip connection directly connecting the input to the output.

## **Differences in Variants**
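1. **Basic Block**: Used in the shallower ResNet architectures (ResNet-18 and ResNet-34), which have fewer parameters. Most Basic Blocks keep the input and output dimensions the same and use an identity shortcut; the first block of a stage that downsamples (stride 2) or changes the channel count instead uses a 1x1 convolution on the shortcut so that the dimensions match.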

2. **Bottleneck Block**: In deeper architectures like ResNet-50 and ResNet-101, a Bottleneck Block is used. It consists of three convolutional layers instead of two. The first layer reduces dimensions (1x1 convolution), the second layer performs the main convolution (3x3), and the third layer restores dimensions (1x1 convolution). The intention is to reduce the computational cost while maintaining the expressiveness of the network.



In [None]:
# In ResNet, a basic block consists of two convolutional layers with a skip connection (identity mapping). 
# Here, we'll define the BasicBlock class:

class BasicBlock(nn.Module):
    """Two-convolution residual block with an optional projection shortcut.

    The main branch is conv3x3 -> BatchNorm -> ReLU -> conv3x3 -> BatchNorm.
    Its result is summed with the shortcut and passed through a final ReLU.

    Args:
        in_channels: Number of channels of the incoming feature map.
        out_channels: Number of channels produced by both convolutions.
        stride: Stride of the first convolution; the second always uses 1.
        downsample: Optional module applied to the input to build the
            shortcut. When ``None`` the input itself is the shortcut.
    """

    # Output-channel multiplier; this block emits exactly ``out_channels``.
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super().__init__()

        # Settings shared by both 3x3 convolutions. Bias is disabled because
        # each convolution is immediately followed by batch normalization.
        conv_kwargs = dict(kernel_size=3, padding=1, bias=False)

        self.conv1 = nn.Conv2d(in_channels, out_channels, stride=stride, **conv_kwargs)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, stride=1, **conv_kwargs)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        # Main branch.
        residual = self.relu(self.bn1(self.conv1(x)))
        residual = self.bn2(self.conv2(residual))

        # Shortcut: the raw input unless a projection module was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        residual += shortcut
        return self.relu(residual)

# Step 3: Define the ResNet Model

Now, let's define the ResNet class, which will use the BasicBlock to build the architecture.

In [9]:
class ResNet(nn.Module):
    """ResNet backbone with a linear classification head.

    The network is a 7x7 stride-2 convolution stem with max pooling, four
    stages of residual blocks (64, 128, 256 and 512 base channels, the last
    three with stride 2), global average pooling, and a fully connected layer.

    Args:
        block: Block class. It must expose an ``expansion`` attribute and be
            constructible as ``block(in_channels, out_channels, stride,
            downsample)`` and ``block(in_channels, out_channels)``.
        layers: Sequence whose first four entries give the number of blocks
            in stages 1 to 4.
        num_classes: Number of outputs of the final linear layer.
    """

    def __init__(self, block, layers, num_classes=10):
        super().__init__()
        # Channel count entering the next stage; updated by _make_layer.
        self.in_channels = 64

        # Stem: expects a 3-channel input.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Residual stages; every stage after the first halves the resolution.
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        # Head.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, out_channels, blocks, stride=1):
        """Build one stage and advance ``self.in_channels`` past it.

        The first block receives ``stride`` and, when the stride is not 1 or
        the channel count changes, a 1x1 conv + BatchNorm projection for its
        shortcut. The remaining ``blocks - 1`` blocks use the defaults.
        """
        stage_width = out_channels * block.expansion
        needs_projection = stride != 1 or self.in_channels != stage_width

        projection = (
            nn.Sequential(
                nn.Conv2d(self.in_channels, stage_width, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(stage_width),
            )
            if needs_projection
            else None
        )

        first_block = block(self.in_channels, out_channels, stride, projection)
        self.in_channels = stage_width
        following = [block(self.in_channels, out_channels) for _ in range(1, blocks)]

        return nn.Sequential(first_block, *following)

    def forward(self, x):
        """Map an image batch to class scores of shape ``(batch, num_classes)``."""
        features = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            features = stage(features)

        pooled = torch.flatten(self.avgpool(features), 1)
        return self.fc(pooled)

# Step 4: Instantiate the Model

Now define a helper function that builds a ResNet-18 model (two Basic Blocks in each of the four stages):

In [10]:
def resnet18(num_classes=10):
    """Build a ResNet-18: a ``ResNet`` of ``BasicBlock`` with two blocks per stage.

    Args:
        num_classes: Number of outputs of the classification head. The
            default of 10 keeps ``resnet18()`` behaving as before.

    Returns:
        A newly constructed ``ResNet`` instance.
    """
    return ResNet(BasicBlock, [2, 2, 2, 2], num_classes=num_classes)

# Step 5: Prepare CIFAR-10 Dataset  
Now, we have to load and preprocess the CIFAR-10 dataset:

In [None]:
# Preprocessing shared by the training and test splits: resize to 32x32,
# convert to a tensor, then normalize each of the three channels with
# mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize(size=(32, 32)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# CIFAR-10 splits stored under ./data (download is requested for both).
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Batches of 64; only the training data is shuffled.
trainloader = DataLoader(dataset=trainset, batch_size=64, shuffle=True)
testloader = DataLoader(dataset=testset, batch_size=64, shuffle=False)

# Step 6: Define Training Parameters
Set device, loss function, optimizer, and other configurations.

In [11]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# ResNet-18 placed on the selected device.
model = resnet18()
model = model.to(device)

# Cross-entropy loss, optimized with Adam at a learning rate of 0.001.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

# Step 7: Train the Model
Now, we train the model. Here's how you can do that:

In [None]:
num_epochs = 10

for epoch in range(num_epochs):
    # Put the model in training mode at the start of every epoch.
    model.train()
    batch_losses = []

    for batch_inputs, batch_labels in trainloader:
        batch_inputs = batch_inputs.to(device)
        batch_labels = batch_labels.to(device)

        # Standard optimization step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        loss = criterion(model(batch_inputs), batch_labels)
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    # Report the mean of the per-batch losses for this epoch.
    running_loss = sum(batch_losses)
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / len(trainloader):.4f}')

# Step 8: Evaluate the Model
After training, evaluate the model on the test set:

In [None]:
# Put the model in evaluation mode before measuring accuracy.
model.eval()
correct = 0
total = 0

# Gradients are not needed for evaluation.
with torch.no_grad():
    for inputs, labels in testloader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        # Predicted class is the index of the largest score in each row.
        # argmax avoids the legacy `.data` attribute, which is unnecessary
        # inside torch.no_grad().
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the model on the 10000 test images: {100 * correct / total:.2f}%')