In [1]:
# Online Resource used : https://www.youtube.com/watch?v=DkNIBBBvcPs&list=PLhhyoLH6IjfxeoooqP9rhU3HJIAVAJ3Vz&index=19
# Original Source: Aladdin Persson

In [2]:
import torch 
import torch.nn as nn

In [3]:
class block(nn.Module):
    """
    Residual Block with bottleneck architecture.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        identity_downsample (nn.Module, optional): A downsample operation for the identity shortcut. Default is None.
        stride (int, optional): The stride for the convolutional layers. Default is 1.

    Attributes:
        expansion (int): Expansion factor for output channels.
        conv1 (nn.Conv2d): 1x1 convolutional layer.
        bn1 (nn.BatchNorm2d): Batch normalization layer after the first convolution.
        conv2 (nn.Conv2d): 3x3 convolutional layer.
        bn2 (nn.BatchNorm2d): Batch normalization layer after the second convolution.
        conv3 (nn.Conv2d): 1x1 convolutional layer.
        bn3 (nn.BatchNorm2d): Batch normalization layer after the third convolution.
        relu (nn.ReLU): ReLU activation function.
        identity_downsample (nn.Module, optional): Downsample operation for the identity shortcut. Default is None.
    """
    def __init__(self, in_channels, out_channels, identity_downsample=None, stride = 1):
        """
        Initialize a block instance.

        Args:
            in_channels (int): Number of input channels.
            out_channels (int): Number of output channels.
            identity_downsample (nn.Module, optional): A downsample operation for the identity shortcut. Default is None.
            stride (int, optional): The stride for the convolutional layers. Default is 1.
        """
        super(block, self).__init__()
        self.expansion = 4 
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.conv3 = nn.Conv2d(out_channels, out_channels*self.expansion, kernel_size=1, stride=1, padding=0)
        self.bn3 = nn.BatchNorm2d(out_channels*self.expansion)
        self.relu = nn.ReLU()
        self.identity_downsample = identity_downsample
        
    def forward(self, x):
        """
        Forward pass through the block.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            torch.Tensor: Output tensor.
        """
        identity = x
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.relu(self.bn2(self.conv2(x)))
        x = self.bn3(self.conv3(x))
        
        if self.identity_downsample is not None:
            identity = self.identity_downsample(identity)
            
        x += identity
        x = self.relu(x)
        return x 

In [4]:
class ResNet(nn.Module):
    """
        Residual Neural Network (ResNet) architecture.

        Args:
            block (nn.Module): The residual block to be used in the network.
            layers (list): List of integers specifying the number of residual blocks in each layer.
            image_channels (int): Number of input channels in the image.
            num_classes (int): Number of output classes.

        Attributes:
            in_channels (int): Number of input channels for the network.
            conv1 (nn.Conv2d): Initial convolutional layer.
            bn1 (nn.BatchNorm2d): Batch normalization layer after the initial convolution.
            relu (nn.ReLU): ReLU activation function.
            maxpool (nn.MaxPool2d): Max pooling layer.
            layer1 (nn.Sequential): First residual layer.
            layer2 (nn.Sequential): Second residual layer.
            layer3 (nn.Sequential): Third residual layer.
            layer4 (nn.Sequential): Fourth residual layer.
            avgpool (nn.AdaptiveAvgPool2d): Adaptive average pooling layer.
            fc (nn.Linear): Fully connected layer for classification.
        """
    def __init__(self, block, layers, image_channels, num_classes):
        """
        Initialize a ResNet instance.

        Args:
            block (nn.Module): The residual block to be used in the network.
            layers (list): List of integers specifying the number of residual blocks in each layer.
            image_channels (int): Number of input channels in the image.
            num_classes (int): Number of output classes.
        """
        super(ResNet, self).__init__()
        self.in_channels = 64 
        self.conv1 = nn.Conv2d(image_channels, 64, kernel_size=7, stride= 2 , padding=3)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride = 2, padding = 1)
        
        #Resnet layers
        self.layer1 = self._make_layer(block, layers[0], out_channels=64, stride =1 )
        self.layer2 = self._make_layer(block, layers[1], out_channels=128, stride =2 )
        self.layer3 = self._make_layer(block, layers[2], out_channels=256, stride =2 )
        self.layer4 = self._make_layer(block, layers[3], out_channels=512, stride =2 )
        
        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        self.fc = nn.Linear(512*4, num_classes)
        
        
    def _make_layer(self, block, num_residual_blocks, out_channels, stride):
        """
        Create a residual layer with multiple residual blocks.

        Args:
            block (nn.Module): The residual block to be used in the layer.
            num_residual_blocks (int): Number of residual blocks in the layer.
            out_channels (int): Number of output channels for each block.
            stride (int): The stride for the first block in the layer.

        Returns:
            nn.Sequential: A sequential container of residual blocks.
        """
        identity_downsample = None 
        layers = [] 
        
        if stride !=1 or self.in_channels !=out_channels*4:
            identity_downsample = nn.Sequential(nn.Conv2d(self.in_channels, out_channels*4, kernel_size= 1, stride = stride), 
                                                nn.BatchNorm2d(out_channels*4)
                                                )
        layers.append(block(self.in_channels, out_channels, identity_downsample, stride))
        
        self.in_channels = out_channels*4
        
        for i in range(num_residual_blocks - 1):
            layers.append(block(self.in_channels, out_channels))
        return nn.Sequential(*layers)
    
    def forward(self, x):
        """
        Forward pass through the ResNet.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            torch.Tensor: Output tensor.
        """
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        
        x = self.avgpool(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fc(x)
        return x 
        
    

In [5]:
def ResNet50(img_channels= 3, num_classes = 1000):
    """
    Create a ResNet-50 model.

    Args:
        img_channels (int): Number of input channels in the image.
        num_classes (int): Number of output classes.

    Returns:
        ResNet: ResNet-50 model.
    """
    return ResNet(block, [3, 4, 6, 3], img_channels, num_classes)

def ResNet101(img_channels= 3, num_classes = 1000):
    """
    Create a ResNet-101 model.

    Args:
        img_channels (int): Number of input channels in the image.
        num_classes (int): Number of output classes.

    Returns:
        ResNet: ResNet-101 model.
    """
    return ResNet(block, [3, 4, 23, 3], img_channels, num_classes)

def ResNet152(img_channels= 3, num_classes = 1000):
    """
    Create a ResNet-152 model.

    Args:
        img_channels (int): Number of input channels in the image.
        num_classes (int): Number of output classes.

    Returns:
        ResNet: ResNet-152 model.
    """
    return ResNet(block, [3, 8, 36, 3], img_channels, num_classes)


In [6]:
def test():
    net = ResNet152()
    x = torch.randn(2,3,224, 224)
    y = net(x)
    print(y.shape)

In [7]:
test()

torch.Size([2, 1000])


In [8]:
import os
from glob import glob 
import pandas as pd 

path_to_data = os.path.join("..", "cifar_data", "cifar-10-batches-py")
train_df =  pd.read_csv(os.path.join(path_to_data, "train.csv"))
test_df = pd.read_csv(os.path.join(path_to_data, "test.csv"))


In [9]:
train_df["label"].value_counts()

6    5000
9    5000
4    5000
1    5000
2    5000
7    5000
8    5000
3    5000
5    5000
0    5000
Name: label, dtype: int64

In [10]:
test_df["label"].value_counts()

3    1000
8    1000
0    1000
6    1000
1    1000
9    1000
5    1000
7    1000
4    1000
2    1000
Name: label, dtype: int64

In [11]:
import cv2
from numpy import array
from pandas import DataFrame
from torch.utils.data import Dataset, TensorDataset, DataLoader
from torchvision import transforms

def get_from_df_paths_targets( df: DataFrame, transform=None):
    """
    Get paths and labels from a DataFrame.

    Args:
        df (DataFrame): The DataFrame containing 'path' and 'label' columns.
        transform (callable, optional): A function/transform to apply to the data.

    Returns:
        DataFrame: The input DataFrame.
        list: List of paths.
        list: List of labels.
        callable: The transform function.
    """
    paths = df['path'].to_list()
    labels = df['label'].to_list()
    return df,paths, labels, transform

class CV2ImageDataset(Dataset):
    """
    OpenCV-based dataset suitable for use with albumentations.

    Args:
        df (DataFrame): The DataFrame containing 'path' and 'label' columns.
        transform (callable, optional): A function/transform to apply to the data.
        device (torch.device, optional): The device to use for tensors (default: 'cuda:0' if available, else 'cpu').

    Attributes:
        df (DataFrame): The input DataFrame.
        paths (list): List of image file paths.
        labels (list): List of corresponding labels.
        transform (callable): The transform function to apply to images.
    """
    def __init__(self, df:DataFrame, transform: transforms =None, device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")):
        """
        Initialize a CV2ImageDataset instance.

        Args:
            df (DataFrame): DataFrame containing image paths and labels.
            transform (callable, optional): A function/transform to apply to the image data. Default is None.
            device (torch.device, optional): The device to use for tensor computations. Default is 'cuda:0' if available, else 'cpu'.
        """
        self.df, self.paths, self.labels, self.transform = get_from_df_paths_targets( df, transform=transform)
        self.device=device
    def __len__(self):
        """
        Get the length of the dataset.

        Returns:
            int: Number of samples in the dataset.
        """
        return len(self.df)
    
    def __getitem__(self, idx):
        """
        Get an item from the dataset.

        Args:
            idx (int): Index of the item to retrieve.

        Returns:
            torch.Tensor: Transformed image.
            torch.Tensor: Corresponding label.
        """
        image = cv2.imread(self.paths[idx])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        label = torch.tensor(int(self.labels[idx]))
        if self.transform is not None:
            image = self.transform(image=image)["image"]
        return image, label
    
    
class DatasetLoader():
    """
    Data Loader object
    """
    def __init__(self, dataset: Dataset, batch_size:int = 1 , num_workers: int =1):
        self.dataset = dataset
        self.batch_size = batch_size
        self.num_workers = num_workers
        #self.get_dataloader()
        
    def get_dataloader(self,):
        """
        Method to return dataloader
        Returns:
            self.loader: Dataset Loader
        """
        self.loader = DataLoader(
                    self.dataset,
                    batch_size=self.batch_size,
                    num_workers=self.num_workers,
                    shuffle=True
                )
        return self.loader
    
    def check_dataloader_dimension(self):
        """
        Prints out the dimension of dataloader
        """
        for _, (data, target) in enumerate(self.loader):
            print('Data Shape of Dataloader is (data, target) : ', data.shape, target.shape)
            print('Data Type of Dataloader is (data, target) : ', type(data), type(target))
            torch.cuda.empty_cache()
            break



In [12]:
import albumentations as A
from albumentations.pytorch import ToTensorV2
batch_size = 64
aug = A.Compose([   
A.HorizontalFlip(p=0.5),          
A.Normalize(),            
ToTensorV2()])

In [13]:
train_dataset = CV2ImageDataset(train_df, transform=aug)
test_dataset = CV2ImageDataset(test_df, transform=aug)

In [14]:
train_loader = DatasetLoader(train_dataset, batch_size, num_workers=0).get_dataloader()
test_loader = DatasetLoader(test_dataset, batch_size, num_workers=0).get_dataloader()

In [15]:
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_epochs = 10
model  = ResNet50().to(device)
optimizer = optim.SGD(model.parameters(), lr = 0.1)
scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs, eta_min=0.0001)
criterion = nn.CrossEntropyLoss()

In [16]:
num_epochs = 10
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode

    running_loss = 0.0
    correct_predictions = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)

        # Calculate loss
        loss = criterion(outputs, labels)

        # Backpropagation and optimization
        loss.backward()
        optimizer.step()

        # Update statistics
        running_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs, 1)
        correct_predictions += (predicted == labels).sum().item()

    # Calculate epoch statistics
    epoch_loss = running_loss / len(train_dataset)
    epoch_accuracy = correct_predictions / len(train_dataset)

    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}")

    
    scheduler.step()
    
    
    # Evaluation on the test set
    model.eval()  # Set the model to evaluation mode

    test_loss = 0.0
    test_correct_predictions = 0

    with torch.no_grad():
        for test_inputs, test_labels in test_loader:
            test_inputs, test_labels = test_inputs.to(device), test_labels.to(device)

            # Forward pass
            test_outputs = model(test_inputs)

            # Calculate test loss
            test_loss += criterion(test_outputs, test_labels).item() * test_inputs.size(0)

            # Calculate test accuracy
            _, test_predicted = torch.max(test_outputs, 1)
            test_correct_predictions += (test_predicted == test_labels).sum().item()

    # Calculate test set statistics
    test_loss /= len(test_dataset)
    test_accuracy = test_correct_predictions / len(test_dataset)

    print(f"Test Set - Loss: {test_loss:.4f}, Accuracy: {test_accuracy:.4f}")

print("Training finished.")

Epoch [1/10], Loss: 3.7283, Accuracy: 0.2152
Test Set - Loss: 29.5375, Accuracy: 0.1025
Epoch [2/10], Loss: 2.6077, Accuracy: 0.2505
Test Set - Loss: 7.3345, Accuracy: 0.2664
Epoch [3/10], Loss: 1.9609, Accuracy: 0.3651
Test Set - Loss: 9.6813, Accuracy: 0.2046
Epoch [4/10], Loss: 1.7181, Accuracy: 0.4137
Test Set - Loss: 28.3599, Accuracy: 0.3164
Epoch [5/10], Loss: 1.5673, Accuracy: 0.4505
Test Set - Loss: 5.2233, Accuracy: 0.3805
Epoch [6/10], Loss: 1.4521, Accuracy: 0.4843
Test Set - Loss: 3.2469, Accuracy: 0.4179
Epoch [7/10], Loss: 1.3625, Accuracy: 0.5128
Test Set - Loss: 71.2285, Accuracy: 0.1378
Epoch [8/10], Loss: 1.3673, Accuracy: 0.5082
Test Set - Loss: 2.3914, Accuracy: 0.4897
Epoch [9/10], Loss: 1.2682, Accuracy: 0.5412
Test Set - Loss: 1.3386, Accuracy: 0.5361
Epoch [10/10], Loss: 1.2313, Accuracy: 0.5559
Test Set - Loss: 1.2925, Accuracy: 0.5497
Training finished.
