In [1]:
# Online Resource used : https://www.youtube.com/watch?v=uQc4Fs7yx5I&list=PLhhyoLH6IjfxeoooqP9rhU3HJIAVAJ3Vz&index=19
# Original Source: Aladdin Persson

import torch
import torch.nn as nn

In [2]:
class conv_block(nn.Module):
    """
    Conv2d -> BatchNorm2d -> ReLU building block.

    Args:
        in_channels (int): Channels of the incoming feature map.
        out_channels (int): Channels produced by the convolution.
        **kwargs: Forwarded unchanged to ``nn.Conv2d`` (kernel_size, stride, padding, ...).

    Attributes:
        relu (nn.ReLU): Activation applied last.
        conv (nn.Conv2d): Convolution applied first.
        batchnorm (nn.BatchNorm2d): Normalization applied to the convolution output.
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()
        # Submodules are created in the same order as before so that
        # state_dict key order and parameter initialisation are unchanged.
        self.relu = nn.ReLU()
        self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
        self.batchnorm = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        """
        Run the block on a batch of feature maps.

        Args:
            x (torch.Tensor): Input accepted by the underlying ``nn.Conv2d``.

        Returns:
            torch.Tensor: ``relu(batchnorm(conv(x)))``.
        """
        convolved = self.conv(x)
        normalized = self.batchnorm(convolved)
        return self.relu(normalized)

In [3]:
class Inception_block(nn.Module):
    """
    Inception module: four parallel branches whose outputs are concatenated
    along the channel dimension.

    Args:
        in_channels (int): Channels of the incoming feature map.
        out_1x1 (int): Output channels of the plain 1x1 branch.
        red_3x3 (int): Channels after the 1x1 reduction that precedes the 3x3 conv.
        out_3x3 (int): Output channels of the 3x3 branch.
        red_5x5 (int): Channels after the 1x1 reduction that precedes the 5x5 conv.
        out_5x5 (int): Output channels of the 5x5 branch.
        out_1x1pool (int): Output channels of the 1x1 conv that follows max-pooling.

    Attributes:
        branch1 (conv_block): 1x1 convolution.
        branch2 (nn.Sequential): 1x1 reduction followed by a 3x3 convolution (padding 1).
        branch3 (nn.Sequential): 1x1 reduction followed by a 5x5 convolution (padding 2).
        branch5 (nn.Sequential): 3x3 max-pool (stride 1, padding 1) followed by a 1x1 convolution.
    """

    def __init__(self, in_channels, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, out_1x1pool):
        super().__init__()

        # Branch 1: 1x1 convolution only.
        self.branch1 = conv_block(in_channels, out_1x1, kernel_size=1)

        # Branch 2: channel reduction, then a 3x3 convolution.
        self.branch2 = nn.Sequential(
            conv_block(in_channels, red_3x3, kernel_size=1),
            conv_block(red_3x3, out_3x3, kernel_size=3, padding=1),
        )

        # Branch 3: channel reduction, then a 5x5 convolution.
        self.branch3 = nn.Sequential(
            conv_block(in_channels, red_5x5, kernel_size=1),
            conv_block(red_5x5, out_5x5, kernel_size=5, padding=2),
        )

        # Pooling branch (attribute is named branch5 in this file).
        self.branch5 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            conv_block(in_channels, out_1x1pool, kernel_size=1),
        )

    def forward(self, x):
        """
        Run every branch on the same input and concatenate the results.

        Args:
            x (torch.Tensor): Input feature map with ``in_channels`` channels.

        Returns:
            torch.Tensor: Branch outputs joined on dimension 1, in the order
            branch1, branch2, branch3, branch5.
        """
        branches = (self.branch1, self.branch2, self.branch3, self.branch5)
        branch_outputs = [branch(x) for branch in branches]
        return torch.cat(branch_outputs, 1)

In [4]:
class GoogleNet(nn.Module):
    """
    GoogleNet (Inception V1) style classifier.

    Args:
        in_channels (int): Number of channels of the input images.
        num_classes (int): Size of the output layer. Default is 1000 (ImageNet).

    Attributes:
        conv1, conv2 (conv_block): Stem convolutions.
        maxpool1..maxpool4 (nn.MaxPool2d): Max-pooling layers between stages.
        inception3a..inception5b (Inception_block): Nine Inception modules.
        avgpool (nn.AvgPool2d): 7x7 average pooling with stride 1.
        dropout (nn.Dropout): Dropout with p=0.4 applied to the flattened features.
        fc1 (nn.Linear): 65536 -> 1024 projection.
        fc2 (nn.Linear): 1024 -> num_classes output layer.
    """

    def __init__(self, in_channels, num_classes = 1000):
        super().__init__()
        # Layers are created in the original order so state_dict key order and
        # the sequence of random initialisations are unchanged.
        self.conv1 = conv_block(in_channels=in_channels, out_channels=64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.conv2 = conv_block(64, 192, kernel_size=3, stride=1, padding=1)
        # NOTE(review): this pool uses stride 1, so it does not downsample; the
        # published GoogLeNet layout presumably uses stride 2 here - confirm.
        # Changing it would also change the feature size expected by fc1.
        self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)

        # Each block's in_channels equals the sum of the previous block's four
        # branch outputs (e.g. 64 + 128 + 32 + 32 = 256).
        self.inception3a = Inception_block(192, 64, 96, 128, 16, 32, 32)
        self.inception3b = Inception_block(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.inception4a = Inception_block(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = Inception_block(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = Inception_block(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = Inception_block(512, 112, 144, 288, 32, 64, 64)
        self.inception4e = Inception_block(528, 256, 160, 320, 32, 128, 128)
        self.maxpool4 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.inception5a = Inception_block(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = Inception_block(832, 384, 192, 384, 48, 128, 128)

        self.avgpool = nn.AvgPool2d(kernel_size=7, stride=1)
        self.dropout = nn.Dropout(p=0.4)
        # 65536 = 1024 channels * 8 * 8: a 224x224 input is reduced to 14x14 by
        # the four stride-2 layers above, and the 7x7/stride-1 average pool
        # leaves 8x8. Other input resolutions will not match this layer.
        self.fc1 = nn.Linear(65536, 1024)
        # Fix: honour num_classes instead of always emitting 1000 logits.
        self.fc2 = nn.Linear(1024, num_classes)

    def forward(self, x):
        """
        Apply the GoogleNet model to an input batch.

        Args:
            x (torch.Tensor): Input of shape (batch_size, in_channels, height, width);
                fc1 requires height = width = 224 with the current layer settings.

        Returns:
            torch.Tensor: Logits of shape (batch_size, num_classes).
        """
        # Stem
        x = self.conv1(x)
        x = self.maxpool1(x)
        x = self.conv2(x)
        x = self.maxpool2(x)

        # Stage 3
        x = self.inception3a(x)
        x = self.inception3b(x)
        x = self.maxpool3(x)

        # Stage 4
        x = self.inception4a(x)
        x = self.inception4b(x)
        x = self.inception4c(x)
        x = self.inception4d(x)
        x = self.inception4e(x)
        x = self.maxpool4(x)

        # Stage 5 and classifier head
        x = self.inception5a(x)
        x = self.inception5b(x)
        x = self.avgpool(x)
        x = x.reshape(x.shape[0], -1)  # flatten everything except the batch dimension
        x = self.dropout(x)
        x = self.fc1(x)
        x = self.fc2(x)
        return x
        

In [5]:
if __name__ == "__main__":
    # Smoke test: push a random batch of three 3x224x224 inputs through a
    # freshly built model and print the shape of the result.
    x = torch.randn(3, 3, 224, 224)
    model = GoogleNet(in_channels=3)
    output_shape = model(x).shape
    print(output_shape)

torch.Size([3, 1000])


In [6]:
import os
from glob import glob 
import pandas as pd 

# Directory (relative to the notebook) that holds train.csv and test.csv.
path_to_data = os.path.join("..", "cifar_data", "cifar-10-batches-py")
# Read both splits the same way; train.csv is read first, then test.csv.
train_df, test_df = (
    pd.read_csv(os.path.join(path_to_data, csv_name))
    for csv_name in ("train.csv", "test.csv")
)

In [7]:
# Class-balance check: number of training rows per value of the "label" column.
train_df["label"].value_counts()

6    5000
9    5000
4    5000
1    5000
2    5000
7    5000
8    5000
3    5000
5    5000
0    5000
Name: label, dtype: int64

In [8]:
# Class-balance check: number of test rows per value of the "label" column.
test_df["label"].value_counts()

3    1000
8    1000
0    1000
6    1000
1    1000
9    1000
5    1000
7    1000
4    1000
2    1000
Name: label, dtype: int64

In [9]:
import cv2
import torch
from numpy import array
from pandas import DataFrame
from torch.utils.data import Dataset, TensorDataset, DataLoader
from torchvision import transforms

def get_from_df_paths_targets( df: DataFrame, transform=None):
    """
    Unpack a DataFrame into the pieces a dataset object needs.

    Args:
        df (DataFrame): Frame with a 'path' column and a 'label' column.
        transform (callable, optional): Passed through untouched.

    Returns:
        Tuple: ``(df, paths, labels, transform)`` where ``paths`` and ``labels``
        are plain Python lists built from the two columns and ``df`` /
        ``transform`` are the very objects that were passed in.
    """
    path_column, label_column = df['path'], df['label']
    return df, path_column.tolist(), label_column.tolist(), transform

class CV2ImageDataset(Dataset):
    """
    Custom dataset that reads images with OpenCV and applies an
    Albumentations-style transform.

    The transform is invoked as ``transform(image=image)["image"]``, i.e. it
    must accept the image as a keyword argument and return a mapping with an
    "image" entry.
    """
    def __init__(self, df:DataFrame, transform=None, device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")):
        """
        Initialize the CV2ImageDataset.

        Args:
            df (DataFrame): The DataFrame containing 'path' and 'label' columns.
            transform (callable, optional): Keyword-style transform described in
                the class docstring. ``None`` leaves the image untransformed.
            device (torch.device, optional): Stored on the instance as
                ``self.device``; this class does not move any data to it.
        """
        self.df, self.paths, self.labels, self.transform = get_from_df_paths_targets( df, transform=transform)
        self.device = device

    def __len__(self):
        """Return the number of rows in the underlying DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """
        Get an item from the dataset.

        Args:
            idx (int): Index of the item to retrieve.

        Returns:
            Tuple: ``(image, label)``. ``image`` is whatever the transform
            returns under its "image" key, or the RGB image from OpenCV when no
            transform is set. ``label`` is a 0-dim integer tensor.

        Raises:
            FileNotFoundError: If OpenCV cannot read an image from the stored path.
        """
        path = self.paths[idx]
        image = cv2.imread(path)
        # cv2.imread signals failure by returning None rather than raising, which
        # would otherwise surface as an opaque error inside cvtColor.
        if image is None:
            raise FileNotFoundError(f"cv2.imread could not read an image from {path!r}")
        # OpenCV loads colour images in BGR channel order; convert to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        label = torch.tensor(int(self.labels[idx]))
        if self.transform is not None:
            image = self.transform(image=image)["image"]
        return image, label
    
    
class DatasetLoader():
    """
    Thin convenience wrapper that builds a ``torch.utils.data.DataLoader``
    for a dataset and can print the shape of its first batch.
    """
    def __init__(self, dataset: Dataset, batch_size:int = 1 , num_workers: int =1, shuffle: bool = True):
        """
        Initialize the DatasetLoader.

        Args:
            dataset (Dataset): The PyTorch dataset to load.
            batch_size (int, optional): Batch size for data loading.
            num_workers (int, optional): Number of workers for parallel data loading.
            shuffle (bool, optional): Whether the DataLoader reshuffles the data.
                Defaults to True, which matches the previous hard-coded behaviour.
        """
        self.dataset = dataset
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.shuffle = shuffle
        # Populated by get_dataloader(); defined here so the attribute always exists.
        self.loader = None

    def get_dataloader(self,):
        """
        Build a new DataLoader for the dataset and remember it as ``self.loader``.

        Returns:
            DataLoader: A DataLoader for the dataset.
        """
        self.loader = DataLoader(
                    self.dataset,
                    batch_size=self.batch_size,
                    num_workers=self.num_workers,
                    shuffle=self.shuffle
                )
        return self.loader

    def check_dataloader_dimension(self):
        """
        Print the shape and Python type of the first (data, target) batch.

        Builds the DataLoader first if get_dataloader() has not been called yet
        (previously this raised AttributeError). Prints nothing when the loader
        yields no batches.
        """
        if self.loader is None:
            self.get_dataloader()
        first_batch = next(iter(self.loader), None)
        if first_batch is None:
            return
        data, target = first_batch
        print('Data Shape of Dataloader is (data, target) : ', data.shape, target.shape)
        print('Data Type of Dataloader is (data, target) : ', type(data), type(target))
        torch.cuda.empty_cache()



In [10]:
import albumentations as A
from albumentations.pytorch import ToTensorV2
batch_size = 2
# Image pipeline handed to CV2ImageDataset: resize to 224x224, flip
# horizontally with probability 0.5, normalize with the library defaults,
# then convert to a tensor.
aug = A.Compose(
    [
        A.Resize(224, 224),
        A.HorizontalFlip(p=0.5),
        A.Normalize(),
        ToTensorV2(),
    ]
)

In [11]:
# Training images go through the random-flip augmentation pipeline.
train_dataset = CV2ImageDataset(train_df, transform=aug)
# Evaluation must be deterministic: keep the same resize/normalize/tensor
# steps but drop the random horizontal flip so test metrics are repeatable.
eval_aug = A.Compose([A.Resize(224, 224), A.Normalize(), ToTensorV2()])
test_dataset = CV2ImageDataset(test_df, transform=eval_aug)

In [12]:
# Build one DataLoader per split; num_workers=0 keeps loading in the main process.
train_loader = DatasetLoader(
    train_dataset, batch_size=batch_size, num_workers=0
).get_dataloader()
test_loader = DatasetLoader(
    test_dataset, batch_size=batch_size, num_workers=0
).get_dataloader()

In [13]:
import torch.optim as optim
from torch.optim.lr_scheduler import CyclicLR
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_epochs = 10
model = GoogleNet(3).to(device)
optimizer = optim.SGD(model.parameters(), lr=0.1)
# Cyclic learning-rate schedule between base_lr and max_lr; step_size_up is
# counted in scheduler.step() calls. (The original line assigned `scheduler`
# twice in a chained assignment; the duplicate has been removed.)
scheduler = CyclicLR(optimizer, base_lr=0.001, max_lr=0.1, step_size_up=2000)
criterion = nn.CrossEntropyLoss()

In [14]:
# Train for num_epochs epochs; after every epoch report the training
# loss/accuracy and then evaluate on the full test loader.
num_epochs = 10
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode

    # Per-epoch accumulators: summed per-sample loss and number of correct predictions.
    running_loss = 0.0
    correct_predictions = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)

        # Calculate loss
        loss = criterion(outputs, labels)

        # Backpropagation and optimization
        loss.backward()
        optimizer.step()

        # Update statistics
        # Scale the batch loss by the batch size so the division by
        # len(train_dataset) below yields a per-sample average.
        running_loss += loss.item() * inputs.size(0)
        # Predicted class = index of the largest output along dimension 1.
        _, predicted = torch.max(outputs, 1)
        correct_predictions += (predicted == labels).sum().item()

    # Calculate epoch statistics
    epoch_loss = running_loss / len(train_dataset)
    epoch_accuracy = correct_predictions / len(train_dataset)

    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}")

    # NOTE(review): the scheduler is stepped once per epoch here. PyTorch
    # documents CyclicLR as a per-batch schedule, so with step_size_up=2000 the
    # learning rate barely moves in 10 steps - confirm this is intended.
    scheduler.step()
    
    # Evaluation on the test set
    model.eval()  # Set the model to evaluation mode

    test_loss = 0.0
    test_correct_predictions = 0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for test_inputs, test_labels in test_loader:
            test_inputs, test_labels = test_inputs.to(device), test_labels.to(device)

            # Forward pass
            test_outputs = model(test_inputs)

            # Calculate test loss
            test_loss += criterion(test_outputs, test_labels).item() * test_inputs.size(0)

            # Calculate test accuracy
            _, test_predicted = torch.max(test_outputs, 1)
            test_correct_predictions += (test_predicted == test_labels).sum().item()

    # Calculate test set statistics
    test_loss /= len(test_dataset)
    test_accuracy = test_correct_predictions / len(test_dataset)

    print(f"Test Set - Loss: {test_loss:.4f}, Accuracy: {test_accuracy:.4f}")

print("Training finished.")

Epoch [1/10], Loss: 1.6781, Accuracy: 0.3960
Test Set - Loss: 1.3949, Accuracy: 0.5081
Epoch [2/10], Loss: 1.0622, Accuracy: 0.6249
Test Set - Loss: 0.9395, Accuracy: 0.6789
Epoch [3/10], Loss: 0.7980, Accuracy: 0.7227
Test Set - Loss: 1.0016, Accuracy: 0.6940
Epoch [4/10], Loss: 0.6557, Accuracy: 0.7770
Test Set - Loss: 0.8398, Accuracy: 0.7389
Epoch [5/10], Loss: 0.5580, Accuracy: 0.8086
Test Set - Loss: 0.7331, Accuracy: 0.7672
Epoch [6/10], Loss: 0.4902, Accuracy: 0.8320
Test Set - Loss: 0.6158, Accuracy: 0.7986
Epoch [7/10], Loss: 0.4412, Accuracy: 0.8499
Test Set - Loss: 0.7511, Accuracy: 0.7886
Epoch [8/10], Loss: 0.3960, Accuracy: 0.8651
Test Set - Loss: 0.5356, Accuracy: 0.8296
Epoch [9/10], Loss: 0.3561, Accuracy: 0.8797
Test Set - Loss: 1.0990, Accuracy: 0.7557
Epoch [10/10], Loss: 0.3295, Accuracy: 0.8859
Test Set - Loss: 0.5161, Accuracy: 0.8423
Training finished.
