In [1]:
#Online Implementation : https://www.youtube.com/watch?v=ACmuBbuXn20&list=PLhhyoLH6IjfxeoooqP9rhU3HJIAVAJ3Vz&index=17
# Original Source: Aladdin Persson

import torch 
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms



In [2]:
VGG_types = {
    "VGG11": [64, "M", 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M", 512, 512,"M"],
    "VGG13": [64, 64, "M", 128, 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M", 512, 512,"M"],
    "VGG16": [64, 64, "M", 128, 128, "M", 256, 256, 256, "M", 512, 512, 512, "M", 512, 512, 512, "M", 512, 512, 512, "M"],
    "VGG19": [64, 64, "M", 128, 128, "M", 256, 256, 256, 256, "M", 512, 512, 512, 512, "M", 512, 512, 512, 512, "M", 512, 512, 512, 512, "M"],
}

In [3]:
class VGG_net(nn.Module):
    """
    VGG network implementation.

    Args:
        in_channels (int, optional): Number of input channels (default: 3).
        num_classes (int, optional): Number of output classes (default: 1000).

    Attributes:
        in_channels (int): Number of input channels.
        conv_layers (Sequential): Convolutional layers of the VGG network.
        fcs (Sequential): Fully connected layers of the VGG network.
    """
    def __init__(self, in_channels = 3, num_classes = 1000):
        super(VGG_net, self).__init__()
        self.in_channels = in_channels
        self.conv_layers = self.create_conv_layers(VGG_types["VGG16"])
        
        self.fcs = nn.Sequential(nn.Linear(4608, 4096), 
                                 nn.ReLU(), 
                                 nn.Dropout(),
                                 nn.Linear(4096, 4096, ),
                                 nn.ReLU(),
                                 nn.Dropout(),
                                 nn.Linear(4096, num_classes))
        
    def forward(self, x):
        """
        Forward pass of the VGG network.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            torch.Tensor: Output tensor.
        """
        x = self.conv_layers(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fcs(x)
        return x
    
    def create_conv_layers(self, architecture):
        """
        Create convolutional layers based on the specified architecture.

        Args:
            architecture (list): List of integers and 'M' representing layer configurations.

        Returns:
            Sequential: Sequential module containing convolutional layers.
        """
        layers = []
        in_channels =self.in_channels
        
        for x in architecture:
            if type(x)== int:
                out_channels = x 
                
                layers += [nn.Conv2d(in_channels=in_channels, out_channels=out_channels, 
                                     kernel_size=(3,3), stride = (1,1), padding=(1,1)),
                            nn.BatchNorm2d(x),
                            nn.ReLU()
                            ]
                in_channels = x 
            elif x == "M":
                layers +=[nn.MaxPool2d(kernel_size=(2,2), stride = (2,2))]
        return nn.Sequential(*layers)



In [4]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model = VGG_net(in_channels=3, num_classes=1000).to(device)
x = torch.randn(1,3, 224, 224).to(device)
print(model(x).shape)

torch.Size([1, 1000])


In [5]:
import os
from glob import glob 
import pandas as pd 

path_to_data = os.path.join("..", "cifar_data", "cifar-10-batches-py")
train_df =  pd.read_csv(os.path.join(path_to_data, "train.csv"))
test_df = pd.read_csv(os.path.join(path_to_data, "test.csv"))

In [6]:
train_df["label"].value_counts()

6    5000
9    5000
4    5000
1    5000
2    5000
7    5000
8    5000
3    5000
5    5000
0    5000
Name: label, dtype: int64

In [7]:
test_df["label"].value_counts()

3    1000
8    1000
0    1000
6    1000
1    1000
9    1000
5    1000
7    1000
4    1000
2    1000
Name: label, dtype: int64

In [8]:
import cv2
import torch
from numpy import array
from pandas import DataFrame
from torch.utils.data import Dataset, TensorDataset, DataLoader
from torchvision import transforms

import cv2
import torch
from numpy import array
from pandas import DataFrame
from torch.utils.data import Dataset, TensorDataset, DataLoader
from torchvision import transforms

def get_from_df_paths_targets( df: DataFrame, transform=None):
    """
    Extracts image paths and labels from a DataFrame.

    Args:
        df (DataFrame): The DataFrame containing image paths and labels.
        transform (callable, optional): A transformation function to apply to images. Default is None.

    Returns:
        DataFrame: The original DataFrame.
        list: A list of image paths.
        list: A list of labels.
        callable: The transformation function.
    """
    paths = df['path'].to_list()
    labels = df['label'].to_list()
    return df,paths, labels, transform

class CV2ImageDataset(Dataset):
    """
    Dataset class for handling images using OpenCV and transformations.

    Args:
        df (DataFrame): The DataFrame containing image paths and labels.
        transform (callable, optional): A transformation function to apply to images. Default is None.
        device (torch.device, optional): The target device for data tensors. Default is CUDA if available, otherwise CPU.

    Attributes:
        df (DataFrame): The DataFrame containing image paths and labels.
        paths (list): A list of image paths.
        labels (list): A list of labels.
        transform (callable): The transformation function.
        device (torch.device): The target device.
    """
    def __init__(self, df:DataFrame, transform: transforms =None, device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")):
        self.df, self.paths, self.labels, self.transform = get_from_df_paths_targets( df, transform=transform)
        self.device=device
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        """
        Get an image and its label from the dataset.

        Args:
            idx (int): Index of the image in the dataset.

        Returns:
            tensor: The transformed image.
            tensor: The label tensor.
        """
        image = cv2.imread(self.paths[idx])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        label = torch.tensor(int(self.labels[idx]))
        if self.transform is not None:
            image = self.transform(image=image)["image"]
        return image, label
    
    
class DatasetLoader():
    """
    Data loader object for handling DataLoader creation.

    Args:
        dataset (Dataset): The dataset to be loaded.
        batch_size (int, optional): The batch size for each batch. Default is 1.
        num_workers (int, optional): The number of data loading workers. Default is 1.

    Attributes:
        dataset (Dataset): The dataset to be loaded.
        batch_size (int): The batch size for each batch.
        num_workers (int): The number of data loading workers.
        loader (DataLoader): The DataLoader instance.
    """
    def __init__(self, dataset: Dataset, batch_size:int = 1 , num_workers: int =1):
        self.dataset = dataset
        self.batch_size = batch_size
        self.num_workers = num_workers
        #self.get_dataloader()
        
    def get_dataloader(self,):
        """
        Get a DataLoader instance for the dataset.

        Returns:
            DataLoader: The DataLoader instance.
        """
        self.loader = DataLoader(
                    self.dataset,
                    batch_size=self.batch_size,
                    num_workers=self.num_workers,
                    shuffle=True
                )
        return self.loader
    
    def check_dataloader_dimension(self):
        """
        Print the dimensions and types of the first batch in the DataLoader.
        """
        for _, (data, target) in enumerate(self.loader):
            print('Data Shape of Dataloader is (data, target) : ', data.shape, target.shape)
            print('Data Type of Dataloader is (data, target) : ', type(data), type(target))
            torch.cuda.empty_cache()
            break

In [9]:
import albumentations as A
from albumentations.pytorch import ToTensorV2
batch_size = 4
aug = A.Compose([   
A.Resize(224, 224),
A.HorizontalFlip(p=0.5),          
A.Normalize(),            
ToTensorV2()])

In [10]:
train_dataset = CV2ImageDataset(train_df, transform=aug)
test_dataset = CV2ImageDataset(test_df, transform=aug)

In [11]:
train_loader = DatasetLoader(train_dataset, batch_size, num_workers=0).get_dataloader()
test_loader = DatasetLoader(test_dataset, batch_size, num_workers=0).get_dataloader()

In [12]:
import torch.optim as optim
from torch.optim.lr_scheduler import CyclicLR
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_epochs = 10
model  = VGG_net().to(device)
optimizer = optim.SGD(model.parameters(), lr = 0.1)
scheduler = scheduler = CyclicLR(optimizer, base_lr=0.001, max_lr=0.1, step_size_up=2000)
criterion = nn.CrossEntropyLoss()

In [13]:
num_epochs = 10
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode

    running_loss = 0.0
    correct_predictions = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)

        # Calculate loss
        loss = criterion(outputs, labels)

        # Backpropagation and optimization
        loss.backward()
        optimizer.step()

        # Update statistics
        running_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs, 1)
        correct_predictions += (predicted == labels).sum().item()

    # Calculate epoch statistics
    epoch_loss = running_loss / len(train_dataset)
    epoch_accuracy = correct_predictions / len(train_dataset)

    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}")

    scheduler.step()
    
    # Evaluation on the test set
    model.eval()  # Set the model to evaluation mode

    test_loss = 0.0
    test_correct_predictions = 0

    with torch.no_grad():
        for test_inputs, test_labels in test_loader:
            test_inputs, test_labels = test_inputs.to(device), test_labels.to(device)

            # Forward pass
            test_outputs = model(test_inputs)

            # Calculate test loss
            test_loss += criterion(test_outputs, test_labels).item() * test_inputs.size(0)

            # Calculate test accuracy
            _, test_predicted = torch.max(test_outputs, 1)
            test_correct_predictions += (test_predicted == test_labels).sum().item()

    # Calculate test set statistics
    test_loss /= len(test_dataset)
    test_accuracy = test_correct_predictions / len(test_dataset)

    print(f"Test Set - Loss: {test_loss:.4f}, Accuracy: {test_accuracy:.4f}")

print("Training finished.")

Epoch [1/10], Loss: 1.6195, Accuracy: 0.4298
Test Set - Loss: 1.0727, Accuracy: 0.6326
Epoch [2/10], Loss: 0.9281, Accuracy: 0.6812
Test Set - Loss: 0.7928, Accuracy: 0.7322
Epoch [3/10], Loss: 0.6951, Accuracy: 0.7612
Test Set - Loss: 0.6066, Accuracy: 0.7982
Epoch [4/10], Loss: 0.5798, Accuracy: 0.8038
Test Set - Loss: 0.5452, Accuracy: 0.8151
Epoch [5/10], Loss: 0.4985, Accuracy: 0.8320
Test Set - Loss: 0.4948, Accuracy: 0.8353
Epoch [6/10], Loss: 0.4299, Accuracy: 0.8551
Test Set - Loss: 0.4860, Accuracy: 0.8399
Epoch [7/10], Loss: 0.3773, Accuracy: 0.8704
Test Set - Loss: 0.4603, Accuracy: 0.8485
Epoch [8/10], Loss: 0.3379, Accuracy: 0.8847
Test Set - Loss: 0.4288, Accuracy: 0.8578
Epoch [9/10], Loss: 0.3011, Accuracy: 0.8970
Test Set - Loss: 0.4495, Accuracy: 0.8543
Epoch [10/10], Loss: 0.2624, Accuracy: 0.9108
Test Set - Loss: 0.5050, Accuracy: 0.8459
Training finished.
