Understanding Components of a Custom DataLoader in PyTorch

1. Dataset `torch.utils.data.Dataset`
2. DataLoader `torch.utils.data.DataLoader`

Creating custom dataset inPytorch

- `init()` - initialised the dataset , loads data , applied preprocessing
- `len()`  - return the total numbers odf samples in the dataset 
- `getitem()` - Defines how to review a single data sample when index is provide

In [1]:
%matplotlib inline


Matplotlib is building the font cache; this may take a moment.


In [1]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
from torch.utils.data import Dataset , DataLoader
import os
from PIL import Image 
from torchvision import transforms
from torch.utils.data import Dataset , DataLoader

In [3]:
image_dir = r"C:\Users\hp\Downloads\Classification_dataset_v3\Classification_dataset_v3\images\train"
for label , class_dir in enumerate(os.listdir(image_dir)):
    print(label , class_dir)

0 Cat
1 Dog
2 person


- `self.class_name` - Stores the mapping between label and class name.
- `class_path` - Holds the path to one class folder at a time
-  `self.image_paths` - Stores full paths to all images across all classes.

so the class_names has label and its corresponding class and class_path has the images of one class and image_paths has the path to that images with its label

In [4]:
class ImageDataset(Dataset):
    def __init__(self,image_dir,transform=None):
        self.image_dir = image_dir
        self.image_paths = []
        self.labels = []
        self.class_name = {}
        self.transform = transform

        for label , class_dir in enumerate(os.listdir(image_dir)):
            self.class_name[label] = class_dir
            class_path = os.path.join(image_dir , class_dir)
            for img_name in os.listdir(class_path):
                self.image_paths.append(os.path.join(class_path , img_name))
                self.labels.append(label)
                
    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self , idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)
        return image , label 

In [5]:
transform = transforms.Compose([
    transforms.Resize((128 , 128)),
    transforms.ToTensor() ,
    transforms.Normalize(mean=[0.5 , 0.5 , 0.5] , std = [0.5 , 0.5 , 0.5])
])

In [6]:
train_image_dir = r"C:\Users\hp\Downloads\Classification_dataset_v3\Classification_dataset_v3\images\train"
test_image_dir = r"C:\Users\hp\Downloads\Classification_dataset_v3\Classification_dataset_v3\images\test"

train_image_dataset = ImageDataset(image_dir = train_image_dir , transform=transform)
test_image_dataset = ImageDataset(image_dir = test_image_dir , transform=transform)

In [7]:
train_image_loader = DataLoader(dataset = train_image_dataset , batch_size=32 , shuffle=True)
test_image_loader = DataLoader(dataset = test_image_dataset , batch_size=32 , shuffle=True)

In [8]:
for images , labels in train_image_loader:
    print(images.shape , labels.shape)
    break

torch.Size([32, 3, 128, 128]) torch.Size([32])


In [9]:
print(train_image_dataset.class_name)
print(test_image_dataset.class_name)

{0: 'Cat', 1: 'Dog', 2: 'person'}
{0: 'Cat', 1: 'Dog', 2: 'person'}


In [10]:
for images , labels in train_image_loader:
    print(images.shape , labels.shape)
    img = images[0].numpy()
    label=labels[0].item()
    print(train_image_dataset.class_name[label])
    np.transpose(img , (1,2,0))
    print(img.shape)
    print(label)
    break

torch.Size([32, 3, 128, 128]) torch.Size([32])
person
(3, 128, 128)
2


In [11]:
import torch.nn as nn
import torch.optim as optim

so the 3 is the channels the 3x3 is the filter map and 32 are the no of the features we want is that right and in output we get 32 features 

In [12]:
class CustomCnnModule(nn.Module):
    def __init__(self,input_dim , num_classes):
        super(CustomCnnModule , self).__init__()
        self.input_dim = input_dim
        self.num_classes = num_classes
        
        #32 is the number of feature maps produced by the convolution.
        #kernel_size - The size of each convolution filter
        
        self.conv_layers = nn.Sequential(
            # Fully Connected Layer 1
            nn.Conv2d(3,32,kernel_size=3,stride=1 , padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2 , stride=2),

            # Fully Connected layer 2
            nn.Conv2d(32 , 64 , kernel_size=3 , stride=1 , padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2 , stride=2),

            # Fully Connected Layer 3
            nn.Conv2d(64 , 128 , kernel_size=3 , stride=1 , padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2 , stride=2),

            # Fully Connected layer 4
            nn.Conv2d(128 , 256, kernel_size=3 , stride=1 , padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2 , stride=2),
        )

        self._to_linear = None
        self._get_conv_output(self.input_dim)

        self.fc_layers = nn.Sequential(
            nn.Linear(self._to_linear , 512),
            nn.ReLU(),
            #nn.Dropout(0.2)
            nn.Linear(512 , 128),
            nn.ReLU(),
            nn.Linear(128 , self.num_classes)
        )
        pass

    def _get_conv_output(self , input_dim=128):
        with torch.no_grad():
            dummy_input = torch.zeros(1,3,input_dim ,input_dim)
            output = self.conv_layers(dummy_input)
            self._to_linear = output.view(1 , -1).size(1)
            
    def forward(self , x):
        x = self.conv_layers(x)
        x = x.view(x.size(0) , -1)
        x = self.fc_layers(x)
        return x

In [13]:
model = CustomCnnModule(input_dim = 128 , num_classes=3)

In [14]:
print(model)

CustomCnnModule(
  (conv_layers): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_st

In [15]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters() , lr=0.001)

In [16]:
epochs = 10

for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for images , labels in train_image_loader:
        optimizer.zero_grad()
        outputs = model(images)
        
        loss = criterion(outputs , labels)

        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}/{epochs} , Loss: {running_loss/len(train_image_loader)}")
    

Epoch 1/10 , Loss: 0.9256369948387146
Epoch 2/10 , Loss: 0.6286568368736066
Epoch 3/10 , Loss: 0.5373990131051917
Epoch 4/10 , Loss: 0.4958054642928274
Epoch 5/10 , Loss: 0.43070179365183175
Epoch 6/10 , Loss: 0.41985822759176555
Epoch 7/10 , Loss: 0.3756843610813743
Epoch 8/10 , Loss: 0.32743206322193147
Epoch 9/10 , Loss: 0.30248469333899647
Epoch 10/10 , Loss: 0.29168898486777356


In [17]:
torch.save(model.state_dict() , "CNN_Model.pth")

In [18]:
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_image_loader:
        outputs = model(images)

        _, predicted = torch.max(outputs , 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f"Test Accuracy is : {100* correct / total:.2f}%")

Test Accuracy is : 79.50%


In [127]:
classifier = ImageClassifier('Documents/PyTorch/CNN_Model.pth', train_image_dataset.class_name)
label = classifier.predict(r"C:\Users\hp\Downloads\images (1).webp")
print(f"Predicted Class id : {label}")

Predicted Class id : Dog
