In [2]:
import os
import torch
import torch.nn as nn
import torch.optim  as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from PIL import Image
from tqdm import tqdm  # for progress bars

### LeNet Architecture

![LeNet architecture](architecture_images/lenet_architecture.png)


In [3]:
class LeNet(nn.Module):
    """LeNet-style convolutional classifier for single-channel 32x32 images.

    Three unpadded 5x5 convolutions (6, 16 and 120 output channels) with ReLU
    activations, 2x2 average pooling after the first two, then two fully
    connected layers (120 -> 84 -> ``num_classes``). The output is the raw,
    unnormalised score for each class.

    Args:
        num_classes: Number of output classes (size of the last layer).
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Parameter-free layers reused at several points of the forward pass.
        self.relu = nn.ReLU()
        self.pool = nn.AvgPool2d(kernel_size=(2, 2), stride=(2, 2))
        # Feature extractor: stride-1, unpadded 5x5 convolutions.
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=0)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0)
        self.conv3 = nn.Conv2d(16, 120, kernel_size=5, stride=1, padding=0)
        # Classifier head.
        self.linear1 = nn.Linear(in_features=120, out_features=84)
        self.linear2 = nn.Linear(in_features=84, out_features=num_classes)

    def forward(self, x):
        """Map a batch of shape (N, 1, 32, 32) to class scores of shape (N, num_classes)."""
        features = self.pool(self.relu(self.conv1(x)))          # N x 6 x 14 x 14
        features = self.pool(self.relu(self.conv2(features)))   # N x 16 x 5 x 5
        features = self.relu(self.conv3(features))              # N x 120 x 1 x 1
        # Collapse the trailing 1x1 spatial dims: N x 120 x 1 x 1 --> N x 120
        flat = features.reshape(features.size(0), -1)
        hidden = self.relu(self.linear1(flat))
        return self.linear2(hidden)



In [4]:
# Smoke test: push a random batch of 64 single-channel 32x32 inputs through an
# untrained LeNet (default 10 classes) and print the shape of the output.
x = torch.randn(64, 1, 32, 32)
model = LeNet()
print(model(x).shape)

torch.Size([64, 10])


##  Loading dataset

In [5]:
def _make_transform(image_size=(32, 32)):
    """Build the preprocessing pipeline that turns a PIL image into a model input."""
    return transforms.Compose([
        transforms.Resize(image_size),  # Resize to the 32x32 input LeNet expects
        transforms.Grayscale(num_output_channels=1),  # Convert to grayscale
        transforms.ToTensor(),  # Convert to tensor
        transforms.Normalize([0.5], [0.5])  # Normalize for grayscale (mean and std can be adjusted)
    ])


# All three splits currently share the same deterministic preprocessing (no
# augmentation). Each split still gets its own Compose instance so one of them
# can be customised later without touching the others.
data_transforms = {split: _make_transform() for split in ('train', 'val', 'test')}

In [6]:
# Root folder of the images; ImageFolder treats each sub-directory as one class.
data_dir = 'cats_vs_dogs_mini_dataset'
# The whole dataset is loaded once with the 'train' transform; the splits are
# carved out of this single object further down.
dataset = datasets.ImageFolder(os.path.join(data_dir), transform=data_transforms['train'])

In [19]:
# 70% train / 15% validation; the test split takes whatever remains so the
# three sizes always add up to len(dataset) despite the int() truncation.
train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size

In [20]:
# Seed the split so train/val/test membership is identical on every run;
# otherwise metrics are not comparable across kernel restarts.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset, test_dataset = random_split(
    dataset, [train_size, val_size, test_size], generator=split_generator
)

In [21]:
# random_split returns Subset views that all wrap the SAME ImageFolder object,
# so assigning `<subset>.dataset.transform` would also change the transform used
# by the training split. Give validation and test their own ImageFolder (with
# their own transform) and reuse the indices chosen by the split. ImageFolder
# enumerates files in sorted order, so the indices refer to the same images.
val_dataset = torch.utils.data.Subset(
    datasets.ImageFolder(data_dir, transform=data_transforms['val']),
    val_dataset.indices,
)
test_dataset = torch.utils.data.Subset(
    datasets.ImageFolder(data_dir, transform=data_transforms['test']),
    test_dataset.indices,
)

In [22]:
batch_size = 24

# Only the training loader needs shuffling; a fixed order for validation and
# test keeps evaluation passes repeatable.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [23]:
# Sanity check: pull one batch from the training loader and inspect it.
train_data_iter = iter(train_loader)

# Get the next batch
images, labels = next(train_data_iter)
print(images.shape)  # batch tensor shape
print(labels)  # integer class indices for the batch
print(train_dataset.dataset.class_to_idx)  # class-folder name -> index mapping

torch.Size([24, 1, 32, 32])
tensor([1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1])
{'cats_set': 0, 'dogs_set': 1}


In [24]:
# Class names as discovered by ImageFolder on the underlying (unsplit) dataset.
train_dataset.dataset.classes

['cats_set', 'dogs_set']

In [25]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [26]:
# Fresh LeNet with one output per dataset class, moved to the selected device.
# This replaces the 10-class instance created for the smoke test above.
model = LeNet(num_classes=len(train_dataset.dataset.classes)).to(device)

In [27]:
# Define loss function and optimizer: cross-entropy on the raw class scores
# produced by the model, optimised with Adam at a learning rate of 1e-4.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

## Training

In [28]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10, patience=3, save_path="saved_best_models/lenet_best_model.pth"):
    """Train ``model`` and checkpoint the weights with the lowest validation loss.

    Each epoch runs one pass over ``train_loader`` (with gradient updates) and
    one pass over ``val_loader`` (without gradients). Whenever the mean
    validation loss improves on the best value seen so far, the model's
    ``state_dict`` is written to ``save_path``. A one-line summary is printed
    per epoch.

    Args:
        model: Network to train. Batches are moved to the device its
            parameters live on.
        train_loader: Iterable of ``(images, labels)`` training batches;
            must support ``len()``.
        val_loader: Iterable of ``(images, labels)`` validation batches;
            must support ``len()``.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        num_epochs: Number of passes over the training data.
        patience: Accepted for backward compatibility. Early stopping is
            currently disabled, so this value is not used.
        save_path: Destination file for the best ``state_dict``. Missing
            parent directories are created.

    Returns:
        None. Progress is reported on stdout and the best weights on disk.
    """
    # Follow the model's own device instead of relying on a notebook global.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    # torch.save does not create missing parent directories.
    save_dir = os.path.dirname(save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    best_val_loss = float('inf')  # Initialize best validation loss to a large value

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0.0
        correct = 0
        total = 0
        for images, labels in tqdm(train_loader):
            images, labels = images.to(run_device), labels.to(run_device)
            optimizer.zero_grad()

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)
            train_loss += loss.item()

            # Backward and optimize
            loss.backward()
            optimizer.step()

            # Calculate train accuracy
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        # Mean of the per-batch losses.
        train_loss /= len(train_loader)
        train_accuracy = 100 * correct / total

        # Validation phase
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(run_device), labels.to(run_device)

                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                # Calculate validation accuracy
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        val_loss /= len(val_loader)
        val_accuracy = 100 * correct / total

        # Keep only the checkpoint with the lowest validation loss.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), save_path)  # Save best model
            print(f"Best model saved with val_loss: {best_val_loss:.4f}")

        print(f"Epoch [{epoch + 1}/{num_epochs}], "
              f"Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.2f}%, "
              f"Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.2f}%")

In [29]:
# Train for 10 epochs; the remaining arguments (patience, save_path) keep
# their defaults.
num_epochs = 10
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs)

100%|██████████| 30/30 [00:01<00:00, 20.18it/s]


Best model saved with val_loss: 0.6932
Epoch [1/10], Train Loss: 0.6933, Train Acc: 49.14%, Val Loss: 0.6932, Val Acc: 46.00%


100%|██████████| 30/30 [00:01<00:00, 29.69it/s]


Best model saved with val_loss: 0.6929
Epoch [2/10], Train Loss: 0.6926, Train Acc: 52.43%, Val Loss: 0.6929, Val Acc: 46.67%


100%|██████████| 30/30 [00:01<00:00, 25.33it/s]


Best model saved with val_loss: 0.6926
Epoch [3/10], Train Loss: 0.6922, Train Acc: 51.43%, Val Loss: 0.6926, Val Acc: 51.33%


100%|██████████| 30/30 [00:01<00:00, 26.68it/s]


Best model saved with val_loss: 0.6915
Epoch [4/10], Train Loss: 0.6915, Train Acc: 59.57%, Val Loss: 0.6915, Val Acc: 49.33%


100%|██████████| 30/30 [00:01<00:00, 27.43it/s]


Epoch [5/10], Train Loss: 0.6911, Train Acc: 57.00%, Val Loss: 0.6928, Val Acc: 52.00%


100%|██████████| 30/30 [00:01<00:00, 29.33it/s]


Best model saved with val_loss: 0.6898
Epoch [6/10], Train Loss: 0.6889, Train Acc: 61.00%, Val Loss: 0.6898, Val Acc: 55.33%


100%|██████████| 30/30 [00:01<00:00, 28.93it/s]


Epoch [7/10], Train Loss: 0.6866, Train Acc: 60.71%, Val Loss: 0.6914, Val Acc: 51.33%


100%|██████████| 30/30 [00:01<00:00, 28.45it/s]


Epoch [8/10], Train Loss: 0.6853, Train Acc: 57.43%, Val Loss: 0.6908, Val Acc: 57.33%


100%|██████████| 30/30 [00:01<00:00, 29.72it/s]


Epoch [9/10], Train Loss: 0.6794, Train Acc: 61.43%, Val Loss: 0.6911, Val Acc: 57.33%


100%|██████████| 30/30 [00:00<00:00, 32.55it/s]


Epoch [10/10], Train Loss: 0.6717, Train Acc: 63.00%, Val Loss: 0.6908, Val Acc: 57.33%


## Test set evaluation

In [30]:
def evaluate_model(model, test_loader):
    """Print the classification accuracy of ``model`` over ``test_loader``.

    The model is switched to eval mode and evaluated without gradients, using
    whatever weights it currently holds. Batches are moved to the device the
    model's parameters live on.

    Args:
        model: Network to evaluate.
        test_loader: Iterable of ``(images, labels)`` batches.

    Returns:
        None. The accuracy (in percent) is printed to stdout.

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    # Follow the model's own device instead of relying on a notebook global.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(run_device), labels.to(run_device)
            outputs = model(images)
            # Predicted class = index of the highest score in each row.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        raise ValueError("test_loader yielded no samples; accuracy is undefined")

    test_accuracy = 100 * correct / total
    print(f"Test Accuracy: {test_accuracy:.2f}%")

# Run evaluation
# NOTE(review): `model` is evaluated with the weights it holds at this point in
# the notebook; the best checkpoint written by train_model is not reloaded
# before this call.
evaluate_model(model, test_loader)

Test Accuracy: 59.33%


## Loading the saved model and running inference

In [37]:
def load_model(model, load_path="best_model.pth"):
    """Load a saved ``state_dict`` into ``model`` and switch it to eval mode.

    Args:
        model: Network whose architecture matches the saved weights.
        load_path: Path to a file written with ``torch.save(state_dict, ...)``.

    Returns:
        The same ``model`` instance, with the loaded weights, in eval mode.
    """
    # Map the checkpoint onto the model's current device so that weights saved
    # on a GPU can still be loaded on a CPU-only machine.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")
    state_dict = torch.load(load_path, map_location=target_device, weights_only=True)
    model.load_state_dict(state_dict)
    model.eval()
    return model

In [38]:
def infer(model, image, load_path="saved_best_models/lenet_best_model.pth"):
    """Predict the class index of a single preprocessed image.

    The saved weights are (re)loaded into ``model`` on every call before the
    prediction is made.

    Args:
        model: Network whose architecture matches the saved checkpoint.
        image: One image tensor without a batch dimension; a batch dimension
            of size 1 is added before the forward pass.
        load_path: Checkpoint to load. The default uses forward slashes so it
            resolves on every OS and matches ``train_model``'s default
            ``save_path``.

    Returns:
        The predicted class index as a Python int.
    """
    model = load_model(model, load_path=load_path)
    # Follow the model's own device instead of relying on a notebook global.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")
    image = image.to(run_device)
    with torch.no_grad():
        output = model(image.unsqueeze(0))
        _, predicted = torch.max(output, 1)
    return predicted.item()

In [39]:
def load_image(image_path):
    """Read an image file and apply the 'test' preprocessing pipeline.

    Args:
        image_path: Path of the image file to open.

    Returns:
        The result of ``data_transforms['test']`` applied to the RGB-converted
        image.
    """
    # Image.open keeps the underlying file open; the context manager closes it
    # once convert() has produced an independent in-memory copy.
    with Image.open(image_path) as raw_image:
        rgb_image = raw_image.convert('RGB')
    return data_transforms['test'](rgb_image)

In [40]:
# Forward slashes resolve on every OS and keep backslash escape sequences out
# of the string literal.
image_path = "cats_vs_dogs_mini_dataset/dogs_set/dog.4014.jpg"
# Map class index -> short label by dropping the "_set" suffix of the folder
# name, e.g. 'dogs_set' -> 'dogs'.
class_index = {value:key.split("_")[0] for key, value in train_dataset.dataset.class_to_idx.items()}
image_tensor = load_image(image_path)
predicted_label = infer(model, image_tensor)

print("Predicted Class:", class_index[predicted_label])

Predicted Class: dogs
