In [1]:
import os

import numpy as np
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision
import torchvision.transforms as transforms

import matplotlib.pyplot as plt

# Hyperparameters 


Here, we define some **hyperparameters** that control training:
- `batchsize = 64` → number of samples processed before updating model weights.
- `learning_rate = 0.001` → step size for gradient descent.
- `epochs = 20` → number of complete passes through the entire training dataset.


In [2]:
# Training hyperparameters.
batchsize = 64          # samples per optimizer step
learning_rate = 0.001   # step size handed to the optimizer
epochs = 20             # full passes over the training set

# Seed PyTorch's global RNG so that weight initialisation and training-set
# shuffling start from a known state on every "Restart & Run All".
seed = 42
torch.manual_seed(seed);  # trailing ';' keeps the returned Generator out of the cell output

# Data Preparation

### preprocessing transforms


We define an image transformation pipeline using `torchvision.transforms.Compose`.  
- `ToTensor()` converts PIL images to PyTorch tensors with values in `[0,1]`.
- `Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))` shifts and scales pixel values to `[-1,1]` for each RGB channel, helping the network train faster and more stably.


In [3]:
# Preprocessing applied to every CIFAR-10 sample:
#   1. PIL image -> float tensor
#   2. per-channel normalisation with mean 0.5 and std 0.5
channel_mean = (0.5, 0.5, 0.5)
channel_std = (0.5, 0.5, 0.5)

transfrom = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
])

### loading dataset


Here we load the **CIFAR-10 dataset**, a standard dataset of 60,000 32×32 color images in 10 classes (like airplanes, cars, cats, etc.).

- `train=True` loads the training split (50,000 images).  
- `train=False` loads the test split (10,000 images).  
- `download=True` ensures it’s downloaded if not already available.  
- The `transform` applies our preprocessing pipeline to each image.



We wrap datasets in **DataLoaders**:
- `batch_size=batchsize` determines how many samples are fed per batch.
- `shuffle=True` randomizes training samples to improve generalization.
- `shuffle=False` in the test set keeps order consistent for evaluation.

These loaders efficiently feed data to the model during training/testing.


In [4]:
# Human-readable label for each of the 10 class indices (list position == class id).
class_names = [
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

In [5]:
# Both splits live under the same root directory and share one preprocessing pipeline.
dataset_kwargs = dict(root='./dataset', download=True, transform=transfrom)
train_dataset = torchvision.datasets.CIFAR10(train=True, **dataset_kwargs)
test_dataset = torchvision.datasets.CIFAR10(train=False, **dataset_kwargs)

# Training batches are reshuffled every epoch; test batches keep a fixed order.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batchsize,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batchsize,
    shuffle=False,
)


100%|██████████| 170M/170M [01:32<00:00, 1.84MB/s] 


# Design CNN Architecture


This defines the **CNN architecture** by subclassing `nn.Module`.

- Input images: 32×32 with 3 channels (RGB).
- **Conv Layer 1:** 3→32 filters with 3×3 kernels.  
- **Pooling:** `MaxPool2d(2,2)` halves the spatial dimensions.  
- **Conv Layer 2:** 32→64 filters, same kernel size.  
- **Conv Layer 3:** Another 64→64 layer for deeper feature extraction.  
- After convolutions, we flatten features into a vector of size `64×4×4` before feeding into:
  - **fc1:** Fully connected layer with 64 neurons.
  - **fc2:** Output layer with 10 neurons (CIFAR-10 classes).



Defines the **forward pass** — how data flows through the network:

1. Apply **Conv1 → ReLU → Pooling**  
2. Apply **Conv2 → ReLU → Pooling**
3. Apply **Conv3 → ReLU** (no pooling, preserving more detail)
4. **Flatten** the 3D feature maps into a 1D vector per image.
5. Pass through **fc1 → ReLU**.
6. Output **fc2**, giving raw class scores (logits) for 10 classes.


In [6]:
class CNN(nn.Module):
    """Small convolutional classifier for 32x32 RGB images with 10 output classes.

    Three 3x3 convolutions (3 -> 32 -> 64 -> 64 channels), each followed by
    ReLU, with a shared 2x2 max-pool after the first two. The flattened
    features go through two fully connected layers (1024 -> 64 -> 10).
    The network returns raw logits; no softmax is applied.
    """

    def __init__(self):
        super().__init__()

        # Input geometry and kernel size, kept as plain attributes.
        self.images_size = (32, 32)
        self.w = 32
        self.h = 32
        self.input_channels = 3
        self.kernal_size = 3

        k = self.kernal_size
        self.conv_layer1 = nn.Conv2d(self.input_channels, 32, k)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv_layer2 = nn.Conv2d(32, 64, k)
        self.conv_layer3 = nn.Conv2d(64, 64, k)

        # Spatial size: 32 -> conv 30 -> pool 15 -> conv 13 -> pool 6 -> conv 4,
        # so the flattened feature vector has 64 * 4 * 4 entries.
        self.fc1 = nn.Linear(64 * 4 * 4, 64)
        self.fc2 = nn.Linear(64, 10)

    def forward(self, x):
        """Map a batch of images to a batch of 10 class logits."""
        features = x
        # The first two conv stages are each followed by ReLU and pooling.
        for conv in (self.conv_layer1, self.conv_layer2):
            features = self.pool(F.relu(conv(features)))
        # The last conv stage has ReLU only, no pooling.
        features = F.relu(self.conv_layer3(features))

        flat = torch.flatten(features, 1)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)


        

## Create the model instance


We create an instance of our CNN model.  
At this point, it has random weights and is ready for training.


In [7]:
# Instantiate the network; its parameters are freshly initialised (untrained).
cnn_model = CNN()

## Define the loss function and optimizer


We set up:
- **Loss Function:** `CrossEntropyLoss` — combines softmax + negative log-likelihood, suitable for multi-class classification.
- **Optimizer:** `Adam` — an adaptive optimizer that updates model parameters using gradients and the defined learning rate.


In [8]:
# Multi-class classification objective, applied directly to the model's logits.
loss_func = nn.CrossEntropyLoss()

# Adam over every parameter of the network, using the configured learning rate.
optimizer = torch.optim.Adam(
    params=cnn_model.parameters(),
    lr=learning_rate,
)


# Training Loop

This is the **training loop**:

- For each **epoch** (full pass through dataset):
  - Iterate through `train_loader` batches.
  - **Forward pass:** feed images → get predictions.
  - **Loss calculation:** compare predictions vs. true labels.
  - **Backward pass:** 
    - `zero_grad()` clears old gradients.
    - `backward()` computes new gradients.
    - `step()` updates weights.
  - Track running loss for monitoring.
- After each epoch → print that epoch's average loss; after all epochs → training complete!



Saves the **trained model parameters** (weights only) to a `.pth` file for later use.  
This allows us to reload the trained model without retraining from scratch.


In [None]:
# Train `cnn_model` for `epochs` passes over `train_loader`, printing the mean
# batch loss after every epoch, then save the learned weights to disk.

# Make the training mode explicit before optimising.
cnn_model.train()

n_steps = len(train_loader)  # number of batches per epoch
for epoch in range(epochs):
    running_loss = 0.0

    for imgs, labels in train_loader:

        # Forward prop
        outputs = cnn_model(imgs)

        # Calc loss
        loss = loss_func(outputs, labels)

        # Backward prop: clear stale gradients, backpropagate, update weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"[{epoch+1}] loss: {running_loss/n_steps:.3f}")

print("Finished Training !")

path = './models/latest_cnn.pth'

# torch.save does not create missing parent directories, so make sure
# './models' exists before writing (no-op when it is already there).
os.makedirs(os.path.dirname(path), exist_ok=True)

# Only the parameters (state_dict) are saved, not the module object itself.
torch.save(cnn_model.state_dict(), path)

[1] loss: f1.517
[2] loss: f1.172
[3] loss: f0.999
[4] loss: f0.895
[5] loss: f0.810
[6] loss: f0.751
[7] loss: f0.701
[8] loss: f0.652
[9] loss: f0.609
[10] loss: f0.582
[11] loss: f0.543
[12] loss: f0.507
[13] loss: f0.484
[14] loss: f0.454
[15] loss: f0.428
[16] loss: f0.408
[17] loss: f0.376
[18] loss: f0.358
[19] loss: f0.331
[20] loss: f0.312
Finished Training !


# Evaluating the model

### Load the saved model parameters


Loads the saved model weights back into a new CNN instance and sets it to **evaluation mode** (`eval()`), disabling features like dropout or batch norm updates to ensure consistent inference.


In [10]:
# Rebuild the architecture, then restore the weights stored at `path`.
loaded_cnn = CNN()
saved_weights = torch.load(path)
loaded_cnn.load_state_dict(saved_weights)

# eval() returns the module itself, so this is also what the cell displays.
loaded_cnn.eval()

CNN(
  (conv_layer1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv_layer2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv_layer3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=1024, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=10, bias=True)
)

### Calculate accuracy on test set

Performs **evaluation** on the test dataset:

- `torch.no_grad()` disables gradient computation for faster inference.
- Loops over the test batches:
  - Get model outputs.
  - `torch.max(outputs, 1)` finds the predicted class index.
  - Count correct predictions.
- Compute and print the **accuracy** percentage over the entire test set.


In [11]:
# Top-1 accuracy of `loaded_cnn` over the whole test set.
n_samples = len(test_loader.dataset)
n_correct = 0

# Gradients are not needed for inference.
with torch.no_grad():
    for batch_imgs, batch_labels in test_loader:
        # Index of the largest logit along dim 1 is the predicted class.
        batch_preds = torch.max(loaded_cnn(batch_imgs), 1).indices
        n_correct += (batch_preds == batch_labels).sum().item()

acc = (n_correct / n_samples) * 100.0
print(f"Model Accuracy = {acc} %")

Model Accuracy = 72.16 %


# Test on examples from outside the dataset

In [12]:
# Preprocessing for arbitrary images: resize to 32x32, convert to a tensor,
# then normalise each channel with mean 0.5 and std 0.5.
target_size = (32, 32)
rgb_mean = (0.5, 0.5, 0.5)
rgb_std = (0.5, 0.5, 0.5)

new_transform = transforms.Compose([
    transforms.Resize(target_size),
    transforms.ToTensor(),
    transforms.Normalize(rgb_mean, rgb_std),
])

In [13]:
def load_img(img_path):
    """Load an image file and turn it into a single-image batch for the model.

    Args:
        img_path: Path to an image file readable by PIL.

    Returns:
        The tensor produced by `new_transform`, with a leading batch
        dimension of size 1 added.
    """
    # Close the file handle as soon as the pixel data has been read.
    with Image.open(img_path) as img:
        # Force three channels: grayscale, palette or RGBA files would
        # otherwise not match the 3-channel normalisation and the model input.
        rgb_img = img.convert("RGB")

    tensor = new_transform(rgb_img)
    return tensor.unsqueeze(0)

In [None]:
# Example images that are not part of CIFAR-10.
images_paths = [
    "online_test_imgs/airplane_ex1.jpeg",
    "online_test_imgs/dog_ex2.jpeg",
    "online_test_imgs/frog_ex3.jpeg",
]
images = list(map(load_img, images_paths))

# Predict one image at a time and print the name of the highest-scoring class.
with torch.no_grad():
    for img_batch in images:
        class_idx = torch.max(loaded_cnn(img_batch), 1).indices.item()
        print(f"Prediction: {class_names[class_idx]}")

    
    

Prediction: plane
Prediction: dog
Prediction: frog
