In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import random_split, DataLoader
import matplotlib.pyplot as plt

# Seed PyTorch's global RNG so that weight initialisation and any sampling
# that draws from the default generator start from the same state on every
# "Restart Kernel -> Run All". (GPU kernels may still introduce small
# run-to-run differences.)
SEED = 42
torch.manual_seed(SEED)

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

Using device: cpu


In [2]:
# ============================================================
# 1. DATA PREPROCESSING PIPELINE (NORMALIZATION)
# ============================================================
# CIFAR-10 pixels are stored as integers in 0-255.
# ToTensor()           -> float tensor scaled to [0, 1]
# Normalize(mean, std) -> (x - mean) / std applied per RGB channel;
#                         with mean = std = 0.5 this maps [0, 1] to [-1, 1].

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

In [3]:
# ============================================================
# 2. LOAD CIFAR-10 DATASET (TRAIN + TEST)
# ============================================================
# download=True fetches the archive into ./data when it is not already there.
# Both splits share the same preprocessing pipeline (`transform`):
#   train=True  -> 50,000 training images
#   train=False -> 10,000 test images

full_trainset, testset = (
    torchvision.datasets.CIFAR10(
        root='./data',
        train=is_train_split,
        download=True,
        transform=transform,
    )
    for is_train_split in (True, False)
)


100%|██████████| 170M/170M [00:05<00:00, 31.0MB/s] 


In [4]:
# ============================================================
# 3. TRAIN/VALIDATION SPLIT
# ============================================================
# Hold out part of the 50,000 training images for validation:
#   90% (45,000) -> train
#   10% (5,000)  -> validation
# The validation split is used to monitor overfitting during training.

train_size = int(0.9 * len(full_trainset))
val_size   = len(full_trainset) - train_size

# Use a dedicated, seeded generator so the same images land in the
# validation split on every run; otherwise validation metrics from
# different runs are measured on different data and are not comparable.
SPLIT_SEED = 42
trainset, valset = random_split(
    full_trainset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)


# Create DataLoaders (batch the data; only the training set is shuffled)
trainloader = DataLoader(trainset, batch_size=64, shuffle=True, num_workers=2)
valloader   = DataLoader(valset,   batch_size=64, shuffle=False, num_workers=2)
testloader  = DataLoader(testset,  batch_size=64, shuffle=False, num_workers=2)

# Class names for CIFAR-10 (list index == integer label)
classes = ['plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck']


In [5]:
# ============================================================
# 4. DEFINE THE CNN MODEL
# ============================================================
# This CNN has:
# - 2 convolutional layers (5x5 kernels, no padding)
# - 1 max-pool layer, applied after each convolution
# - 3 fully connected layers
# - Outputs 10 logits (one per class)

class Net(nn.Module):
    """Small CNN classifier for 3x32x32 images producing 10 class logits.

    Spatial sizes for a 32x32 input:
    32 -> conv1 (5x5) -> 28 -> pool -> 14 -> conv2 (5x5) -> 10 -> pool -> 5,
    which is why the first fully connected layer expects 16 * 5 * 5 features.
    """

    def __init__(self):
        super().__init__()
        # First conv layer: input=RGB(3), output=6 filters, kernel=5x5
        self.conv1 = nn.Conv2d(3, 6, 5)

        # Max pooling layer (2x2, stride 2): halves height and width.
        # It has no parameters, so the same module is reused after both convs.
        self.pool = nn.MaxPool2d(2, 2)

        # Second conv layer: 6 -> 16 channels, kernel=5x5
        self.conv2 = nn.Conv2d(6, 16, 5)

        # After both conv+pool stages a 32x32 input is reduced to 16 x 5 x 5
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)            # Final layer: 10 logits

    def forward(self, x):
        """Return raw class logits of shape (batch, 10).

        Args:
            x: image tensor of shape (batch, 3, 32, 32); a single unbatched
               (3, 32, 32) image is also accepted and treated as a batch of 1.

        Raises:
            RuntimeError: if the spatial size does not reduce to 5x5 features
                (raised by the first linear layer's shape check).
        """
        # Accept a single unbatched image by adding the batch dimension.
        if x.dim() == 3:
            x = x.unsqueeze(0)

        # Conv1 -> ReLU -> Pool
        x = self.pool(F.relu(self.conv1(x)))
        # Conv2 -> ReLU -> Pool
        x = self.pool(F.relu(self.conv2(x)))

        # Flatten everything except the batch dimension. Keeping the batch
        # axis fixed (instead of view(-1, 400)) means a wrongly sized input
        # fails loudly in fc1 rather than silently changing the batch size.
        x = torch.flatten(x, 1)

        # FC layers
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))

        # Output layer (NO softmax - CrossEntropyLoss applies log-softmax internally)
        return self.fc3(x)

# Build the network, then move its parameters onto the selected device.
net = Net()
net = net.to(device)


In [6]:
# ============================================================
# 5. LOSS FUNCTION + OPTIMIZER  ("COMPILE" IN TF)
# ============================================================
# Cross-entropy over the 10 class logits, minimised with SGD + momentum.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    params=net.parameters(),   # every trainable weight of the network
    lr=1e-3,                   # learning rate (0.001)
    momentum=0.9,              # momentum term
)


In [7]:
# ============================================================
# 6. TRAINING LOOP (THE MODEL LEARNS HERE)
# ============================================================
# Each epoch makes one full pass over the training batches and then
# evaluates on the validation split (section 7, inside the same loop).
# Reported losses are per-sample means: each batch's loss is weighted by
# its size so a short final batch does not count as much as a full one.
# (Assumes `criterion` returns the batch mean, i.e. reduction='mean'.)

num_epochs = 10

for epoch in range(num_epochs):

    net.train()                                     # Training mode (matters once dropout/batchnorm are used)
    running_loss = 0.0
    train_total = 0

    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()                       # Clear previous gradients
        outputs = net(inputs)                       # Forward pass
        loss = criterion(outputs, labels)           # Mean loss over this batch
        loss.backward()                             # Backpropagation
        optimizer.step()                            # Update weights

        batch_size = labels.size(0)
        running_loss += loss.item() * batch_size    # Back to a per-batch sum
        train_total += batch_size

    avg_train_loss = running_loss / train_total


    # ============================================================
    # 7. VALIDATION (EVALUATE ON valset EACH EPOCH)
    # ============================================================
    net.eval()                                      # Inference mode for dropout/batchnorm
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():                           # No gradients needed
        for inputs, labels in valloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, labels)

            batch_size = labels.size(0)
            val_loss += loss.item() * batch_size

            _, predicted = torch.max(outputs, 1)    # Class with highest logit
            total += batch_size
            correct += (predicted == labels).sum().item()

    avg_val_loss = val_loss / total
    val_accuracy = 100 * correct / total

    print(f"Epoch {epoch+1}/{num_epochs} | "
          f"Train Loss: {avg_train_loss:.4f} | "
          f"Val Loss: {avg_val_loss:.4f} | "
          f"Val Acc: {val_accuracy:.2f}%")


print("\nTraining completed successfully!")


Epoch 1/10 | Train Loss: 2.2972 | Val Loss: 2.2836 | Val Acc: 19.72%
Epoch 2/10 | Train Loss: 2.1637 | Val Loss: 2.0206 | Val Acc: 27.24%
Epoch 3/10 | Train Loss: 1.9210 | Val Loss: 1.8279 | Val Acc: 33.74%
Epoch 4/10 | Train Loss: 1.7554 | Val Loss: 1.6879 | Val Acc: 38.46%
Epoch 5/10 | Train Loss: 1.6542 | Val Loss: 1.6289 | Val Acc: 40.30%
Epoch 6/10 | Train Loss: 1.5727 | Val Loss: 1.5415 | Val Acc: 43.74%
Epoch 7/10 | Train Loss: 1.4932 | Val Loss: 1.4631 | Val Acc: 46.30%
Epoch 8/10 | Train Loss: 1.4277 | Val Loss: 1.4002 | Val Acc: 49.20%
Epoch 9/10 | Train Loss: 1.3764 | Val Loss: 1.3886 | Val Acc: 49.76%
Epoch 10/10 | Train Loss: 1.3334 | Val Loss: 1.3473 | Val Acc: 51.18%

Training completed successfully!


In [8]:
# ============================================================
# 8. FINAL TEST ACCURACY ON 10,000 UNSEEN IMAGES
# ============================================================
# The test set is evaluated once, after training, and is never used to
# make training decisions.
net.eval()
correct = 0
total = 0

with torch.no_grad():
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        # Index of the largest logit is the predicted class. No `.data`
        # needed: autograd is already disabled by torch.no_grad(), and this
        # matches how the validation loop computes predictions.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_accuracy = 100 * correct / total
print(f"\nFinal Accuracy on 10,000 test images: {test_accuracy:.2f}%\n")



Final Accuracy on 10,000 test images: 51.71%



In [9]:
# ============================================================
# 9. PREDICT ON YOUR OWN IMAGE (e.g., car.jpg)
# ============================================================

from PIL import Image

# Load your own image (replace 'car.jpg' with your filename)
img_path = "car.jpg"

# Open inside a context manager so the file handle is closed, and force
# 3-channel RGB: grayscale, palette or RGBA files would otherwise produce
# 1 or 4 channels and fail in Normalize / the first conv layer.
with Image.open(img_path) as raw_img:
    img = raw_img.convert("RGB")

# --- IMPORTANT ---
# CIFAR-10 images are 32x32, so resize your image
img = img.resize((32, 32))

# Reuse the preprocessing pipeline from section 1 (`transform`) so the image
# is scaled and normalized exactly like the training and test data, instead
# of keeping a second copy of the same constants that could drift apart.
img_tensor = transform(img)

# Add batch dimension -> shape becomes (1, 3, 32, 32)
img_tensor = img_tensor.unsqueeze(0).to(device)

# Predict
net.eval()
with torch.no_grad():
    outputs = net(img_tensor)
    _, predicted = torch.max(outputs, 1)      # Index of the largest logit

print("\nPrediction for your image:", img_path)
print("Predicted class:", classes[predicted.item()])


Prediction for your image: car.jpg
Predicted class: car


In [10]:
# ============================================================
# 10. SAVE THE TRAINED MODEL
# ============================================================
# Only the learned parameters (state_dict) are written, not the Net class
# itself; loading them later requires constructing a Net() first.
torch.save(obj=net.state_dict(), f="cifar10_cnn_pytorch.pth")
print("\nModel saved as cifar10_cnn_pytorch.pth")



Model saved as cifar10_cnn_pytorch.pth
