# Image Classification - CIFAR-10
## Complete Training and Evaluation Pipeline

This notebook implements an end-to-end image classification pipeline using PyTorch.

### Table of Contents
1. [Setup and Imports](#setup)
2. [Device Configuration (CUDA/CPU)](#device)
3. [Data Loading and Exploration](#data)
4. [Data Preprocessing and Augmentation](#preprocessing)
5. [Model Architecture](#model)
6. [Training Loop](#training)
7. [Evaluation and Metrics](#evaluation)
8. [Inference Demo](#inference)
9. [Save Results](#save)

## 1. Setup and Imports <a id='setup'></a>

In [None]:
# Install required packages (run once)
!pip install torch torchvision matplotlib seaborn scikit-learn tqdm numpy pandas pillow -q

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
from torchvision import models

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm
import json
import os
from datetime import datetime
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix, classification_report

# Set plotting style
# Apply a matplotlib style sheet, then make 'husl' the seaborn color palette.
# NOTE(review): the 'seaborn-v0_8-*' style names presumably require a recent matplotlib
# release — confirm against the installed version.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')
# Render figures inline in the notebook output.
%matplotlib inline

# Confirmation message printed once this cell has run to the end.
print("✅ All imports successful!")

## 2. Device Configuration (CUDA/CPU) <a id='device'></a>

In [None]:
# Pick the compute device: the GPU when CUDA is usable, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

print(f"Using device: {device}")
if device.type != 'cuda':
    print("No GPU available, using CPU")
else:
    # Describe GPU 0: its name, total memory (bytes converted to GiB) and the CUDA version.
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
    print(f"CUDA Version: {torch.version.cuda}")

# Fix the seeds of the NumPy and PyTorch random number generators for reproducibility.
np.random.seed(42)
torch.manual_seed(42)
if torch.cuda.is_available():
    # Ask cuDNN for deterministic kernels and switch off its auto-tuner.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.cuda.manual_seed(42)

## 3. Data Loading and Exploration <a id='data'></a>

In [None]:
# Folder that holds the CIFAR-10 files; it is created here if it does not exist yet.
data_dir = '../datasets/cifar10'
os.makedirs(data_dir, exist_ok=True)

# CIFAR-10 class names; the rest of the notebook indexes this list with the integer label.
classes = 'airplane automobile bird cat deer dog frog horse ship truck'.split()

print(
    "Dataset: CIFAR-10",
    f"Number of classes: {len(classes)}",
    f"Classes: {classes}",
    sep="\n",
)

In [None]:
# Download CIFAR-10 (when not already on disk) and open both splits.
# For this first look at the data the only transform is the conversion to a tensor.
transform_basic = transforms.Compose([transforms.ToTensor()])

print("Downloading CIFAR-10 dataset...")
# The two splits are created in the same way and differ only in the `train` flag
# (training split first, then the test split).
trainset_explore, testset_explore = (
    torchvision.datasets.CIFAR10(
        root=data_dir,
        train=is_train,
        download=True,
        transform=transform_basic,
    )
    for is_train in (True, False)
)

print("✅ Dataset downloaded successfully!")
print(f"Training samples: {len(trainset_explore)}")
print(f"Test samples: {len(testset_explore)}")

In [None]:
# Visualize sample images
def show_images(dataset, num_images=16):
    """Plot the first ``num_images`` samples of ``dataset`` in a grid and save the figure.

    The grid has at most four columns and as many rows as needed, so the default of
    16 images still produces the 4x4, 12x12-inch figure. The figure is written to
    ``../01_Image_Classification/results/sample_images.png`` and then shown.

    Args:
        dataset: Sized, indexable collection of ``(image, label)`` pairs. ``image`` must
            be a 3-D tensor; it is permuted with ``(1, 2, 0)`` before being handed to
            ``imshow``. ``label`` is used as an index into the module-level ``classes``.
        num_images: How many leading samples to draw. Values larger than
            ``len(dataset)`` are clamped to the dataset size.

    Raises:
        ValueError: If there is nothing to draw (``num_images`` < 1 or empty dataset).
    """
    num_images = min(num_images, len(dataset))
    if num_images < 1:
        raise ValueError("show_images needs num_images >= 1 and a non-empty dataset")

    # Size the grid from the request instead of assuming exactly 16 images.
    n_cols = min(4, num_images)
    n_rows = -(-num_images // n_cols)  # ceiling division
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(3 * n_cols, 3 * n_rows), squeeze=False)
    axes = axes.ravel()

    for i, ax in enumerate(axes):
        # Every cell loses its frame, including unused cells in the last row.
        ax.axis('off')
        if i >= num_images:
            continue
        img, label = dataset[i]
        ax.imshow(img.permute(1, 2, 0))
        ax.set_title(f"{classes[label]}")

    plt.tight_layout()
    plt.savefig('../01_Image_Classification/results/sample_images.png', dpi=150, bbox_inches='tight')
    plt.show()

# The results folder must exist before show_images() saves its figure.
os.makedirs('../01_Image_Classification/results', exist_ok=True)
show_images(trainset_explore)

In [None]:
# Class distribution
# Read the labels straight from the dataset's label list when it exposes one. Indexing
# every sample instead would decode and transform all images only to throw them away.
train_labels = getattr(trainset_explore, 'targets', None)
if train_labels is None:
    # Fallback for datasets without a `targets` attribute.
    train_labels = [label for _, label in trainset_explore]
unique, counts = np.unique(train_labels, return_counts=True)

# Class names in the same order as `unique` / `counts`.
class_names = [classes[i] for i in unique]

plt.figure(figsize=(12, 5))
plt.bar(class_names, counts, color='skyblue', edgecolor='black')
plt.xlabel('Class')
plt.ylabel('Number of Images')
plt.title('CIFAR-10 Training Set Class Distribution')
plt.xticks(rotation=45)
plt.grid(axis='y', alpha=0.3)
plt.savefig('../01_Image_Classification/results/class_distribution.png', dpi=150, bbox_inches='tight')
plt.show()

print("Class distribution:")
for cls, count in zip(class_names, counts):
    print(f"  {cls}: {count}")

## 4. Data Preprocessing and Augmentation <a id='preprocessing'></a>

In [None]:
# Per-channel mean and standard deviation handed to Normalize below
cifar10_mean = (0.4914, 0.4822, 0.4465)
cifar10_std = (0.2023, 0.1994, 0.2010)

# Evaluation pipeline: tensor conversion and normalization only (no augmentation)
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(cifar10_mean, cifar10_std),
])

# Training pipeline: random crop, flip and color jitter, then the same two steps as above
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(cifar10_mean, cifar10_std),
])

# Re-open both splits from the files already on disk (download=False), each with its own
# pipeline: training split first, then the test split.
trainset, testset = (
    torchvision.datasets.CIFAR10(
        root=data_dir,
        train=is_train,
        download=False,
        transform=pipeline,
    )
    for is_train, pipeline in ((True, transform_train), (False, transform_test))
)

print("✅ Data transforms applied")

In [None]:
# Batching configuration shared by both loaders
batch_size = 128
num_workers = 4

# Only the training loader shuffles. Pinned host memory is requested exactly when CUDA
# is available.
trainloader = DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
    pin_memory=torch.cuda.is_available(),
)
testloader = DataLoader(
    testset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=torch.cuda.is_available(),
)

print(
    "✅ Data loaders created",
    f"Batch size: {batch_size}",
    f"Training batches: {len(trainloader)}",
    f"Test batches: {len(testloader)}",
    sep="\n",
)

## 5. Model Architecture <a id='model'></a>

In [None]:
# Simple CNN model
class SimpleCNN(nn.Module):
    """Small convolutional classifier built from three convolutional blocks.

    ``features`` stacks three blocks of two 3x3 convolutions (each followed by batch
    normalization and ReLU) with 32, 64 and 128 output channels. Blocks 1 and 2 end with
    2x2 max pooling and 2-D dropout; block 3 ends with adaptive average pooling to 1x1.
    ``classifier`` maps the resulting 128 features through a 256-unit hidden layer
    (with dropout) to ``num_classes`` outputs.

    Args:
        num_classes: Size of the output layer.
    """

    @staticmethod
    def _conv_unit(in_channels, out_channels):
        """Return the layers of one convolution step: 3x3 conv, batch norm, ReLU."""
        return [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        ]

    def __init__(self, num_classes=10):
        super().__init__()

        unit = self._conv_unit
        # The layers are unpacked into one flat Sequential, so every module keeps its
        # positional index (and therefore its state_dict key).
        self.features = nn.Sequential(
            # Block 1
            *unit(3, 32),
            *unit(32, 32),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout2d(0.25),
            # Block 2
            *unit(32, 64),
            *unit(64, 64),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout2d(0.25),
            # Block 3
            *unit(64, 128),
            *unit(128, 128),
            nn.AdaptiveAvgPool2d(1),
        )

        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(128, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        """Return the classifier outputs for the image batch ``x``."""
        pooled = self.features(x)
        # Collapse everything after the batch dimension before the linear layers.
        return self.classifier(pooled.flatten(1))

# Build the network, then place it on the selected device
model = SimpleCNN(num_classes=10)
model = model.to(device)

# Count parameters: collect (element count, requires_grad) once and sum twice
param_sizes = [(p.numel(), p.requires_grad) for p in model.parameters()]
total_params = sum(count for count, _ in param_sizes)
trainable_params = sum(count for count, needs_grad in param_sizes if needs_grad)

print(
    f"✅ Model created and moved to {device}",
    f"Total parameters: {total_params:,}",
    f"Trainable parameters: {trainable_params:,}",
    f"\nModel architecture:\n{model}",
    sep="\n",
)

## 6. Training Loop <a id='training'></a>

In [None]:
# Hyper-parameters for the training run
num_epochs = 50
learning_rate = 0.001

# Cross-entropy with label smoothing, optimized by AdamW with weight decay
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer = optim.AdamW(
    model.parameters(),
    lr=learning_rate,
    weight_decay=0.01,
)

# Cosine annealing of the learning rate over `num_epochs` steps, down to 1e-6
scheduler = optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=num_epochs,
    eta_min=1e-6,
)

print("\n".join([
    "Training configuration:",
    f"  Epochs: {num_epochs}",
    f"  Learning rate: {learning_rate}",
    "  Optimizer: AdamW",
    "  Scheduler: CosineAnnealingLR",
    "  Loss: CrossEntropyLoss with label smoothing",
]))

In [None]:
# Training function
def train_epoch(model, loader, criterion, optimizer, device):
    """Run one optimization pass over ``loader`` and report loss and accuracy.

    For every batch: move it to ``device``, compute the loss, back-propagate, clip the
    gradient norm to 1.0 and take one optimizer step. A progress bar shows the running
    mean loss and accuracy.

    Args:
        model: Network to train; switched to training mode here.
        loader: Iterable yielding ``(inputs, labels)`` tensor batches.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss tensor.
        optimizer: Optimizer holding ``model``'s parameters.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(mean_batch_loss, accuracy_percent)``: the mean of the per-batch loss
        values and the percentage of samples whose arg-max output equals the label.
        ``(0.0, 0.0)`` when ``loader`` yields no batches.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    # Batches are counted explicitly: tqdm only refreshes its `n` attribute at display
    # intervals, so `pbar.n` lags behind and would make the running mean wrong.
    num_batches = 0

    pbar = tqdm(loader, desc='Training')
    for inputs, labels in pbar:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()

        # Gradient clipping
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        optimizer.step()

        num_batches += 1
        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

        pbar.set_postfix({
            'loss': f'{running_loss / num_batches:.4f}',
            'acc': f'{100. * correct / max(total, 1):.2f}%'
        })

    if num_batches == 0:
        # Nothing was processed; avoid dividing by zero.
        return 0.0, 0.0
    return running_loss / num_batches, 100. * correct / max(total, 1)

# Validation function
def validate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without gradient tracking.

    Args:
        model: Network to evaluate; switched to evaluation mode here.
        loader: Iterable yielding ``(inputs, labels)`` tensor batches.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss tensor.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(mean_batch_loss, accuracy_percent)``: the mean of the per-batch loss
        values and the percentage of samples whose arg-max output equals the label.
        ``(0.0, 0.0)`` when ``loader`` yields no batches.
    """
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    # Batches are counted explicitly: tqdm only refreshes its `n` attribute at display
    # intervals, so `pbar.n` lags behind and would make the running mean wrong.
    num_batches = 0

    with torch.no_grad():
        pbar = tqdm(loader, desc='Validation')
        for inputs, labels in pbar:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            num_batches += 1
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

            pbar.set_postfix({
                'loss': f'{running_loss / num_batches:.4f}',
                'acc': f'{100. * correct / max(total, 1):.2f}%'
            })

    if num_batches == 0:
        # Nothing was processed; avoid dividing by zero.
        return 0.0, 0.0
    return running_loss / num_batches, 100. * correct / max(total, 1)

print("✅ Training and validation functions defined")

In [None]:
# Training loop
# Per-epoch curves; each list gains one entry per epoch.
history = {
    'train_loss': [],
    'train_acc': [],
    'val_loss': [],
    'val_acc': [],
    'lr': []
}

best_acc = 0.0
best_epoch = 0

# torch.save() does not create missing parent directories, so make sure the checkpoint
# folder exists before the first "best model" is written.
best_model_path = '../01_Image_Classification/models/best_model.pt'
os.makedirs(os.path.dirname(best_model_path), exist_ok=True)

print("Starting training...\n")
start_time = datetime.now()

for epoch in range(num_epochs):
    print(f"\nEpoch {epoch+1}/{num_epochs}")
    print("-" * 50)

    # Train
    train_loss, train_acc = train_epoch(model, trainloader, criterion, optimizer, device)

    # Validate
    # NOTE(review): the "validation" pass runs on `testloader`, and the best checkpoint is
    # chosen from it — the reported test accuracy is therefore used for model selection.
    val_loss, val_acc = validate(model, testloader, criterion, device)

    # Update scheduler
    # NOTE(review): the rate is read after scheduler.step(), so the value logged for this
    # epoch is the one the next epoch will train with.
    scheduler.step()
    current_lr = optimizer.param_groups[0]['lr']

    # Save history
    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)
    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)
    history['lr'].append(current_lr)

    # Print summary
    print(f"\nTrain Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%")
    print(f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")
    print(f"Learning Rate: {current_lr:.6f}")

    # Save best model: keep the checkpoint only when validation accuracy improves.
    if val_acc > best_acc:
        best_acc = val_acc
        best_epoch = epoch + 1  # 1-based for display; the checkpoint stores the 0-based index
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'best_acc': best_acc,
        }, best_model_path)
        print(f"✅ Best model saved! (Accuracy: {best_acc:.2f}%)")

end_time = datetime.now()
training_time = (end_time - start_time).total_seconds()

print("\n" + "="*50)
print(f"Training completed!")
print(f"Total training time: {training_time/60:.2f} minutes")
print(f"Best validation accuracy: {best_acc:.2f}% (Epoch {best_epoch})")
print("="*50)

In [None]:
# Three side-by-side panels: loss, accuracy and learning rate per epoch
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# The loss and accuracy panels share one layout, so they are drawn from a small table:
# (axis, train key, train label, val key, val label, y-axis label, title)
curve_panels = (
    (axes[0], 'train_loss', 'Train Loss', 'val_loss', 'Val Loss',
     'Loss', 'Training and Validation Loss'),
    (axes[1], 'train_acc', 'Train Acc', 'val_acc', 'Val Acc',
     'Accuracy (%)', 'Training and Validation Accuracy'),
)
for panel, train_key, train_label, val_key, val_label, y_label, panel_title in curve_panels:
    panel.plot(history[train_key], label=train_label, linewidth=2)
    panel.plot(history[val_key], label=val_label, linewidth=2)
    panel.set_xlabel('Epoch')
    panel.set_ylabel(y_label)
    panel.set_title(panel_title)
    panel.legend()
    panel.grid(True, alpha=0.3)

# Learning rate on a logarithmic y-axis
lr_panel = axes[2]
lr_panel.plot(history['lr'], linewidth=2, color='red')
lr_panel.set_xlabel('Epoch')
lr_panel.set_ylabel('Learning Rate')
lr_panel.set_title('Learning Rate Schedule')
lr_panel.set_yscale('log')
lr_panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('../01_Image_Classification/results/training_history.png', dpi=150, bbox_inches='tight')
plt.show()

## 7. Evaluation and Metrics <a id='evaluation'></a>

In [None]:
# Restore the weights stored in the best checkpoint written during training
checkpoint = torch.load(
    '../01_Image_Classification/models/best_model.pt',
    map_location=device,
)
model.load_state_dict(checkpoint['model_state_dict'])

# The checkpoint stores a 0-based epoch index; it is shown 1-based.
print(
    f"✅ Loaded best model from epoch {checkpoint['epoch']+1}",
    f"Best validation accuracy: {checkpoint['best_acc']:.2f}%",
    sep="\n",
)

In [None]:
# Run the model over the test loader and collect predictions, labels and probabilities
model.eval()
all_preds, all_labels, all_probs = [], [], []

with torch.no_grad():
    for inputs, labels in tqdm(testloader, desc='Evaluating'):
        logits = model(inputs.to(device))

        # Softmax gives per-class probabilities; the arg-max of the raw outputs is the
        # predicted class. Labels are not moved to the device.
        all_probs.extend(torch.softmax(logits, dim=1).cpu().numpy())
        all_preds.extend(logits.argmax(dim=1).cpu().numpy())
        all_labels.extend(labels.numpy())

# Turn the accumulated per-sample entries into arrays
all_preds, all_labels, all_probs = (
    np.array(collected) for collected in (all_preds, all_labels, all_probs)
)

print("✅ Predictions completed")

In [None]:
# Overall accuracy plus weighted-average precision, recall and F1
accuracy = accuracy_score(all_labels, all_preds)
precision, recall, f1, _ = precision_recall_fscore_support(
    all_labels,
    all_preds,
    average='weighted',
)

rule = "=" * 50
print("\n".join([
    "\n" + rule,
    "EVALUATION METRICS",
    rule,
    f"Accuracy:  {accuracy*100:.2f}%",
    f"Precision: {precision:.4f}",
    f"Recall:    {recall:.4f}",
    f"F1-Score:  {f1:.4f}",
    rule,
]))

# Per-class breakdown, rows named after the entries of `classes`
print("\nPer-class metrics:")
print(classification_report(all_labels, all_preds, target_names=classes))

In [None]:
# Confusion matrix of true labels (rows) against predicted labels (columns)
cm = confusion_matrix(all_labels, all_preds)

cm_fig, cm_ax = plt.subplots(figsize=(12, 10))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=classes,
    yticklabels=classes,
    cbar_kws={'label': 'Count'},
    ax=cm_ax,
)
cm_ax.set_xlabel('Predicted Label')
cm_ax.set_ylabel('True Label')
cm_ax.set_title('Confusion Matrix - Image Classification')
cm_fig.tight_layout()
cm_fig.savefig('../01_Image_Classification/results/confusion_matrix.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Per-class accuracy: diagonal of the confusion matrix divided by its row sums
class_accuracy = np.diag(cm) / cm.sum(axis=1)

acc_fig, acc_ax = plt.subplots(figsize=(12, 6))
bars = acc_ax.bar(classes, class_accuracy * 100, color='skyblue', edgecolor='black')
acc_ax.set_xlabel('Class')
acc_ax.set_ylabel('Accuracy (%)')
acc_ax.set_title('Per-Class Accuracy')
plt.xticks(rotation=45)
acc_ax.set_ylim(0, 100)
acc_ax.grid(axis='y', alpha=0.3)

# Write each bar's value just above its top edge, centered horizontally
for rect in bars:
    bar_top = rect.get_height()
    bar_center = rect.get_x() + rect.get_width()/2.
    acc_ax.text(bar_center, bar_top, f'{bar_top:.1f}%', ha='center', va='bottom')

acc_fig.tight_layout()
acc_fig.savefig('../01_Image_Classification/results/per_class_accuracy.png', dpi=150, bbox_inches='tight')
plt.show()

## 8. Inference Demo <a id='inference'></a>

In [None]:
# Visualize predictions
def show_predictions(model, dataset, num_images=16, device='cuda'):
    """Classify randomly chosen samples and plot them with true and predicted labels.

    ``num_images`` distinct indices are drawn with ``np.random.choice``. Each sample is
    run through ``model``, de-normalized with ``cifar10_mean`` / ``cifar10_std`` for
    display, and titled in green when the prediction matches the label, red otherwise.
    The grid has at most four columns and as many rows as needed, so the default of
    16 images still produces the 4x4, 16x16-inch figure. The figure is written to
    ``../01_Image_Classification/results/predictions.png`` and then shown.

    Args:
        model: Classifier returning one score per class; switched to evaluation mode here.
        dataset: Sized, indexable collection of ``(image, label)`` pairs whose images
            are 3-D tensors normalized with ``cifar10_mean`` / ``cifar10_std`` along
            their first dimension.
        num_images: Number of samples to show. Values larger than ``len(dataset)`` are
            clamped, since sampling is without replacement.
        device: Device the images are moved to for the forward pass. The default is
            ``'cuda'``; pass the device the model lives on when no GPU is in use.

    Raises:
        ValueError: If there is nothing to draw (``num_images`` < 1 or empty dataset).
    """
    num_images = min(num_images, len(dataset))
    if num_images < 1:
        raise ValueError("show_predictions needs num_images >= 1 and a non-empty dataset")

    model.eval()

    # Size the grid from the request instead of assuming exactly 16 images.
    n_cols = min(4, num_images)
    n_rows = -(-num_images // n_cols)  # ceiling division
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(4 * n_cols, 4 * n_rows), squeeze=False)
    axes = axes.ravel()
    for ax in axes:
        # Every cell loses its frame, including unused cells in the last row.
        ax.axis('off')

    indices = np.random.choice(len(dataset), num_images, replace=False)

    # Normalization statistics shaped to broadcast over the two trailing image dimensions.
    mean = torch.tensor(cifar10_mean).view(-1, 1, 1)
    std = torch.tensor(cifar10_std).view(-1, 1, 1)

    with torch.no_grad():
        for ax, idx in zip(axes, indices):
            img, true_label = dataset[idx]

            # Get prediction (a batch dimension is added for the forward pass)
            output = model(img.unsqueeze(0).to(device))
            probs = torch.softmax(output, dim=1)
            pred_label = output.argmax(1).item()
            confidence = probs[0][pred_label].item()

            # Denormalize for display: undo (x - mean) / std, then clip to [0, 1]
            img_display = torch.clamp(img * std + mean, 0, 1)

            # Display
            ax.imshow(img_display.permute(1, 2, 0))
            color = 'green' if pred_label == true_label else 'red'
            ax.set_title(
                f"True: {classes[true_label]}\nPred: {classes[pred_label]} ({confidence*100:.1f}%)",
                color=color, fontsize=10
            )

    plt.tight_layout()
    plt.savefig('../01_Image_Classification/results/predictions.png', dpi=150, bbox_inches='tight')
    plt.show()

show_predictions(model, testset, device=device)

## 9. Save Results <a id='save'></a>

In [None]:
# Assemble everything worth keeping from this run into one JSON-serializable dict
final_metrics = {
    'accuracy': float(accuracy),
    'precision': float(precision),
    'recall': float(recall),
    'f1_score': float(f1),
}

# Class name -> accuracy fraction, in the order of `classes`
per_class_accuracy = {
    name: float(class_accuracy[i]) for i, name in enumerate(classes)
}

# (exported key, key in `history`) pairs; values are converted to plain floats
training_history = {
    export_key: [float(x) for x in history[history_key]]
    for export_key, history_key in (
        ('train_loss', 'train_loss'),
        ('train_acc', 'train_acc'),
        ('val_loss', 'val_loss'),
        ('val_acc', 'val_acc'),
        ('learning_rate', 'lr'),
    )
}

metrics = {
    'model_name': 'SimpleCNN',
    'dataset': 'CIFAR-10',
    'training_time_minutes': training_time / 60,
    'num_epochs': num_epochs,
    'best_epoch': best_epoch,
    'total_parameters': total_params,
    'trainable_parameters': trainable_params,
    'final_metrics': final_metrics,
    'per_class_accuracy': per_class_accuracy,
    'training_history': training_history,
    'confusion_matrix': cm.tolist(),
}

with open('../01_Image_Classification/results/metrics.json', 'w') as metrics_file:
    json.dump(metrics, metrics_file, indent=2)

banner = "=" * 50
print("\n".join([
    "✅ Metrics saved to metrics.json",
    "\n" + banner,
    "NOTEBOOK EXECUTION COMPLETE",
    banner,
    "\nSaved files:",
    "  - Model: ../01_Image_Classification/models/best_model.pt",
    "  - Metrics: ../01_Image_Classification/results/metrics.json",
    "  - Visualizations: ../01_Image_Classification/results/*.png",
    f"\nFinal Accuracy: {accuracy * 100:.2f}%",
]))