In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F 
import torchvision
import torchvision.transforms as transforms

In [3]:
# ======================================================================
# CUSTOM CNN on CIFAR-10 (PyTorch) (256 batch size)
# ======================================================================

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F 
import torchvision
import torchvision.transforms as transforms

# ======================================================================
# SECTION 1: SETUP AND HYPERPARAMETERS
# ======================================================================
print("--- Setting up hyperparameters and device ---")

# Run on the first CUDA GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Hyperparameters
EPOCHS = 20                # passes over the training set
BATCH_SIZE = 256           # samples per mini-batch (train and test loaders)
LEARNING_RATE = 0.001      # step size given to the optimizer
NUM_CLASSES = 10           # CIFAR-10 has 10 classes
IMAGE_SIZE = 64            # Resize from 32x32 → 64x64

# ======================================================================
# SECTION 2: DATA LOADING AND AUGMENTATION
# ======================================================================
print("\n--- Preparing CIFAR-10 dataset ---")

# Tail shared by both pipelines: tensor conversion, then per-channel
# normalisation with fixed (mean, std) triples.
to_normalized_tensor = [
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2023, 0.1994, 0.2010)),
]

# Training: upscale, random crop after 8 px of padding, random horizontal flip.
train_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomCrop(IMAGE_SIZE, padding=8),
    transforms.RandomHorizontalFlip(),
    *to_normalized_tensor,
])

# Evaluation: upscale only, no random augmentation.
test_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    *to_normalized_tensor,
])

dataset_kwargs = dict(root='./data', download=True)
trainset = torchvision.datasets.CIFAR10(
    train=True, transform=train_transform, **dataset_kwargs)
testset = torchvision.datasets.CIFAR10(
    train=False, transform=test_transform, **dataset_kwargs)

# Only the training loader shuffles; everything else is shared.
loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=2)
trainloader = torch.utils.data.DataLoader(trainset, shuffle=True, **loader_kwargs)
testloader = torch.utils.data.DataLoader(testset, shuffle=False, **loader_kwargs)

# ======================================================================
# SECTION 3: CUSTOM CNN MODEL DEFINITION
# ======================================================================

class CustomCNN(nn.Module):
    """Three-stage convolutional classifier for CIFAR-10 images resized to 64x64.

    Each stage is Conv(3x3, padding=1) -> BatchNorm -> ReLU -> MaxPool(2x2), so the
    spatial size halves three times while channels grow 3 -> 16 -> 32 -> 64. The
    feature map is adaptively average-pooled to 8x8, flattened, and passed to a
    Linear(4096, 512) -> ReLU -> Dropout(0.5) -> Linear(512, num_classes) head.

    For 64x64 inputs the feature map is already 8x8, so the adaptive pool is an
    identity. For other input sizes (at least 8x8, so three poolings leave a
    non-empty map) it keeps the head's fixed 64 * 8 * 8 input width valid instead
    of failing with a shape mismatch in the first Linear layer. The pool has no
    parameters, so state_dict keys are unaffected.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Block 1: Conv -> BN -> ReLU -> MaxPool
        self.layer1 = self._conv_block(3, 16)
        # Block 2
        self.layer2 = self._conv_block(16, 32)
        # Block 3
        self.layer3 = self._conv_block(32, 64)

        # Brings the feature map to the 8x8 grid the classifier is sized for.
        self.pool = nn.AdaptiveAvgPool2d((8, 8))

        # Fully connected layers
        self.classifier = nn.Sequential(
            nn.Linear(64 * 8 * 8, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Build one Conv(3x3, padding=1) -> BatchNorm -> ReLU -> MaxPool(2x2) stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for a (batch, 3, H, W) input."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.pool(x)
        x = torch.flatten(x, 1)
        return self.classifier(x)

# ======================================================================
# SECTION 4: TRAINING AND EVALUATION FUNCTIONS
# ======================================================================

def train_model(model, trainloader, criterion, optimizer, epochs=10, device='cpu'):
    """Train ``model`` for ``epochs`` passes over ``trainloader``.

    Args:
        model: Module to optimise; switched to train mode at the start of each epoch.
        trainloader: Iterable yielding ``(inputs, labels)`` batches, where
            ``labels`` holds class indices comparable to the argmax of the outputs.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
        optimizer: Optimizer over the model's parameters.
        epochs: Number of passes over ``trainloader``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        dict with keys ``'loss'`` and ``'acc'``: one entry per epoch holding the
        sample-weighted mean loss and the training accuracy in percent.

    Raises:
        ValueError: If ``trainloader`` yields no samples in an epoch, since the
            epoch averages would otherwise divide by zero.
    """
    history = {'loss': [], 'acc': []}

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct_predictions = 0
        total_samples = 0

        print(f"\n--- Epoch {epoch+1}/{epochs} ---")
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Weight the batch loss by batch size so a smaller last batch
            # does not skew the epoch average.
            running_loss += loss.item() * inputs.size(0)
            # detach() instead of the legacy .data attribute: predictions do not
            # need to be tracked by autograd.
            predicted = outputs.detach().argmax(dim=1)
            total_samples += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()

        if total_samples == 0:
            raise ValueError("trainloader yielded no samples; cannot compute epoch metrics")

        epoch_loss = running_loss / total_samples
        epoch_acc = (correct_predictions / total_samples) * 100
        history['loss'].append(epoch_loss)
        history['acc'].append(epoch_acc)
        print(f"Epoch Summary | Loss: {epoch_loss:.4f} | Accuracy: {epoch_acc:.2f}%")

    print('\n--- Finished Training ---')
    return history


def evaluate_model(model, testloader, criterion, device='cpu'):
    """Compute mean loss and accuracy of ``model`` over ``testloader``.

    The model is put in eval mode and the whole pass runs under
    ``torch.no_grad()``.

    Args:
        model: Module to evaluate.
        testloader: Iterable yielding ``(inputs, labels)`` batches, where
            ``labels`` holds class indices comparable to the argmax of the outputs.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``(test_loss, test_acc)``: sample-weighted mean loss and accuracy in percent.

    Raises:
        ValueError: If ``testloader`` yields no samples, since the averages
            would otherwise divide by zero.
    """
    model.eval()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            # Weight by batch size so a smaller last batch does not skew the mean.
            running_loss += loss.item() * inputs.size(0)
            predicted = outputs.argmax(dim=1)
            total_samples += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()

    if total_samples == 0:
        raise ValueError("testloader yielded no samples; cannot compute test metrics")

    test_loss = running_loss / total_samples
    test_acc = (correct_predictions / total_samples) * 100
    print(f"\nTest Loss: {test_loss:.4f} | Test Accuracy: {test_acc:.2f}%")
    return test_loss, test_acc

# ======================================================================
# SECTION 5: TRAIN + EVALUATE PIPELINE
# ======================================================================

# Fresh model on the selected device, optimised with Adam against cross-entropy.
model = CustomCNN(num_classes=NUM_CLASSES)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

train_history = train_model(
    model, trainloader, criterion, optimizer,
    epochs=EPOCHS,
    device=device,
)
# Bare final expression so the notebook displays the returned (loss, accuracy) tuple.
evaluate_model(model, testloader, criterion, device=device)


--- Setting up hyperparameters and device ---
Using device: cuda:0

--- Preparing CIFAR-10 dataset ---
Files already downloaded and verified
Files already downloaded and verified

--- Epoch 1/20 ---
Epoch Summary | Loss: 1.7751 | Accuracy: 35.31%

--- Epoch 2/20 ---
Epoch Summary | Loss: 1.4656 | Accuracy: 45.93%

--- Epoch 3/20 ---
Epoch Summary | Loss: 1.3420 | Accuracy: 51.06%

--- Epoch 4/20 ---
Epoch Summary | Loss: 1.2499 | Accuracy: 54.81%

--- Epoch 5/20 ---
Epoch Summary | Loss: 1.1885 | Accuracy: 57.31%

--- Epoch 6/20 ---
Epoch Summary | Loss: 1.1433 | Accuracy: 58.97%

--- Epoch 7/20 ---
Epoch Summary | Loss: 1.1039 | Accuracy: 60.74%

--- Epoch 8/20 ---
Epoch Summary | Loss: 1.0767 | Accuracy: 61.54%

--- Epoch 9/20 ---
Epoch Summary | Loss: 1.0430 | Accuracy: 62.69%

--- Epoch 10/20 ---
Epoch Summary | Loss: 1.0225 | Accuracy: 63.75%

--- Epoch 11/20 ---
Epoch Summary | Loss: 1.0043 | Accuracy: 64.33%

--- Epoch 12/20 ---
Epoch Summary | Loss: 0.9852 | Accuracy: 65.23%

-

(0.7690620652198792, 73.50999999999999)

In [2]:
# ======================================================================
# CUSTOM CNN on CIFAR-10 (PyTorch) (256 batch size)
# ======================================================================

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F 
import torchvision
import torchvision.transforms as transforms

# ======================================================================
# SECTION 1: SETUP AND HYPERPARAMETERS
# ======================================================================
print("--- Setting up hyperparameters and device ---")

# Run on the first CUDA GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Hyperparameters
EPOCHS = 20                # passes over the training set
BATCH_SIZE = 256           # samples per mini-batch (train and test loaders)
LEARNING_RATE = 0.005      # step size given to the optimizer
NUM_CLASSES = 10           # CIFAR-10 has 10 classes
IMAGE_SIZE = 64            # Resize from 32x32 → 64x64

# ======================================================================
# SECTION 2: DATA LOADING AND AUGMENTATION
# ======================================================================
print("\n--- Preparing CIFAR-10 dataset ---")

# Tail shared by both pipelines: tensor conversion, then per-channel
# normalisation with fixed (mean, std) triples.
to_normalized_tensor = [
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2023, 0.1994, 0.2010)),
]

# Training: upscale, random crop after 8 px of padding, random horizontal flip.
train_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomCrop(IMAGE_SIZE, padding=8),
    transforms.RandomHorizontalFlip(),
    *to_normalized_tensor,
])

# Evaluation: upscale only, no random augmentation.
test_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    *to_normalized_tensor,
])

dataset_kwargs = dict(root='./data', download=True)
trainset = torchvision.datasets.CIFAR10(
    train=True, transform=train_transform, **dataset_kwargs)
testset = torchvision.datasets.CIFAR10(
    train=False, transform=test_transform, **dataset_kwargs)

# Only the training loader shuffles; everything else is shared.
loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=2)
trainloader = torch.utils.data.DataLoader(trainset, shuffle=True, **loader_kwargs)
testloader = torch.utils.data.DataLoader(testset, shuffle=False, **loader_kwargs)

# ======================================================================
# SECTION 3: CUSTOM CNN MODEL DEFINITION
# ======================================================================

class CustomCNN(nn.Module):
    """Three-stage convolutional classifier for CIFAR-10 images resized to 64x64.

    Each stage is Conv(3x3, padding=1) -> BatchNorm -> ReLU -> MaxPool(2x2), so the
    spatial size halves three times while channels grow 3 -> 16 -> 32 -> 64. The
    feature map is adaptively average-pooled to 8x8, flattened, and passed to a
    Linear(4096, 512) -> ReLU -> Dropout(0.5) -> Linear(512, num_classes) head.

    For 64x64 inputs the feature map is already 8x8, so the adaptive pool is an
    identity. For other input sizes (at least 8x8, so three poolings leave a
    non-empty map) it keeps the head's fixed 64 * 8 * 8 input width valid instead
    of failing with a shape mismatch in the first Linear layer. The pool has no
    parameters, so state_dict keys are unaffected.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Block 1: Conv -> BN -> ReLU -> MaxPool
        self.layer1 = self._conv_block(3, 16)
        # Block 2
        self.layer2 = self._conv_block(16, 32)
        # Block 3
        self.layer3 = self._conv_block(32, 64)

        # Brings the feature map to the 8x8 grid the classifier is sized for.
        self.pool = nn.AdaptiveAvgPool2d((8, 8))

        # Fully connected layers
        self.classifier = nn.Sequential(
            nn.Linear(64 * 8 * 8, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Build one Conv(3x3, padding=1) -> BatchNorm -> ReLU -> MaxPool(2x2) stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for a (batch, 3, H, W) input."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.pool(x)
        x = torch.flatten(x, 1)
        return self.classifier(x)

# ======================================================================
# SECTION 4: TRAINING AND EVALUATION FUNCTIONS
# ======================================================================

def train_model(model, trainloader, criterion, optimizer, epochs=10, device='cpu'):
    """Train ``model`` for ``epochs`` passes over ``trainloader``.

    Args:
        model: Module to optimise; switched to train mode at the start of each epoch.
        trainloader: Iterable yielding ``(inputs, labels)`` batches, where
            ``labels`` holds class indices comparable to the argmax of the outputs.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
        optimizer: Optimizer over the model's parameters.
        epochs: Number of passes over ``trainloader``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        dict with keys ``'loss'`` and ``'acc'``: one entry per epoch holding the
        sample-weighted mean loss and the training accuracy in percent.

    Raises:
        ValueError: If ``trainloader`` yields no samples in an epoch, since the
            epoch averages would otherwise divide by zero.
    """
    history = {'loss': [], 'acc': []}

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct_predictions = 0
        total_samples = 0

        print(f"\n--- Epoch {epoch+1}/{epochs} ---")
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Weight the batch loss by batch size so a smaller last batch
            # does not skew the epoch average.
            running_loss += loss.item() * inputs.size(0)
            # detach() instead of the legacy .data attribute: predictions do not
            # need to be tracked by autograd.
            predicted = outputs.detach().argmax(dim=1)
            total_samples += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()

        if total_samples == 0:
            raise ValueError("trainloader yielded no samples; cannot compute epoch metrics")

        epoch_loss = running_loss / total_samples
        epoch_acc = (correct_predictions / total_samples) * 100
        history['loss'].append(epoch_loss)
        history['acc'].append(epoch_acc)
        print(f"Epoch Summary | Loss: {epoch_loss:.4f} | Accuracy: {epoch_acc:.2f}%")

    print('\n--- Finished Training ---')
    return history


def evaluate_model(model, testloader, criterion, device='cpu'):
    """Compute mean loss and accuracy of ``model`` over ``testloader``.

    The model is put in eval mode and the whole pass runs under
    ``torch.no_grad()``.

    Args:
        model: Module to evaluate.
        testloader: Iterable yielding ``(inputs, labels)`` batches, where
            ``labels`` holds class indices comparable to the argmax of the outputs.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``(test_loss, test_acc)``: sample-weighted mean loss and accuracy in percent.

    Raises:
        ValueError: If ``testloader`` yields no samples, since the averages
            would otherwise divide by zero.
    """
    model.eval()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            # Weight by batch size so a smaller last batch does not skew the mean.
            running_loss += loss.item() * inputs.size(0)
            predicted = outputs.argmax(dim=1)
            total_samples += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()

    if total_samples == 0:
        raise ValueError("testloader yielded no samples; cannot compute test metrics")

    test_loss = running_loss / total_samples
    test_acc = (correct_predictions / total_samples) * 100
    print(f"\nTest Loss: {test_loss:.4f} | Test Accuracy: {test_acc:.2f}%")
    return test_loss, test_acc

# ======================================================================
# SECTION 5: TRAIN + EVALUATE PIPELINE
# ======================================================================

# Fresh model on the selected device, optimised with Adam against cross-entropy.
model = CustomCNN(num_classes=NUM_CLASSES)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

train_history = train_model(
    model, trainloader, criterion, optimizer,
    epochs=EPOCHS,
    device=device,
)
# Bare final expression so the notebook displays the returned (loss, accuracy) tuple.
evaluate_model(model, testloader, criterion, device=device)


--- Setting up hyperparameters and device ---
Using device: cuda:0

--- Preparing CIFAR-10 dataset ---
Files already downloaded and verified
Files already downloaded and verified

--- Epoch 1/20 ---
Epoch Summary | Loss: 2.3201 | Accuracy: 22.74%

--- Epoch 2/20 ---
Epoch Summary | Loss: 1.8456 | Accuracy: 29.40%

--- Epoch 3/20 ---
Epoch Summary | Loss: 1.7491 | Accuracy: 33.13%

--- Epoch 4/20 ---
Epoch Summary | Loss: 1.6814 | Accuracy: 36.43%

--- Epoch 5/20 ---
Epoch Summary | Loss: 1.6373 | Accuracy: 37.86%

--- Epoch 6/20 ---
Epoch Summary | Loss: 1.5992 | Accuracy: 39.38%

--- Epoch 7/20 ---
Epoch Summary | Loss: 1.5587 | Accuracy: 41.11%

--- Epoch 8/20 ---
Epoch Summary | Loss: 1.5268 | Accuracy: 42.82%

--- Epoch 9/20 ---
Epoch Summary | Loss: 1.4994 | Accuracy: 44.25%

--- Epoch 10/20 ---
Epoch Summary | Loss: 1.4771 | Accuracy: 45.33%

--- Epoch 11/20 ---
Epoch Summary | Loss: 1.4538 | Accuracy: 45.69%

--- Epoch 12/20 ---
Epoch Summary | Loss: 1.4323 | Accuracy: 46.92%

-

(1.1126872608184815, 61.019999999999996)

In [4]:
# ======================================================================
# CUSTOM CNN on CIFAR-10 (PyTorch) (256 batch size)
# ======================================================================

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F 
import torchvision
import torchvision.transforms as transforms

# ======================================================================
# SECTION 1: SETUP AND HYPERPARAMETERS
# ======================================================================
print("--- Setting up hyperparameters and device ---")

# Run on the first CUDA GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Hyperparameters
EPOCHS = 20                # passes over the training set
BATCH_SIZE = 256           # samples per mini-batch (train and test loaders)
LEARNING_RATE = 0.0005     # step size given to the optimizer
NUM_CLASSES = 10           # CIFAR-10 has 10 classes
IMAGE_SIZE = 64            # Resize from 32x32 → 64x64

# ======================================================================
# SECTION 2: DATA LOADING AND AUGMENTATION
# ======================================================================
print("\n--- Preparing CIFAR-10 dataset ---")

# Tail shared by both pipelines: tensor conversion, then per-channel
# normalisation with fixed (mean, std) triples.
to_normalized_tensor = [
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2023, 0.1994, 0.2010)),
]

# Training: upscale, random crop after 8 px of padding, random horizontal flip.
train_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomCrop(IMAGE_SIZE, padding=8),
    transforms.RandomHorizontalFlip(),
    *to_normalized_tensor,
])

# Evaluation: upscale only, no random augmentation.
test_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    *to_normalized_tensor,
])

dataset_kwargs = dict(root='./data', download=True)
trainset = torchvision.datasets.CIFAR10(
    train=True, transform=train_transform, **dataset_kwargs)
testset = torchvision.datasets.CIFAR10(
    train=False, transform=test_transform, **dataset_kwargs)

# Only the training loader shuffles; everything else is shared.
loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=2)
trainloader = torch.utils.data.DataLoader(trainset, shuffle=True, **loader_kwargs)
testloader = torch.utils.data.DataLoader(testset, shuffle=False, **loader_kwargs)

# ======================================================================
# SECTION 3: CUSTOM CNN MODEL DEFINITION
# ======================================================================

class CustomCNN(nn.Module):
    """Three-stage convolutional classifier for CIFAR-10 images resized to 64x64.

    Each stage is Conv(3x3, padding=1) -> BatchNorm -> ReLU -> MaxPool(2x2), so the
    spatial size halves three times while channels grow 3 -> 16 -> 32 -> 64. The
    feature map is adaptively average-pooled to 8x8, flattened, and passed to a
    Linear(4096, 512) -> ReLU -> Dropout(0.5) -> Linear(512, num_classes) head.

    For 64x64 inputs the feature map is already 8x8, so the adaptive pool is an
    identity. For other input sizes (at least 8x8, so three poolings leave a
    non-empty map) it keeps the head's fixed 64 * 8 * 8 input width valid instead
    of failing with a shape mismatch in the first Linear layer. The pool has no
    parameters, so state_dict keys are unaffected.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Block 1: Conv -> BN -> ReLU -> MaxPool
        self.layer1 = self._conv_block(3, 16)
        # Block 2
        self.layer2 = self._conv_block(16, 32)
        # Block 3
        self.layer3 = self._conv_block(32, 64)

        # Brings the feature map to the 8x8 grid the classifier is sized for.
        self.pool = nn.AdaptiveAvgPool2d((8, 8))

        # Fully connected layers
        self.classifier = nn.Sequential(
            nn.Linear(64 * 8 * 8, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Build one Conv(3x3, padding=1) -> BatchNorm -> ReLU -> MaxPool(2x2) stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for a (batch, 3, H, W) input."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.pool(x)
        x = torch.flatten(x, 1)
        return self.classifier(x)

# ======================================================================
# SECTION 4: TRAINING AND EVALUATION FUNCTIONS
# ======================================================================

def train_model(model, trainloader, criterion, optimizer, epochs=10, device='cpu'):
    """Train ``model`` for ``epochs`` passes over ``trainloader``.

    Args:
        model: Module to optimise; switched to train mode at the start of each epoch.
        trainloader: Iterable yielding ``(inputs, labels)`` batches, where
            ``labels`` holds class indices comparable to the argmax of the outputs.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
        optimizer: Optimizer over the model's parameters.
        epochs: Number of passes over ``trainloader``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        dict with keys ``'loss'`` and ``'acc'``: one entry per epoch holding the
        sample-weighted mean loss and the training accuracy in percent.

    Raises:
        ValueError: If ``trainloader`` yields no samples in an epoch, since the
            epoch averages would otherwise divide by zero.
    """
    history = {'loss': [], 'acc': []}

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct_predictions = 0
        total_samples = 0

        print(f"\n--- Epoch {epoch+1}/{epochs} ---")
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Weight the batch loss by batch size so a smaller last batch
            # does not skew the epoch average.
            running_loss += loss.item() * inputs.size(0)
            # detach() instead of the legacy .data attribute: predictions do not
            # need to be tracked by autograd.
            predicted = outputs.detach().argmax(dim=1)
            total_samples += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()

        if total_samples == 0:
            raise ValueError("trainloader yielded no samples; cannot compute epoch metrics")

        epoch_loss = running_loss / total_samples
        epoch_acc = (correct_predictions / total_samples) * 100
        history['loss'].append(epoch_loss)
        history['acc'].append(epoch_acc)
        print(f"Epoch Summary | Loss: {epoch_loss:.4f} | Accuracy: {epoch_acc:.2f}%")

    print('\n--- Finished Training ---')
    return history


def evaluate_model(model, testloader, criterion, device='cpu'):
    """Compute mean loss and accuracy of ``model`` over ``testloader``.

    The model is put in eval mode and the whole pass runs under
    ``torch.no_grad()``.

    Args:
        model: Module to evaluate.
        testloader: Iterable yielding ``(inputs, labels)`` batches, where
            ``labels`` holds class indices comparable to the argmax of the outputs.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``(test_loss, test_acc)``: sample-weighted mean loss and accuracy in percent.

    Raises:
        ValueError: If ``testloader`` yields no samples, since the averages
            would otherwise divide by zero.
    """
    model.eval()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            # Weight by batch size so a smaller last batch does not skew the mean.
            running_loss += loss.item() * inputs.size(0)
            predicted = outputs.argmax(dim=1)
            total_samples += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()

    if total_samples == 0:
        raise ValueError("testloader yielded no samples; cannot compute test metrics")

    test_loss = running_loss / total_samples
    test_acc = (correct_predictions / total_samples) * 100
    print(f"\nTest Loss: {test_loss:.4f} | Test Accuracy: {test_acc:.2f}%")
    return test_loss, test_acc

# ======================================================================
# SECTION 5: TRAIN + EVALUATE PIPELINE
# ======================================================================

# Fresh model on the selected device, optimised with Adam against cross-entropy.
model = CustomCNN(num_classes=NUM_CLASSES)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

train_history = train_model(
    model, trainloader, criterion, optimizer,
    epochs=EPOCHS,
    device=device,
)
# Bare final expression so the notebook displays the returned (loss, accuracy) tuple.
evaluate_model(model, testloader, criterion, device=device)


--- Setting up hyperparameters and device ---
Using device: cuda:0

--- Preparing CIFAR-10 dataset ---
Files already downloaded and verified
Files already downloaded and verified

--- Epoch 1/20 ---
Epoch Summary | Loss: 1.6751 | Accuracy: 38.54%

--- Epoch 2/20 ---
Epoch Summary | Loss: 1.4081 | Accuracy: 48.67%

--- Epoch 3/20 ---
Epoch Summary | Loss: 1.3022 | Accuracy: 52.82%

--- Epoch 4/20 ---
Epoch Summary | Loss: 1.2198 | Accuracy: 56.33%

--- Epoch 5/20 ---
Epoch Summary | Loss: 1.1669 | Accuracy: 58.04%

--- Epoch 6/20 ---
Epoch Summary | Loss: 1.1097 | Accuracy: 60.53%

--- Epoch 7/20 ---
Epoch Summary | Loss: 1.0783 | Accuracy: 61.45%

--- Epoch 8/20 ---
Epoch Summary | Loss: 1.0335 | Accuracy: 63.33%

--- Epoch 9/20 ---
Epoch Summary | Loss: 1.0054 | Accuracy: 64.44%

--- Epoch 10/20 ---
Epoch Summary | Loss: 0.9948 | Accuracy: 64.55%

--- Epoch 11/20 ---
Epoch Summary | Loss: 0.9594 | Accuracy: 65.97%

--- Epoch 12/20 ---
Epoch Summary | Loss: 0.9439 | Accuracy: 66.71%

-

(0.7219692619323731, 74.96000000000001)

In [None]:

# Hyperparameters
# NOTE: this cell defines no loaders, model class or training helpers of its own;
# it relies on `device`, `trainloader`, `testloader`, `CustomCNN`, `train_model`
# and `evaluate_model` already existing in the kernel from earlier cells.
EPOCHS = 20
BATCH_SIZE = 256
LEARNING_RATE = 0.001
NUM_CLASSES = 10           # CIFAR-10 has 10 classes
IMAGE_SIZE = 64            # Resize from 32x32 → 64x64
# ======================================================================
# SECTION 5: TRAIN + EVALUATE PIPELINE
# ======================================================================

model = CustomCNN(num_classes=NUM_CLASSES).to(device)
criterion = nn.CrossEntropyLoss()
# Use the LEARNING_RATE declared above rather than repeating the literal, so the
# constant and the value actually used cannot drift apart.
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=0.9, weight_decay=5e-4)


train_history = train_model(model, trainloader, criterion, optimizer,
                            epochs=EPOCHS, device=device)
# Bare final expression so the notebook displays the returned (loss, accuracy) tuple.
evaluate_model(model, testloader, criterion, device=device)



--- Epoch 1/20 ---
Epoch Summary | Loss: 1.6866 | Accuracy: 38.15%

--- Epoch 2/20 ---
Epoch Summary | Loss: 1.4022 | Accuracy: 48.84%

--- Epoch 3/20 ---
Epoch Summary | Loss: 1.2644 | Accuracy: 54.04%

--- Epoch 4/20 ---
Epoch Summary | Loss: 1.1520 | Accuracy: 58.57%

--- Epoch 5/20 ---
Epoch Summary | Loss: 1.0859 | Accuracy: 61.31%

--- Epoch 6/20 ---
Epoch Summary | Loss: 1.0294 | Accuracy: 63.68%

--- Epoch 7/20 ---
Epoch Summary | Loss: 0.9774 | Accuracy: 65.27%

--- Epoch 8/20 ---
Epoch Summary | Loss: 0.9373 | Accuracy: 66.67%

--- Epoch 9/20 ---
Epoch Summary | Loss: 0.9041 | Accuracy: 67.87%

--- Epoch 10/20 ---
Epoch Summary | Loss: 0.8773 | Accuracy: 68.88%

--- Epoch 11/20 ---
Epoch Summary | Loss: 0.8479 | Accuracy: 69.98%

--- Epoch 12/20 ---
Epoch Summary | Loss: 0.8265 | Accuracy: 71.02%

--- Epoch 13/20 ---
Epoch Summary | Loss: 0.8071 | Accuracy: 71.55%

--- Epoch 14/20 ---
Epoch Summary | Loss: 0.7835 | Accuracy: 72.55%

--- Epoch 15/20 ---
Epoch Summary | Loss: 

(0.6450141662597656, 77.92)

In [8]:
# ======================================================================
# SECTION 5: TRAIN + EVALUATE PIPELINE (SGD + Scheduler, batch=256)
# ======================================================================

from torch.optim.lr_scheduler import StepLR

BATCH_SIZE = 256  # mini-batch size for the loaders rebuilt below

# Rebuild both DataLoaders over the existing datasets using BATCH_SIZE
testloader = torch.utils.data.DataLoader(
    dataset=testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
trainloader = torch.utils.data.DataLoader(
    dataset=trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

# Loss and a fresh, untrained model on the selected device
criterion = nn.CrossEntropyLoss()
model = CustomCNN(num_classes=NUM_CLASSES)
model = model.to(device)

# SGD with momentum 0.9 and weight decay 5e-4
optimizer = optim.SGD(params=model.parameters(), lr=0.001, momentum=0.9, weight_decay=5e-4)

# StepLR multiplies the learning rate by gamma (0.1) once every 10 scheduler steps
scheduler = StepLR(optimizer=optimizer, step_size=10, gamma=0.1)

# Training function with scheduler
def train_model_with_scheduler(model, trainloader, criterion, optimizer, scheduler, epochs=10, device='cpu'):
    """Train ``model`` and step a learning-rate scheduler once per epoch.

    Args:
        model: Module to optimise; switched to train mode at the start of each epoch.
        trainloader: Iterable yielding ``(inputs, labels)`` batches, where
            ``labels`` holds class indices comparable to the argmax of the outputs.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
        optimizer: Optimizer over the model's parameters.
        scheduler: Object whose ``step()`` is called after each epoch's batches,
            or ``None`` to train at a fixed learning rate.
        epochs: Number of passes over ``trainloader``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        dict with keys ``'loss'`` and ``'acc'``: one entry per epoch holding the
        sample-weighted mean loss and the training accuracy in percent.

    Raises:
        ValueError: If ``trainloader`` yields no samples in an epoch, since the
            epoch averages would otherwise divide by zero.
    """
    history = {'loss': [], 'acc': []}

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct_predictions = 0
        total_samples = 0

        print(f"\n--- Epoch {epoch+1}/{epochs} ---")
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Weight the batch loss by batch size so a smaller last batch
            # does not skew the epoch average.
            running_loss += loss.item() * inputs.size(0)
            # detach() instead of the legacy .data attribute: predictions do not
            # need to be tracked by autograd.
            predicted = outputs.detach().argmax(dim=1)
            total_samples += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()

        if total_samples == 0:
            raise ValueError("trainloader yielded no samples; cannot compute epoch metrics")

        # Step the scheduler at the end of each epoch (after the optimizer steps)
        if scheduler is not None:
            scheduler.step()

        epoch_loss = running_loss / total_samples
        epoch_acc = (correct_predictions / total_samples) * 100
        history['loss'].append(epoch_loss)
        history['acc'].append(epoch_acc)
        print(f"Epoch Summary | Loss: {epoch_loss:.4f} | Accuracy: {epoch_acc:.2f}%")

    print('\n--- Finished Training ---')
    return history

# Train for 20 epochs; the scheduler is stepped once after each epoch
train_history = train_model_with_scheduler(
    model, trainloader, criterion, optimizer, scheduler,
    epochs=20,
    device=device,
)

# Bare final expression so the notebook displays the returned (loss, accuracy) tuple
evaluate_model(model, testloader, criterion, device=device)



--- Epoch 1/20 ---
Epoch Summary | Loss: 1.9742 | Accuracy: 27.70%

--- Epoch 2/20 ---
Epoch Summary | Loss: 1.6561 | Accuracy: 39.01%

--- Epoch 3/20 ---
Epoch Summary | Loss: 1.5276 | Accuracy: 43.76%

--- Epoch 4/20 ---
Epoch Summary | Loss: 1.4615 | Accuracy: 46.71%

--- Epoch 5/20 ---
Epoch Summary | Loss: 1.4137 | Accuracy: 48.68%

--- Epoch 6/20 ---
Epoch Summary | Loss: 1.3698 | Accuracy: 50.28%

--- Epoch 7/20 ---
Epoch Summary | Loss: 1.3334 | Accuracy: 52.06%

--- Epoch 8/20 ---
Epoch Summary | Loss: 1.2956 | Accuracy: 53.70%

--- Epoch 9/20 ---
Epoch Summary | Loss: 1.2587 | Accuracy: 54.78%

--- Epoch 10/20 ---
Epoch Summary | Loss: 1.2234 | Accuracy: 56.08%

--- Epoch 11/20 ---
Epoch Summary | Loss: 1.1885 | Accuracy: 57.65%

--- Epoch 12/20 ---
Epoch Summary | Loss: 1.1785 | Accuracy: 58.11%

--- Epoch 13/20 ---
Epoch Summary | Loss: 1.1731 | Accuracy: 58.20%

--- Epoch 14/20 ---
Epoch Summary | Loss: 1.1694 | Accuracy: 58.43%

--- Epoch 15/20 ---
Epoch Summary | Loss: 

(1.0292995738983155, 63.4)

In [9]:
# ======================================================================
# SECTION 5: TRAIN + EVALUATE PIPELINE (SGD + Scheduler, batch=256)
# ======================================================================

from torch.optim.lr_scheduler import StepLR

BATCH_SIZE = 256  # mini-batch size for the loaders rebuilt below

# Rebuild both DataLoaders over the existing datasets using BATCH_SIZE
testloader = torch.utils.data.DataLoader(
    dataset=testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
trainloader = torch.utils.data.DataLoader(
    dataset=trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

# Loss and a fresh, untrained model on the selected device
criterion = nn.CrossEntropyLoss()
model = CustomCNN(num_classes=NUM_CLASSES)
model = model.to(device)

# SGD with momentum 0.9 and weight decay 5e-4, starting from a learning rate of 0.05
optimizer = optim.SGD(params=model.parameters(), lr=0.05, momentum=0.9, weight_decay=5e-4)

# StepLR multiplies the learning rate by gamma (0.1) once every 10 scheduler steps
scheduler = StepLR(optimizer=optimizer, step_size=10, gamma=0.1)

# Training function with scheduler
def train_model_with_scheduler(model, trainloader, criterion, optimizer, scheduler, epochs=10, device='cpu'):
    """Train ``model`` and step a learning-rate scheduler once per epoch.

    Args:
        model: Module to optimise; switched to train mode at the start of each epoch.
        trainloader: Iterable yielding ``(inputs, labels)`` batches, where
            ``labels`` holds class indices comparable to the argmax of the outputs.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
        optimizer: Optimizer over the model's parameters.
        scheduler: Object whose ``step()`` is called after each epoch's batches,
            or ``None`` to train at a fixed learning rate.
        epochs: Number of passes over ``trainloader``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        dict with keys ``'loss'`` and ``'acc'``: one entry per epoch holding the
        sample-weighted mean loss and the training accuracy in percent.

    Raises:
        ValueError: If ``trainloader`` yields no samples in an epoch, since the
            epoch averages would otherwise divide by zero.
    """
    history = {'loss': [], 'acc': []}

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct_predictions = 0
        total_samples = 0

        print(f"\n--- Epoch {epoch+1}/{epochs} ---")
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Weight the batch loss by batch size so a smaller last batch
            # does not skew the epoch average.
            running_loss += loss.item() * inputs.size(0)
            # detach() instead of the legacy .data attribute: predictions do not
            # need to be tracked by autograd.
            predicted = outputs.detach().argmax(dim=1)
            total_samples += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()

        if total_samples == 0:
            raise ValueError("trainloader yielded no samples; cannot compute epoch metrics")

        # Step the scheduler at the end of each epoch (after the optimizer steps)
        if scheduler is not None:
            scheduler.step()

        epoch_loss = running_loss / total_samples
        epoch_acc = (correct_predictions / total_samples) * 100
        history['loss'].append(epoch_loss)
        history['acc'].append(epoch_acc)
        print(f"Epoch Summary | Loss: {epoch_loss:.4f} | Accuracy: {epoch_acc:.2f}%")

    print('\n--- Finished Training ---')
    return history

# Train for 20 epochs; the scheduler is stepped once after each epoch
train_history = train_model_with_scheduler(
    model, trainloader, criterion, optimizer, scheduler,
    epochs=20,
    device=device,
)

# Bare final expression so the notebook displays the returned (loss, accuracy) tuple
evaluate_model(model, testloader, criterion, device=device)



--- Epoch 1/20 ---
Epoch Summary | Loss: 1.7536 | Accuracy: 35.31%

--- Epoch 2/20 ---
Epoch Summary | Loss: 1.4960 | Accuracy: 45.07%

--- Epoch 3/20 ---
Epoch Summary | Loss: 1.2910 | Accuracy: 53.45%

--- Epoch 4/20 ---
Epoch Summary | Loss: 1.1791 | Accuracy: 58.02%

--- Epoch 5/20 ---
Epoch Summary | Loss: 1.0859 | Accuracy: 61.60%

--- Epoch 6/20 ---
Epoch Summary | Loss: 1.0212 | Accuracy: 64.15%

--- Epoch 7/20 ---
Epoch Summary | Loss: 0.9776 | Accuracy: 65.64%

--- Epoch 8/20 ---
Epoch Summary | Loss: 0.9385 | Accuracy: 67.00%

--- Epoch 9/20 ---
Epoch Summary | Loss: 0.9119 | Accuracy: 68.24%

--- Epoch 10/20 ---
Epoch Summary | Loss: 0.8827 | Accuracy: 69.08%

--- Epoch 11/20 ---
Epoch Summary | Loss: 0.7840 | Accuracy: 72.79%

--- Epoch 12/20 ---
Epoch Summary | Loss: 0.7513 | Accuracy: 74.01%

--- Epoch 13/20 ---
Epoch Summary | Loss: 0.7474 | Accuracy: 74.10%

--- Epoch 14/20 ---
Epoch Summary | Loss: 0.7292 | Accuracy: 74.64%

--- Epoch 15/20 ---
Epoch Summary | Loss: 

(0.6089787331581116, 79.06)

In [10]:
# ======================================================================
# CUSTOM CNN on CIFAR-10 (PyTorch) (256 batch size)
# ======================================================================

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F 
import torchvision
import torchvision.transforms as transforms

# ======================================================================
# SECTION 1: SETUP AND HYPERPARAMETERS
# ======================================================================
print("--- Setting up hyperparameters and device ---")

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Seed PyTorch's global RNG up front so weight initialisation, DataLoader
# shuffling and the random augmentations start from a fixed state on every
# run. (GPU kernels may still introduce small nondeterminism.)
SEED = 42
torch.manual_seed(SEED)

# Hyperparameters
EPOCHS = 20
BATCH_SIZE = 256
LEARNING_RATE = 0.001
NUM_CLASSES = 10           # CIFAR-10 has 10 classes
IMAGE_SIZE = 64            # Resize from 32x32 → 64x64

# ======================================================================
# SECTION 2: DATA LOADING AND AUGMENTATION
# ======================================================================
print("\n--- Preparing CIFAR-10 dataset ---")

# Training pipeline: upscale, then pad by 8 px and take a random
# IMAGE_SIZE crop, random horizontal flip, tensor conversion, and
# per-channel normalisation.
train_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomCrop(IMAGE_SIZE, padding=8),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2023, 0.1994, 0.2010))
])

# Test pipeline: same resize and normalisation, no random augmentation.
test_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2023, 0.1994, 0.2010))
])

trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=train_transform)
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=test_transform)

# Only the training loader shuffles; the test loader keeps dataset order.
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

# ======================================================================
# SECTION 3: CUSTOM CNN MODEL DEFINITION
# ======================================================================

class CustomCNN(nn.Module):
    """Three-block convolutional classifier for square RGB images.

    Each block is Conv(3x3, padding=1) -> BatchNorm -> ReLU -> MaxPool(2x2),
    so every block halves the spatial size while the channel count goes
    3 -> 16 -> 32 -> 64. The flattened feature map feeds a two-layer
    fully connected head with dropout (p=0.5) in between.

    Args:
        num_classes: Number of output logits.
        image_size: Side length of the square input images. Defaults to 64,
            which gives the 64 * 8 * 8 flattened feature size.

    Raises:
        ValueError: If ``image_size`` is smaller than 8, i.e. nothing would
            be left after three 2x2 poolings.
    """
    def __init__(self, num_classes=10, image_size=64):
        super(CustomCNN, self).__init__()

        # The padded 3x3 convs keep the spatial size; each MaxPool2d(2, 2)
        # floors it to half, so three of them leave image_size // 8 per side.
        feature_side = image_size // 8
        if feature_side < 1:
            raise ValueError(
                f"image_size must be at least 8 for three 2x2 poolings, got {image_size}")

        # Block 1: Conv -> BN -> ReLU -> MaxPool
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )

        # Block 2
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )

        # Block 3
        self.layer3 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )

        # Fully connected layers
        self.classifier = nn.Sequential(
            nn.Linear(64 * feature_side * feature_side, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        """Return class logits for a batch ``x`` of images."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        # Keep the batch dimension, collapse channels and spatial dims.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

# ======================================================================
# SECTION 4: TRAINING AND EVALUATION FUNCTIONS
# ======================================================================

def train_model(model, trainloader, criterion, optimizer, epochs=10, device='cpu'):
    """Run a supervised training loop and record per-epoch metrics.

    Args:
        model: Network to optimise; switched to train mode every epoch.
        trainloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable applied to ``(outputs, labels)``.
        optimizer: Optimizer that updates the model's parameters.
        epochs: Number of passes over ``trainloader``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Dict with keys ``'loss'`` and ``'acc'``; each holds one value per
        epoch (sample-weighted mean loss, accuracy in percent).
    """
    history = {'loss': [], 'acc': []}

    for epoch in range(epochs):
        model.train()
        loss_sum, n_correct, total_samples = 0.0, 0, 0

        print(f"\n--- Epoch {epoch+1}/{epochs} ---")
        for inputs, labels in trainloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Standard step: clear grads, forward, backward, update.
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Weight the batch loss by batch size so the epoch figure is a
            # per-sample average (assumes criterion returns a batch mean).
            loss_sum += inputs.size(0) * loss.item()
            predicted = outputs.detach().max(dim=1).indices
            n_correct += (predicted == labels).sum().item()
            total_samples += labels.size(0)

        epoch_loss = loss_sum / total_samples
        epoch_acc = (n_correct / total_samples) * 100
        history['loss'].append(epoch_loss)
        history['acc'].append(epoch_acc)
        print(f"Epoch Summary | Loss: {epoch_loss:.4f} | Accuracy: {epoch_acc:.2f}%")

    print('\n--- Finished Training ---')
    return history


def evaluate_model(model, testloader, criterion, device='cpu'):
    """Compute average loss and accuracy of ``model`` over ``testloader``.

    The model is put in eval mode and gradients are disabled for the pass.

    Args:
        model: Network to evaluate.
        testloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable applied to ``(outputs, labels)``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Tuple ``(test_loss, test_acc)``: sample-weighted mean loss and
        accuracy in percent.
    """
    model.eval()
    loss_sum = 0.0
    n_correct = 0
    total_samples = 0

    with torch.no_grad():
        for inputs, labels in testloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)

            # Scale the batch loss by batch size so the final division
            # yields a per-sample average.
            loss_sum += inputs.size(0) * criterion(outputs, labels).item()
            predicted = outputs.detach().max(dim=1).indices
            n_correct += (predicted == labels).sum().item()
            total_samples += labels.size(0)

    test_loss = loss_sum / total_samples
    test_acc = (n_correct / total_samples) * 100
    print(f"\nTest Loss: {test_loss:.4f} | Test Accuracy: {test_acc:.2f}%")
    return test_loss, test_acc

# ======================================================================
# SECTION 5: TRAIN + EVALUATE PIPELINE (RMSProp)
# ======================================================================

# Fresh network for this experiment, moved to the selected device
model = CustomCNN(num_classes=NUM_CLASSES)
model = model.to(device)
criterion = nn.CrossEntropyLoss()

# RMSProp with momentum and L2 weight decay
optimizer = optim.RMSprop(
    model.parameters(),
    lr=LEARNING_RATE,
    alpha=0.9,
    momentum=0.9,
    weight_decay=5e-4,
)

train_history = train_model(
    model, trainloader, criterion, optimizer,
    epochs=EPOCHS, device=device,
)
# Last bare expression: the returned (test_loss, test_acc) is shown as cell output
evaluate_model(model, testloader, criterion, device=device)


--- Setting up hyperparameters and device ---
Using device: cuda:0

--- Preparing CIFAR-10 dataset ---
Files already downloaded and verified
Files already downloaded and verified

--- Epoch 1/20 ---
Epoch Summary | Loss: 2.7925 | Accuracy: 27.36%

--- Epoch 2/20 ---
Epoch Summary | Loss: 1.6178 | Accuracy: 41.29%

--- Epoch 3/20 ---
Epoch Summary | Loss: 1.4218 | Accuracy: 48.23%

--- Epoch 4/20 ---
Epoch Summary | Loss: 1.3317 | Accuracy: 51.85%

--- Epoch 5/20 ---
Epoch Summary | Loss: 1.2769 | Accuracy: 54.39%

--- Epoch 6/20 ---
Epoch Summary | Loss: 1.2184 | Accuracy: 56.69%

--- Epoch 7/20 ---
Epoch Summary | Loss: 1.1679 | Accuracy: 58.76%

--- Epoch 8/20 ---
Epoch Summary | Loss: 1.1360 | Accuracy: 59.82%

--- Epoch 9/20 ---
Epoch Summary | Loss: 1.1386 | Accuracy: 59.49%

--- Epoch 10/20 ---
Epoch Summary | Loss: 1.1165 | Accuracy: 60.43%

--- Epoch 11/20 ---
Epoch Summary | Loss: 1.1025 | Accuracy: 61.03%

--- Epoch 12/20 ---
Epoch Summary | Loss: 1.0922 | Accuracy: 61.30%

-

(1.043992974472046, 64.03999999999999)

In [11]:
# ======================================================================
# CUSTOM CNN on CIFAR-10 (PyTorch) (256 batch size)
# ======================================================================

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F 
import torchvision
import torchvision.transforms as transforms

# ======================================================================
# SECTION 1: SETUP AND HYPERPARAMETERS
# ======================================================================
print("--- Setting up hyperparameters and device ---")

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Seed PyTorch's global RNG up front so weight initialisation, DataLoader
# shuffling and the random augmentations start from a fixed state on every
# run. (GPU kernels may still introduce small nondeterminism.)
SEED = 42
torch.manual_seed(SEED)

# Hyperparameters
EPOCHS = 20
BATCH_SIZE = 256
LEARNING_RATE = 0.001
NUM_CLASSES = 10           # CIFAR-10 has 10 classes
IMAGE_SIZE = 64            # Resize from 32x32 → 64x64

# ======================================================================
# SECTION 2: DATA LOADING AND AUGMENTATION
# ======================================================================
print("\n--- Preparing CIFAR-10 dataset ---")

# Training pipeline: upscale, then pad by 8 px and take a random
# IMAGE_SIZE crop, random horizontal flip, tensor conversion, and
# per-channel normalisation.
train_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomCrop(IMAGE_SIZE, padding=8),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2023, 0.1994, 0.2010))
])

# Test pipeline: same resize and normalisation, no random augmentation.
test_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2023, 0.1994, 0.2010))
])

trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=train_transform)
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=test_transform)

# Only the training loader shuffles; the test loader keeps dataset order.
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

# ======================================================================
# SECTION 3: CUSTOM CNN MODEL DEFINITION
# ======================================================================

class CustomCNN(nn.Module):
    """Three-block convolutional classifier for square RGB images (no dropout).

    Each block is Conv(3x3, padding=1) -> BatchNorm -> ReLU -> MaxPool(2x2),
    so every block halves the spatial size while the channel count goes
    3 -> 16 -> 32 -> 64. The flattened feature map feeds a two-layer
    fully connected head.

    Args:
        num_classes: Number of output logits.
        image_size: Side length of the square input images. Defaults to 64,
            which gives the 64 * 8 * 8 flattened feature size.

    Raises:
        ValueError: If ``image_size`` is smaller than 8, i.e. nothing would
            be left after three 2x2 poolings.
    """
    def __init__(self, num_classes=10, image_size=64):
        super(CustomCNN, self).__init__()

        # The padded 3x3 convs keep the spatial size; each MaxPool2d(2, 2)
        # floors it to half, so three of them leave image_size // 8 per side.
        feature_side = image_size // 8
        if feature_side < 1:
            raise ValueError(
                f"image_size must be at least 8 for three 2x2 poolings, got {image_size}")

        # Block 1: Conv -> BN -> ReLU -> MaxPool
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )

        # Block 2
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )

        # Block 3
        self.layer3 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )

        # Fully connected layers
        self.classifier = nn.Sequential(
            nn.Linear(64 * feature_side * feature_side, 512),
            nn.ReLU(),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        """Return class logits for a batch ``x`` of images."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        # Keep the batch dimension, collapse channels and spatial dims.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

# ======================================================================
# SECTION 4: TRAINING AND EVALUATION FUNCTIONS
# ======================================================================

def train_model(model, trainloader, criterion, optimizer, epochs=10, device='cpu'):
    """Run a supervised training loop and record per-epoch metrics.

    Args:
        model: Network to optimise; switched to train mode every epoch.
        trainloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable applied to ``(outputs, labels)``.
        optimizer: Optimizer that updates the model's parameters.
        epochs: Number of passes over ``trainloader``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Dict with keys ``'loss'`` and ``'acc'``; each holds one value per
        epoch (sample-weighted mean loss, accuracy in percent).
    """
    history = {'loss': [], 'acc': []}

    for epoch in range(epochs):
        model.train()
        loss_sum, n_correct, total_samples = 0.0, 0, 0

        print(f"\n--- Epoch {epoch+1}/{epochs} ---")
        for inputs, labels in trainloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Standard step: clear grads, forward, backward, update.
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Weight the batch loss by batch size so the epoch figure is a
            # per-sample average (assumes criterion returns a batch mean).
            loss_sum += inputs.size(0) * loss.item()
            predicted = outputs.detach().max(dim=1).indices
            n_correct += (predicted == labels).sum().item()
            total_samples += labels.size(0)

        epoch_loss = loss_sum / total_samples
        epoch_acc = (n_correct / total_samples) * 100
        history['loss'].append(epoch_loss)
        history['acc'].append(epoch_acc)
        print(f"Epoch Summary | Loss: {epoch_loss:.4f} | Accuracy: {epoch_acc:.2f}%")

    print('\n--- Finished Training ---')
    return history


def evaluate_model(model, testloader, criterion, device='cpu'):
    """Compute average loss and accuracy of ``model`` over ``testloader``.

    The model is put in eval mode and gradients are disabled for the pass.

    Args:
        model: Network to evaluate.
        testloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable applied to ``(outputs, labels)``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Tuple ``(test_loss, test_acc)``: sample-weighted mean loss and
        accuracy in percent.
    """
    model.eval()
    loss_sum = 0.0
    n_correct = 0
    total_samples = 0

    with torch.no_grad():
        for inputs, labels in testloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)

            # Scale the batch loss by batch size so the final division
            # yields a per-sample average.
            loss_sum += inputs.size(0) * criterion(outputs, labels).item()
            predicted = outputs.detach().max(dim=1).indices
            n_correct += (predicted == labels).sum().item()
            total_samples += labels.size(0)

    test_loss = loss_sum / total_samples
    test_acc = (n_correct / total_samples) * 100
    print(f"\nTest Loss: {test_loss:.4f} | Test Accuracy: {test_acc:.2f}%")
    return test_loss, test_acc

# ======================================================================
# SECTION 5: TRAIN + EVALUATE PIPELINE (RMSProp)
# ======================================================================

# Fresh network for this experiment, moved to the selected device
model = CustomCNN(num_classes=NUM_CLASSES)
model = model.to(device)
criterion = nn.CrossEntropyLoss()

# RMSProp with momentum and L2 weight decay
optimizer = optim.RMSprop(
    model.parameters(),
    lr=LEARNING_RATE,
    alpha=0.9,
    momentum=0.9,
    weight_decay=5e-4,
)

train_history = train_model(
    model, trainloader, criterion, optimizer,
    epochs=EPOCHS, device=device,
)
# Last bare expression: the returned (test_loss, test_acc) is shown as cell output
evaluate_model(model, testloader, criterion, device=device)


--- Setting up hyperparameters and device ---
Using device: cuda:0

--- Preparing CIFAR-10 dataset ---
Files already downloaded and verified
Files already downloaded and verified

--- Epoch 1/20 ---
Epoch Summary | Loss: 2.8296 | Accuracy: 33.46%

--- Epoch 2/20 ---
Epoch Summary | Loss: 1.4674 | Accuracy: 46.85%

--- Epoch 3/20 ---
Epoch Summary | Loss: 1.2817 | Accuracy: 53.44%

--- Epoch 4/20 ---
Epoch Summary | Loss: 1.1758 | Accuracy: 57.87%

--- Epoch 5/20 ---
Epoch Summary | Loss: 1.1191 | Accuracy: 59.97%

--- Epoch 6/20 ---
Epoch Summary | Loss: 1.0554 | Accuracy: 62.18%

--- Epoch 7/20 ---
Epoch Summary | Loss: 1.0168 | Accuracy: 64.10%

--- Epoch 8/20 ---
Epoch Summary | Loss: 0.9869 | Accuracy: 65.08%

--- Epoch 9/20 ---
Epoch Summary | Loss: 0.9679 | Accuracy: 65.87%

--- Epoch 10/20 ---
Epoch Summary | Loss: 0.9535 | Accuracy: 66.37%

--- Epoch 11/20 ---
Epoch Summary | Loss: 0.9440 | Accuracy: 66.90%

--- Epoch 12/20 ---
Epoch Summary | Loss: 0.9345 | Accuracy: 66.95%

-

(1.268196096420288, 60.01)

In [12]:
# ======================================================================
# CUSTOM CNN on CIFAR-10 (PyTorch) (256 batch size)
# ======================================================================

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F 
import torchvision
import torchvision.transforms as transforms

# ======================================================================
# SECTION 1: SETUP AND HYPERPARAMETERS
# ======================================================================
print("--- Setting up hyperparameters and device ---")

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Seed PyTorch's global RNG up front so weight initialisation, DataLoader
# shuffling and the random augmentations start from a fixed state on every
# run. (GPU kernels may still introduce small nondeterminism.)
SEED = 42
torch.manual_seed(SEED)

# Hyperparameters
EPOCHS = 20
BATCH_SIZE = 256
LEARNING_RATE = 0.0005
NUM_CLASSES = 10           # CIFAR-10 has 10 classes
IMAGE_SIZE = 64            # Resize from 32x32 → 64x64

# ======================================================================
# SECTION 2: DATA LOADING AND AUGMENTATION
# ======================================================================
print("\n--- Preparing CIFAR-10 dataset ---")

# Training pipeline: upscale, then pad by 8 px and take a random
# IMAGE_SIZE crop, random horizontal flip, tensor conversion, and
# per-channel normalisation.
train_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomCrop(IMAGE_SIZE, padding=8),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2023, 0.1994, 0.2010))
])

# Test pipeline: same resize and normalisation, no random augmentation.
test_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2023, 0.1994, 0.2010))
])

trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=train_transform)
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=test_transform)

# Only the training loader shuffles; the test loader keeps dataset order.
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

# ======================================================================
# SECTION 3: CUSTOM CNN MODEL DEFINITION
# ======================================================================

class CustomCNN(nn.Module):
    """Three-block convolutional classifier for square RGB images (no dropout).

    Each block is Conv(3x3, padding=1) -> BatchNorm -> ReLU -> MaxPool(2x2),
    so every block halves the spatial size while the channel count goes
    3 -> 16 -> 32 -> 64. The flattened feature map feeds a two-layer
    fully connected head.

    Args:
        num_classes: Number of output logits.
        image_size: Side length of the square input images. Defaults to 64,
            which gives the 64 * 8 * 8 flattened feature size.

    Raises:
        ValueError: If ``image_size`` is smaller than 8, i.e. nothing would
            be left after three 2x2 poolings.
    """
    def __init__(self, num_classes=10, image_size=64):
        super(CustomCNN, self).__init__()

        # The padded 3x3 convs keep the spatial size; each MaxPool2d(2, 2)
        # floors it to half, so three of them leave image_size // 8 per side.
        feature_side = image_size // 8
        if feature_side < 1:
            raise ValueError(
                f"image_size must be at least 8 for three 2x2 poolings, got {image_size}")

        # Block 1: Conv -> BN -> ReLU -> MaxPool
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )

        # Block 2
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )

        # Block 3
        self.layer3 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )

        # Fully connected layers
        self.classifier = nn.Sequential(
            nn.Linear(64 * feature_side * feature_side, 512),
            nn.ReLU(),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        """Return class logits for a batch ``x`` of images."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        # Keep the batch dimension, collapse channels and spatial dims.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

# ======================================================================
# SECTION 4: TRAINING AND EVALUATION FUNCTIONS
# ======================================================================

def train_model(model, trainloader, criterion, optimizer, epochs=10, device='cpu'):
    """Run a supervised training loop and record per-epoch metrics.

    Args:
        model: Network to optimise; switched to train mode every epoch.
        trainloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable applied to ``(outputs, labels)``.
        optimizer: Optimizer that updates the model's parameters.
        epochs: Number of passes over ``trainloader``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Dict with keys ``'loss'`` and ``'acc'``; each holds one value per
        epoch (sample-weighted mean loss, accuracy in percent).
    """
    history = {'loss': [], 'acc': []}

    for epoch in range(epochs):
        model.train()
        loss_sum, n_correct, total_samples = 0.0, 0, 0

        print(f"\n--- Epoch {epoch+1}/{epochs} ---")
        for inputs, labels in trainloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Standard step: clear grads, forward, backward, update.
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Weight the batch loss by batch size so the epoch figure is a
            # per-sample average (assumes criterion returns a batch mean).
            loss_sum += inputs.size(0) * loss.item()
            predicted = outputs.detach().max(dim=1).indices
            n_correct += (predicted == labels).sum().item()
            total_samples += labels.size(0)

        epoch_loss = loss_sum / total_samples
        epoch_acc = (n_correct / total_samples) * 100
        history['loss'].append(epoch_loss)
        history['acc'].append(epoch_acc)
        print(f"Epoch Summary | Loss: {epoch_loss:.4f} | Accuracy: {epoch_acc:.2f}%")

    print('\n--- Finished Training ---')
    return history


def evaluate_model(model, testloader, criterion, device='cpu'):
    """Compute average loss and accuracy of ``model`` over ``testloader``.

    The model is put in eval mode and gradients are disabled for the pass.

    Args:
        model: Network to evaluate.
        testloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable applied to ``(outputs, labels)``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Tuple ``(test_loss, test_acc)``: sample-weighted mean loss and
        accuracy in percent.
    """
    model.eval()
    loss_sum = 0.0
    n_correct = 0
    total_samples = 0

    with torch.no_grad():
        for inputs, labels in testloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)

            # Scale the batch loss by batch size so the final division
            # yields a per-sample average.
            loss_sum += inputs.size(0) * criterion(outputs, labels).item()
            predicted = outputs.detach().max(dim=1).indices
            n_correct += (predicted == labels).sum().item()
            total_samples += labels.size(0)

    test_loss = loss_sum / total_samples
    test_acc = (n_correct / total_samples) * 100
    print(f"\nTest Loss: {test_loss:.4f} | Test Accuracy: {test_acc:.2f}%")
    return test_loss, test_acc

# ======================================================================
# SECTION 5: TRAIN + EVALUATE PIPELINE
# ======================================================================

# Fresh network for this experiment, moved to the selected device
model = CustomCNN(num_classes=NUM_CLASSES)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
# Adam with only the learning rate overridden
optimizer = optim.Adam(
    model.parameters(),
    lr=LEARNING_RATE,
)

train_history = train_model(
    model, trainloader, criterion, optimizer,
    epochs=EPOCHS, device=device,
)
# Last bare expression: the returned (test_loss, test_acc) is shown as cell output
evaluate_model(model, testloader, criterion, device=device)


--- Setting up hyperparameters and device ---
Using device: cuda:0

--- Preparing CIFAR-10 dataset ---
Files already downloaded and verified
Files already downloaded and verified

--- Epoch 1/20 ---
Epoch Summary | Loss: 1.5392 | Accuracy: 44.02%

--- Epoch 2/20 ---
Epoch Summary | Loss: 1.2462 | Accuracy: 55.27%

--- Epoch 3/20 ---
Epoch Summary | Loss: 1.1152 | Accuracy: 60.24%

--- Epoch 4/20 ---
Epoch Summary | Loss: 1.0248 | Accuracy: 63.44%

--- Epoch 5/20 ---
Epoch Summary | Loss: 0.9604 | Accuracy: 66.17%

--- Epoch 6/20 ---
Epoch Summary | Loss: 0.9040 | Accuracy: 68.12%

--- Epoch 7/20 ---
Epoch Summary | Loss: 0.8604 | Accuracy: 69.60%

--- Epoch 8/20 ---
Epoch Summary | Loss: 0.8188 | Accuracy: 70.99%

--- Epoch 9/20 ---
Epoch Summary | Loss: 0.7981 | Accuracy: 71.97%

--- Epoch 10/20 ---
Epoch Summary | Loss: 0.7762 | Accuracy: 72.67%

--- Epoch 11/20 ---
Epoch Summary | Loss: 0.7418 | Accuracy: 74.16%

--- Epoch 12/20 ---
Epoch Summary | Loss: 0.7219 | Accuracy: 74.46%

-

(0.6180855454444886, 78.32000000000001)

In [13]:
# ======================================================================
# SECTION 5: TRAIN + EVALUATE PIPELINE (SGD + Scheduler, batch=256)
# ======================================================================

from torch.optim.lr_scheduler import StepLR

BATCH_SIZE = 256  # Batch size used for the loaders rebuilt below

# Rebuild both DataLoaders from the existing datasets with BATCH_SIZE;
# only the training loader shuffles.
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=2,
)

# Fresh model and loss for this experiment
model = CustomCNN(num_classes=NUM_CLASSES)
model = model.to(device)
criterion = nn.CrossEntropyLoss()

# SGD with momentum and L2 weight decay
optimizer = optim.SGD(
    model.parameters(),
    lr=0.001,
    momentum=0.9,
    weight_decay=5e-4,
)

# StepLR: multiply the learning rate by gamma=0.1 once every 10 scheduler steps
# (the training loop steps it once per epoch)
scheduler = StepLR(optimizer, step_size=10, gamma=0.1)

# Training function with scheduler
def train_model_with_scheduler(model, trainloader, criterion, optimizer, scheduler, epochs=10, device='cpu'):
    """Train ``model`` and step a learning-rate scheduler once per epoch.

    Args:
        model: Network to optimise; switched to train mode every epoch.
        trainloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable applied to ``(outputs, labels)``.
        optimizer: Optimizer that updates the model's parameters.
        scheduler: Object whose ``step()`` is called after each epoch's batches.
        epochs: Number of passes over ``trainloader``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Dict with keys ``'loss'`` and ``'acc'``; each holds one value per
        epoch (sample-weighted mean loss, accuracy in percent).
    """
    history = {'loss': [], 'acc': []}

    for epoch in range(epochs):
        model.train()
        loss_sum, n_correct, total_samples = 0.0, 0, 0

        print(f"\n--- Epoch {epoch+1}/{epochs} ---")
        for inputs, labels in trainloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Standard step: clear grads, forward, backward, update.
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Weight the batch loss by batch size so the epoch figure is a
            # per-sample average (assumes criterion returns a batch mean).
            loss_sum += inputs.size(0) * loss.item()
            predicted = outputs.detach().max(dim=1).indices
            n_correct += (predicted == labels).sum().item()
            total_samples += labels.size(0)

        # Advance the LR schedule only after all optimizer steps of the epoch
        scheduler.step()

        epoch_loss = loss_sum / total_samples
        epoch_acc = (n_correct / total_samples) * 100
        history['loss'].append(epoch_loss)
        history['acc'].append(epoch_acc)
        print(f"Epoch Summary | Loss: {epoch_loss:.4f} | Accuracy: {epoch_acc:.2f}%")

    print('\n--- Finished Training ---')
    return history

# Run the 20-epoch loop; the scheduler is stepped once at the end of every epoch
train_history = train_model_with_scheduler(
    model, trainloader, criterion, optimizer, scheduler,
    epochs=20,
    device=device,
)

# Score on the test loader. Kept as the cell's final bare expression so the
# returned (test_loss, test_acc) tuple is displayed as the cell output.
evaluate_model(model, testloader, criterion, device=device)



--- Epoch 1/20 ---
Epoch Summary | Loss: 1.9146 | Accuracy: 31.53%

--- Epoch 2/20 ---
Epoch Summary | Loss: 1.5725 | Accuracy: 43.33%

--- Epoch 3/20 ---
Epoch Summary | Loss: 1.4489 | Accuracy: 47.30%

--- Epoch 4/20 ---
Epoch Summary | Loss: 1.3763 | Accuracy: 50.49%

--- Epoch 5/20 ---
Epoch Summary | Loss: 1.3211 | Accuracy: 52.83%

--- Epoch 6/20 ---
Epoch Summary | Loss: 1.2662 | Accuracy: 54.73%

--- Epoch 7/20 ---
Epoch Summary | Loss: 1.2233 | Accuracy: 56.50%

--- Epoch 8/20 ---
Epoch Summary | Loss: 1.1782 | Accuracy: 58.25%

--- Epoch 9/20 ---
Epoch Summary | Loss: 1.1465 | Accuracy: 59.50%

--- Epoch 10/20 ---
Epoch Summary | Loss: 1.1150 | Accuracy: 60.51%

--- Epoch 11/20 ---
Epoch Summary | Loss: 1.0687 | Accuracy: 62.60%

--- Epoch 12/20 ---
Epoch Summary | Loss: 1.0696 | Accuracy: 62.44%

--- Epoch 13/20 ---
Epoch Summary | Loss: 1.0628 | Accuracy: 62.64%

--- Epoch 14/20 ---
Epoch Summary | Loss: 1.0579 | Accuracy: 62.81%

--- Epoch 15/20 ---
Epoch Summary | Loss: 

(0.9588596880912781, 66.64)

In [14]:
# Hyperparameters
EPOCHS = 20
BATCH_SIZE = 256
LEARNING_RATE = 0.001      # notebook-wide value; NOT the rate this SGD run uses
NUM_CLASSES = 10           # CIFAR-10 has 10 classes
IMAGE_SIZE = 64            # Resize from 32x32 → 64x64

# SGD settings actually applied in this cell, named so the real learning
# rate is visible next to the other hyperparameters.
SGD_LEARNING_RATE = 0.01
SGD_MOMENTUM = 0.9
SGD_WEIGHT_DECAY = 5e-4
# ======================================================================
# SECTION 5: TRAIN + EVALUATE PIPELINE
# ======================================================================

# NOTE: trainloader / testloader are not rebuilt here; the loaders created
# by an earlier cell are reused as-is.
model = CustomCNN(num_classes=NUM_CLASSES).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=SGD_LEARNING_RATE,
                      momentum=SGD_MOMENTUM, weight_decay=SGD_WEIGHT_DECAY)


train_history = train_model(model, trainloader, criterion, optimizer,
                            epochs=EPOCHS, device=device)
# Last bare expression: the returned (test_loss, test_acc) is shown as cell output
evaluate_model(model, testloader, criterion, device=device)



--- Epoch 1/20 ---
Epoch Summary | Loss: 1.5937 | Accuracy: 41.50%

--- Epoch 2/20 ---
Epoch Summary | Loss: 1.2696 | Accuracy: 53.61%

--- Epoch 3/20 ---
Epoch Summary | Loss: 1.0955 | Accuracy: 60.68%

--- Epoch 4/20 ---
Epoch Summary | Loss: 1.0090 | Accuracy: 63.83%

--- Epoch 5/20 ---
Epoch Summary | Loss: 0.9226 | Accuracy: 67.23%

--- Epoch 6/20 ---
Epoch Summary | Loss: 0.8718 | Accuracy: 69.02%

--- Epoch 7/20 ---
Epoch Summary | Loss: 0.8225 | Accuracy: 70.82%

--- Epoch 8/20 ---
Epoch Summary | Loss: 0.7812 | Accuracy: 72.54%

--- Epoch 9/20 ---
Epoch Summary | Loss: 0.7479 | Accuracy: 73.74%

--- Epoch 10/20 ---
Epoch Summary | Loss: 0.7205 | Accuracy: 74.75%

--- Epoch 11/20 ---
Epoch Summary | Loss: 0.6986 | Accuracy: 75.28%

--- Epoch 12/20 ---
Epoch Summary | Loss: 0.6709 | Accuracy: 76.39%

--- Epoch 13/20 ---
Epoch Summary | Loss: 0.6442 | Accuracy: 77.45%

--- Epoch 14/20 ---
Epoch Summary | Loss: 0.6302 | Accuracy: 77.82%

--- Epoch 15/20 ---
Epoch Summary | Loss: 

(0.5634178160667419, 80.64)