In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision.models import vgg16, VGG16_Weights
import matplotlib.pyplot as plt
import numpy as np
import time


In [3]:
# ======================================================================
# CUSTOM CNN on CIFAR-10 (PyTorch)
# ======================================================================

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F 
import torchvision
import torchvision.transforms as transforms

# ======================================================================
# SECTION 1: SETUP AND HYPERPARAMETERS
# ======================================================================
print("--- Setting up hyperparameters and device ---")

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Hyperparameters
EPOCHS = 20
BATCH_SIZE = 128
LEARNING_RATE = 0.001
NUM_CLASSES = 10           # CIFAR-10 has 10 classes
IMAGE_SIZE = 64            # Resize from 32x32 → 64x64

# ======================================================================
# SECTION 2: DATA LOADING AND AUGMENTATION
# ======================================================================
print("\n--- Preparing CIFAR-10 dataset ---")

train_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomCrop(IMAGE_SIZE, padding=8),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2023, 0.1994, 0.2010))
])

test_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2023, 0.1994, 0.2010))
])

trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=train_transform)
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=test_transform)

trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

# ======================================================================
# SECTION 3: CUSTOM CNN MODEL DEFINITION
# ======================================================================

class CustomCNN(nn.Module):
    """Custom CNN architecture for CIFAR-10 (64x64)."""
    def __init__(self, num_classes=10):
        super(CustomCNN, self).__init__()
        
        # Block 1: Conv -> BN -> ReLU -> MaxPool
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )

        # Block 2
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )

        # Block 3
        self.layer3 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )

        # Fully connected layers
        self.classifier = nn.Sequential(
            nn.Linear(64 * 8 * 8, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

# ======================================================================
# SECTION 4: TRAINING AND EVALUATION FUNCTIONS
# ======================================================================

def train_model(model, trainloader, criterion, optimizer, epochs=10, device='cpu'):
    """Train a PyTorch model."""
    history = {'loss': [], 'acc': []}
    
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct_predictions = 0
        total_samples = 0
        
        print(f"\n--- Epoch {epoch+1}/{epochs} ---")
        for i, (inputs, labels) in enumerate(trainloader, 0):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total_samples += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()

        epoch_loss = running_loss / total_samples
        epoch_acc = (correct_predictions / total_samples) * 100
        history['loss'].append(epoch_loss)
        history['acc'].append(epoch_acc)
        print(f"Epoch Summary | Loss: {epoch_loss:.4f} | Accuracy: {epoch_acc:.2f}%")
    
    print('\n--- Finished Training ---')
    return history


def evaluate_model(model, testloader, criterion, device='cpu'):
    """Evaluate model performance on test set."""
    model.eval()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total_samples += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()

    test_loss = running_loss / total_samples
    test_acc = (correct_predictions / total_samples) * 100
    print(f"\nTest Loss: {test_loss:.4f} | Test Accuracy: {test_acc:.2f}%")
    return test_loss, test_acc

# ======================================================================
# SECTION 5: TRAIN + EVALUATE PIPELINE
# ======================================================================

model = CustomCNN(num_classes=NUM_CLASSES).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

train_history = train_model(model, trainloader, criterion, optimizer,
                            epochs=EPOCHS, device=device)
evaluate_model(model, testloader, criterion, device=device)


--- Setting up hyperparameters and device ---
Using device: cuda:0

--- Preparing CIFAR-10 dataset ---
Files already downloaded and verified
Files already downloaded and verified

--- Epoch 1/20 ---
Epoch Summary | Loss: 1.7058 | Accuracy: 37.67%

--- Epoch 2/20 ---
Epoch Summary | Loss: 1.4588 | Accuracy: 46.56%

--- Epoch 3/20 ---
Epoch Summary | Loss: 1.3365 | Accuracy: 51.34%

--- Epoch 4/20 ---
Epoch Summary | Loss: 1.2391 | Accuracy: 55.37%

--- Epoch 5/20 ---
Epoch Summary | Loss: 1.1668 | Accuracy: 58.15%

--- Epoch 6/20 ---
Epoch Summary | Loss: 1.1141 | Accuracy: 60.24%

--- Epoch 7/20 ---
Epoch Summary | Loss: 1.0720 | Accuracy: 61.98%

--- Epoch 8/20 ---
Epoch Summary | Loss: 1.0389 | Accuracy: 63.40%

--- Epoch 9/20 ---
Epoch Summary | Loss: 1.0068 | Accuracy: 64.54%

--- Epoch 10/20 ---
Epoch Summary | Loss: 0.9756 | Accuracy: 65.43%

--- Epoch 11/20 ---
Epoch Summary | Loss: 0.9630 | Accuracy: 66.22%

--- Epoch 12/20 ---
Epoch Summary | Loss: 0.9340 | Accuracy: 67.42%

-

(0.7330012047767639, 75.17)

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import torchvision
import torchvision.transforms as transforms
from torchvision.models import resnet18

# ==============================================================================
# SECTION 1: SETUP AND HYPERPARAMETERS RESNET-18
# ==============================================================================
print("--- Setting up hyperparameters and device ---")
# Set the device to a GPU if available, otherwise use the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Define hyperparameters
# NOTE: Training from scratch requires more epochs to converge.
# 40-50 epochs is a good starting point.
EPOCHS = 20
BATCH_SIZE = 128
LEARNING_RATE = 0.01 # Initial learning rate for SGD
NUM_CLASSES = 10     # For CIFAR-10


# ==============================================================================
# SECTION 2: DATA LOADING AND AUGMENTATION
# ==============================================================================
print("\n--- Preparing CIFAR-10 dataset ---")
# Define data augmentation and normalization for the training set
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

# Define normalization for the test set (no augmentation)
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

# Load the datasets
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=train_transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transform)

# Create the data loaders
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)


# ==============================================================================
# SECTION 3: MODEL AND HELPER FUNCTIONS
# ==============================================================================

def create_resnet18_from_scratch(num_classes=10):
    """
    Creates a ResNet-18 model with random initial weights (trained from scratch).
    """
    print("Initializing ResNet-18 model with random weights.")
    # Load a ResNet18 model with weights=None for random initialization
    model = resnet18(weights=None, num_classes=num_classes)
    
    # --- MODIFICATION FOR CIFAR-10 ---
    # To improve performance on small 32x32 images, it's highly recommended
    # to make the first layer less aggressive.
    model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
    model.maxpool = nn.Identity()
    # --- END OF MODIFICATION ---
    
    return model

def train_model(model, trainloader, criterion, optimizer, scheduler=None, epochs=20, device='cpu'):
    """Function to train a PyTorch model."""
    history = {'loss': [], 'accuracy': []}
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct_predictions = 0
        total_samples = 0
        print(f"\n--- Epoch {epoch+1}/{epochs} ---")
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total_samples += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()
        if scheduler:
            scheduler.step()
        epoch_loss = running_loss / total_samples
        epoch_acc = (correct_predictions / total_samples) * 100
        history['loss'].append(epoch_loss)
        history['accuracy'].append(epoch_acc)
        print(f"Epoch Summary | Loss: {epoch_loss:.4f} | Accuracy: {epoch_acc:.2f}%")
    print('\n--- Finished Training ---')
    return history

def evaluate_model(model, testloader, criterion, device='cpu'):
    """Function to evaluate a PyTorch model's performance."""
    model.eval()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    with torch.no_grad():
        for data in testloader:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total_samples += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()
    final_loss = running_loss / total_samples
    final_acc = (correct_predictions / total_samples) * 100
    return final_loss, final_acc


# ==============================================================================
# SECTION 4: MAIN EXECUTION BLOCK
# ==============================================================================

print("\n--- Initializing Model, Loss, and Optimizer ---")
# Create the model FROM SCRATCH and move it to the configured device
model_from_scratch = create_resnet18_from_scratch(num_classes=NUM_CLASSES)
model_from_scratch.to(device)

# Define the loss function
criterion = nn.CrossEntropyLoss()

# Define the optimizer to train ALL model parameters
optimizer = optim.SGD(model_from_scratch.parameters(), lr=LEARNING_RATE, momentum=0.9, weight_decay=5e-4)

# Define a learning rate scheduler
scheduler = StepLR(optimizer, step_size=15, gamma=0.1) # Adjusted step_size for more epochs

# --- Start Training ---
print("\n--- Training ResNet-18 from scratch ---")
history = train_model(model_from_scratch, trainloader, criterion, optimizer, scheduler=scheduler, epochs=EPOCHS, device=device)

# --- Evaluate the Model ---
print("\n--- Evaluating model on the test set ---")
final_loss, final_acc = evaluate_model(model_from_scratch, testloader, criterion, device=device)

print(f"\nFinal Test Results:")
print(f"Loss: {final_loss:.4f}")
print(f"Accuracy: {final_acc:.2f}%")

--- Setting up hyperparameters and device ---
Using device: cuda:0

--- Preparing CIFAR-10 dataset ---
Files already downloaded and verified
Files already downloaded and verified

--- Initializing Model, Loss, and Optimizer ---
Initializing ResNet-18 model with random weights.

--- Training ResNet-18 from scratch ---

--- Epoch 1/20 ---
Epoch Summary | Loss: 1.5690 | Accuracy: 41.89%

--- Epoch 2/20 ---
Epoch Summary | Loss: 1.1133 | Accuracy: 59.83%

--- Epoch 3/20 ---
Epoch Summary | Loss: 0.8755 | Accuracy: 68.89%

--- Epoch 4/20 ---
Epoch Summary | Loss: 0.7295 | Accuracy: 74.25%

--- Epoch 5/20 ---
Epoch Summary | Loss: 0.6315 | Accuracy: 78.03%

--- Epoch 6/20 ---
Epoch Summary | Loss: 0.5598 | Accuracy: 80.40%

--- Epoch 7/20 ---
Epoch Summary | Loss: 0.5104 | Accuracy: 82.42%

--- Epoch 8/20 ---
Epoch Summary | Loss: 0.4638 | Accuracy: 83.85%

--- Epoch 9/20 ---
Epoch Summary | Loss: 0.4306 | Accuracy: 84.97%

--- Epoch 10/20 ---
Epoch Summary | Loss: 0.3996 | Accuracy: 86.00%


In [4]:
# =====================================================
# DenseNet-121 for CIFAR-10 - PyTorch
# High accuracy setup with CUDA
# =====================================================

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader

# ----------------- Check CUDA -----------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# ----------------- Data Augmentation ----------------
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), 
                         (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), 
                         (0.2023, 0.1994, 0.2010)),
])

# ----------------- Load CIFAR-10 --------------------
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=100, shuffle=False, num_workers=4)

# ----------------- Load DenseNet-121 ----------------
model = models.densenet121(weights=None)  # start from scratch
model.classifier = nn.Linear(model.classifier.in_features, 10)  # CIFAR-10 has 10 classes
model = model.to(device)

# ----------------- Loss & Optimizer -----------------
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)

# Learning rate scheduler
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

# ----------------- Training Function ----------------
def train(model, loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
    
    return running_loss / total, 100. * correct / total

# ----------------- Testing Function -----------------
def test(model, loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * images.size(0)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
    
    return running_loss / total, 100. * correct / total

# ----------------- Main Training Loop ----------------
num_epochs = 20
for epoch in range(num_epochs):
    train_loss, train_acc = train(model, train_loader, criterion, optimizer, device)
    test_loss, test_acc = test(model, test_loader, criterion, device)
    scheduler.step()

    print(f"Epoch [{epoch+1}/{num_epochs}] "
          f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}% "
          f"Test Loss: {test_loss:.4f} | Test Acc: {test_acc:.2f}%")


Using device: cuda
Files already downloaded and verified
Files already downloaded and verified
Epoch [1/20] Train Loss: 1.5408 | Train Acc: 43.61% Test Loss: 1.2972 | Test Acc: 52.38%
Epoch [2/20] Train Loss: 1.1596 | Train Acc: 58.54% Test Loss: 1.0406 | Test Acc: 63.38%
Epoch [3/20] Train Loss: 0.9854 | Train Acc: 65.17% Test Loss: 0.9201 | Test Acc: 67.55%
Epoch [4/20] Train Loss: 0.8713 | Train Acc: 69.31% Test Loss: 0.8659 | Test Acc: 69.28%
Epoch [5/20] Train Loss: 0.7907 | Train Acc: 72.18% Test Loss: 0.8316 | Test Acc: 71.30%
Epoch [6/20] Train Loss: 0.7395 | Train Acc: 73.92% Test Loss: 0.8089 | Test Acc: 72.71%
Epoch [7/20] Train Loss: 0.6820 | Train Acc: 76.25% Test Loss: 0.7469 | Test Acc: 73.94%
Epoch [8/20] Train Loss: 0.6471 | Train Acc: 77.10% Test Loss: 0.6893 | Test Acc: 76.22%
Epoch [9/20] Train Loss: 0.6137 | Train Acc: 78.53% Test Loss: 0.7084 | Test Acc: 76.05%
Epoch [10/20] Train Loss: 0.5823 | Train Acc: 79.77% Test Loss: 0.7492 | Test Acc: 74.47%
Epoch [11/20] 

In [7]:
# ======================================================
# WideResNet for CIFAR-10 (PyTorch) - 20 Epochs
# ======================================================

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# ---------------- Device -----------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# ---------------- Data Augmentation -----------------
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))
])

train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=100, shuffle=False, num_workers=4)

# ---------------- WideResNet Definition -----------------
# Using torchvision Wide_ResNet-28-10 pretrained on CIFAR
from torchvision.models import wide_resnet50_2

class CIFARWideResNet(nn.Module):
    def __init__(self, num_classes=10):
        super(CIFARWideResNet, self).__init__()
        self.model = wide_resnet50_2(weights=None)  # train from scratch
        self.model.fc = nn.Linear(self.model.fc.in_features, num_classes)

    def forward(self, x):
        return self.model(x)

model = CIFARWideResNet().to(device)

# ---------------- Loss and Optimizer -----------------
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20)

# ---------------- Training Loop -----------------
def train():
    model.train()
    running_loss = 0
    correct = 0
    total = 0
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

    epoch_loss = running_loss / total
    epoch_acc = 100. * correct / total
    return epoch_loss, epoch_acc

def test():
    model.eval()
    running_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            running_loss += loss.item() * inputs.size(0)
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    epoch_loss = running_loss / total
    epoch_acc = 100. * correct / total
    return epoch_loss, epoch_acc

# ---------------- Run Training -----------------
for epoch in range(20):
    train_loss, train_acc = train()
    test_loss, test_acc = test()
    scheduler.step()
    print(f"Epoch [{epoch+1}/20] | "
          f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}% | "
          f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%")


Using device: cuda
Files already downloaded and verified
Files already downloaded and verified
Epoch [1/20] | Train Loss: 4.0218, Train Acc: 16.29% | Test Loss: 1.9460, Test Acc: 23.10%
Epoch [2/20] | Train Loss: 1.8678, Train Acc: 29.53% | Test Loss: 1.7239, Test Acc: 35.29%
Epoch [3/20] | Train Loss: 1.7325, Train Acc: 34.55% | Test Loss: 1.6168, Test Acc: 40.12%
Epoch [4/20] | Train Loss: 1.6092, Train Acc: 40.59% | Test Loss: 1.5219, Test Acc: 44.18%
Epoch [5/20] | Train Loss: 1.5083, Train Acc: 44.81% | Test Loss: 1.3691, Test Acc: 49.40%
Epoch [6/20] | Train Loss: 1.4136, Train Acc: 48.63% | Test Loss: 1.2995, Test Acc: 52.85%
Epoch [7/20] | Train Loss: 1.3212, Train Acc: 52.48% | Test Loss: 1.2197, Test Acc: 56.21%
Epoch [8/20] | Train Loss: 1.2129, Train Acc: 56.44% | Test Loss: 1.0920, Test Acc: 61.17%
Epoch [9/20] | Train Loss: 1.1304, Train Acc: 59.61% | Test Loss: 1.0420, Test Acc: 62.78%
Epoch [10/20] | Train Loss: 1.0344, Train Acc: 63.36% | Test Loss: 1.0195, Test Acc: 6

In [8]:
# =====================================================
# EfficientNet-B0/B1 on CIFAR-10 with CUDA (PyTorch)
# =====================================================

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split

# ---------------- Device -----------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# ---------------- Hyperparameters -----------------
batch_size = 128
lr = 0.001
epochs = 20
model_name = "efficientnet_b0"  # Change to "efficientnet_b1" for B1

# ---------------- Data Transforms -----------------
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2023, 0.1994, 0.2010))
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2023, 0.1994, 0.2010))
])

# ---------------- CIFAR-10 Dataset -----------------
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

# Split train into train+val
train_size = int(0.9 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_set, val_set = random_split(train_dataset, [train_size, val_size])

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

# ---------------- Load Model -----------------
if model_name == "efficientnet_b0":
    model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1)
elif model_name == "efficientnet_b1":
    model = models.efficientnet_b1(weights=models.EfficientNet_B1_Weights.IMAGENET1K_V1)
else:
    raise ValueError("Choose model_name as 'efficientnet_b0' or 'efficientnet_b1'")

# Change the classifier for CIFAR-10 (10 classes)
model.classifier[1] = nn.Linear(model.classifier[1].in_features, 10)
model = model.to(device)

# ---------------- Loss & Optimizer -----------------
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

# ---------------- Training Loop -----------------
best_val_acc = 0
for epoch in range(epochs):
    model.train()
    running_loss = 0
    correct = 0
    total = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    train_loss = running_loss / total
    train_acc = 100. * correct / total

    # Validation
    model.eval()
    val_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * images.size(0)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    val_loss /= total
    val_acc = 100. * correct / total
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), f"{model_name}_best.pth")

    print(f"Epoch [{epoch+1}/{epochs}] "
          f"Train Loss: {train_loss:.4f} Train Acc: {train_acc:.2f}% "
          f"Val Loss: {val_loss:.4f} Val Acc: {val_acc:.2f}%")

# ---------------- Test Accuracy -----------------
model.load_state_dict(torch.load(f"{model_name}_best.pth"))
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

print(f"Test Accuracy: {100.*correct/total:.2f}%")


Using device: cuda
Files already downloaded and verified
Files already downloaded and verified


Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to ./torch_cache\hub\checkpoints\efficientnet_b0_rwightman-7f5810bc.pth
100.0%


Epoch [1/20] Train Loss: 1.1732 Train Acc: 59.18% Val Loss: 0.7862 Val Acc: 72.72%
Epoch [2/20] Train Loss: 0.7584 Train Acc: 73.98% Val Loss: 0.6420 Val Acc: 77.14%
Epoch [3/20] Train Loss: 0.6375 Train Acc: 78.08% Val Loss: 0.5666 Val Acc: 80.32%
Epoch [4/20] Train Loss: 0.5616 Train Acc: 80.66% Val Loss: 0.5223 Val Acc: 82.32%
Epoch [5/20] Train Loss: 0.5176 Train Acc: 82.24% Val Loss: 0.5236 Val Acc: 82.30%
Epoch [6/20] Train Loss: 0.4811 Train Acc: 83.31% Val Loss: 0.4957 Val Acc: 83.42%
Epoch [7/20] Train Loss: 0.4485 Train Acc: 84.48% Val Loss: 0.5072 Val Acc: 83.74%
Epoch [8/20] Train Loss: 0.4223 Train Acc: 85.36% Val Loss: 0.4748 Val Acc: 83.88%
Epoch [9/20] Train Loss: 0.4025 Train Acc: 86.20% Val Loss: 0.4625 Val Acc: 84.02%
Epoch [10/20] Train Loss: 0.3856 Train Acc: 86.69% Val Loss: 0.4547 Val Acc: 84.38%
Epoch [11/20] Train Loss: 0.3582 Train Acc: 87.61% Val Loss: 0.4382 Val Acc: 85.40%
Epoch [12/20] Train Loss: 0.3431 Train Acc: 88.05% Val Loss: 0.4461 Val Acc: 85.48%
E

  model.load_state_dict(torch.load(f"{model_name}_best.pth"))


Test Accuracy: 86.39%
