In [1]:
import numpy as np
import random
import torch
import torch.nn as nn
import torch.optim as optim
from matplotlib import pyplot as plt
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from tqdm import tqdm
from torch.optim import Adam, SGD, RMSprop

In [2]:
def make_model():
    model = nn.Sequential(
        # Block 1
        nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(32),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),

        #Block 2
        nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),

        # Block 3
        nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(128),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),

        nn.Flatten(),
        nn.Linear(128 * 4 * 4, 256),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(256, 100)
    )
    return model

In [3]:
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [4]:
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761))])

train_set = datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
test_set = datasets.CIFAR100(root='./data', train=False, transform=transform)

train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
test_loader = DataLoader(test_set, batch_size=64, shuffle=True)

classes = train_set.classes

Files already downloaded and verified


In [5]:
def evaluate(model, loader, criterion, device):
    model.eval()
    correct, total = 0, 0
    running_loss = 0.0

    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            output = model(images)
            loss = criterion(output, labels)

            running_loss += loss.item()
            _, predicted = torch.max(output, 1)
            correct += (predicted==labels).sum().item()
            total += labels.size(0)

    val_loss = running_loss/len(loader)
    val_acc = 100 * correct / total
    return val_loss, val_acc

def train(model, train_loader, test_loader, criterion, optimizer, device, epochs):
    model.to(device)
    history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct, total = 0, 0

        train_bar = tqdm(train_loader, desc=f"Epoch {epoch + 1}/{epochs} [Train]", leave=False)
        for images, labels in train_bar:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            output = model(images)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(output, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

            train_bar.set_postfix(loss=loss.item(), accuracy=100 * correct / total)

        train_loss = running_loss / len(train_loader)
        train_acc = 100 * correct / total
        val_loss, val_acc = evaluate(model, test_loader, criterion, device)

        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)

        print(f"Epoch {epoch+1}/{epochs} | TrainLoss: {train_loss:.4f} | ValLoss: {val_loss:.4f} | TrainAcc: {train_acc:.4f}% | ValAcc: {val_acc:.4f}%")
    return history

In [6]:
def plot_training_curves(results_dict):
    plt.figure(figsize=(12, 6))
    plt.subplot(2,2,1)
    for name, history in results_dict.items():
        plt.plot(history["train_loss"], label=f"{name} Train Loss")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.title("Training Loss")
    plt.legend()

    plt.subplot(2,2,2)
    for name, history in results_dict.items():
        plt.plot(history["val_loss"], label=f"{name} Val Loss")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.title("Validation Loss")
    plt.legend()

    plt.subplot(2,2,3)
    for name, history in results_dict.items():
        plt.plot(history["train_acc"], label=f"{name} Train Accuracy")
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy (%)")
    plt.title("Training Accuracy")
    plt.legend()

    plt.subplot(2,2,4)
    for name, history in results_dict.items():
        plt.plot(history["val_acc"], label=f"{name} Val Accuracy")
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy (%)")
    plt.title("Validation Accuracy")
    plt.legend()
    plt.tight_layout()
    plt.show()

In [7]:
def show_predictions(model, loader, device, n=8):
    model.eval()

    dataiter = iter(loader)
    images, labels = next(dataiter)

    # Move images to SAME device as model
    images = images.to(device)
    labels = labels.to(device)

    with torch.no_grad():
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)

    # Move back to CPU for plotting
    images = images.cpu().numpy()
    labels = labels.cpu()
    predicted = predicted.cpu()

    plt.figure(figsize=(12, 3))
    for i in range(n):
        plt.subplot(1, n, i+1)
        img = images[i].transpose((1, 2, 0))
        img = img * 0.2675 + 0.5071
        img = np.clip(img, 0, 1)
        plt.imshow(img)
        plt.title(f"P: {classes[predicted[i]]}\nT: {classes[labels[i]]}", fontsize=8)
        plt.axis('off')
    plt.show()

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
# 1
lrs = [1e-2, 1e-3, 1e-4]
results_lr = {}
for lr in lrs:
    print(f"\nTraining with LR={lr}")
    set_seed(0)
    model = make_model().to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    history = train(model, train_loader, test_loader, criterion, optimizer, device, epochs=5)
    results_lr[f"LR={lr}"] = history
plot_training_curves(results_lr)

best_lr_model = make_model().to(device)
optimizer = optim.Adam(best_lr_model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()
history = train(best_lr_model, train_loader, test_loader, criterion, optimizer, device, epochs=5)
show_predictions(best_lr_model, test_loader, device)

# 2
optimizers = [Adam, SGD, RMSprop]
results_optimizer = {}
for opt_cls in optimizers:
    name = opt_cls.__name__
    print(f"\nTraining with Optimizer: {name}")
    set_seed(0)
    model = make_model().to(device)
    optimizer = opt_cls(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()
    history = train(model, train_loader, test_loader, criterion, optimizer, device, epochs=5)
    results_optimizer[name] = history
plot_training_curves(results_optimizer)

# 3
losses = {
    "CrossEntropy": nn.CrossEntropyLoss(),
    "LabelSmoothing0.1": nn.CrossEntropyLoss(label_smoothing=0.1)
}

results_loss = {}
for name, loss in losses.items():
    print(f"\nTraining with Loss: {name}")
    set_seed(0)
    model = make_model().to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    history = train(model, train_loader, test_loader, loss, optimizer, device, epochs=5)
    results_loss[name] = history
plot_training_curves(results_loss)

cuda

Training with LR=0.01


                                                                                               

Epoch 1/5 | TrainLoss: 4.6567 | ValLoss: 4.6074 | TrainAcc: 0.9880% | ValAcc: 1.0000%


                                                                                               

Epoch 2/5 | TrainLoss: 4.6093 | ValLoss: 4.6071 | TrainAcc: 0.9340% | ValAcc: 1.0000%


                                                                                               

Epoch 3/5 | TrainLoss: 4.6091 | ValLoss: 4.6068 | TrainAcc: 0.8920% | ValAcc: 1.0000%


                                                                                               

Epoch 4/5 | TrainLoss: 4.6089 | ValLoss: 4.6078 | TrainAcc: 0.9420% | ValAcc: 1.0000%


                                                                                               

Epoch 5/5 | TrainLoss: 4.6091 | ValLoss: 4.6075 | TrainAcc: 0.9660% | ValAcc: 1.0000%

Training with LR=0.001


                                                                                              

Epoch 1/5 | TrainLoss: 4.0238 | ValLoss: 3.4460 | TrainAcc: 7.7760% | ValAcc: 18.4000%


                                                                                              

Epoch 2/5 | TrainLoss: 3.5778 | ValLoss: 3.1260 | TrainAcc: 13.4640% | ValAcc: 24.2600%


                                                                                              

Epoch 3/5 | TrainLoss: 3.3628 | ValLoss: 2.9051 | TrainAcc: 16.6460% | ValAcc: 27.7500%


                                                                                              

Epoch 4/5 | TrainLoss: 3.2436 | ValLoss: 2.8291 | TrainAcc: 18.3960% | ValAcc: 28.8800%


                                                                                              

Epoch 5/5 | TrainLoss: 3.1509 | ValLoss: 2.6643 | TrainAcc: 19.7040% | ValAcc: 32.6100%

Training with LR=0.0001


                                                                                              

Epoch 1/5 | TrainLoss: 4.1362 | ValLoss: 3.5721 | TrainAcc: 7.9680% | ValAcc: 19.2900%


                                                                                              

Epoch 2/5 | TrainLoss: 3.5500 | ValLoss: 3.1294 | TrainAcc: 16.3940% | ValAcc: 26.4600%


                                                                                              

Epoch 3/5 | TrainLoss: 3.2088 | ValLoss: 2.8689 | TrainAcc: 22.1040% | ValAcc: 30.6900%


                                                                                              

Epoch 4/5 | TrainLoss: 2.9837 | ValLoss: 2.6794 | TrainAcc: 26.0160% | ValAcc: 33.8700%


                                                                                              