# Imports

In [1]:
from __future__ import print_function

import json

import torch

from data_loader import load_cifar10, get_class_names
from model import StandardCNN
from training_utils import train_model, continue_training

# CUDA

In [2]:
print(f"Is CUDA available? {torch.cuda.is_available()}")
!nvcc --version

Is CUDA available? True
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2024 NVIDIA Corporation
Built on Thu_Jun__6_02:18:23_PDT_2024
Cuda compilation tools, release 12.5, V12.5.82
Build cuda_12.5.r12.5/compiler.34385749_0


In [3]:
# Optional one-time setup: uncomment the line below to install PyTorch from the cu124 wheel index.
#!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124

# Initialization

In [4]:
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [5]:
# Load CIFAR-10 with a fixed batch size and seed; the helper returns the two
# loaders followed by the X/Y train and test objects, in this order.
(
    train_loader, test_loader,
    X_train, X_test,
    Y_train, Y_test,
) = load_cifar10(batch_size=64, seed=42)

# Class labels as provided by the data_loader helper.
class_names = get_class_names()

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:01<00:00, 105MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


# Train CNN

In [6]:
# Instantiate the CNN and place it on the selected device in one step.
model = StandardCNN().to(device)

# Sanity check: show which device the first parameter tensor lives on.
print(f"Model device: {next(model.parameters()).device}")

Model device: cuda:0


In [8]:
# Train the CNN for up to 500 epochs; the returned value is kept in `metrics`.
metrics = train_model(
    model=model,
    model_name='cnn',
    train_loader=train_loader, test_loader=test_loader,
    num_epochs=500,
    device=device,
)

Epoch 1/500 | LR: 0.000100 | Train Acc: 35.8% | Test Acc: 39.5% | Gap: -3.8%
Epoch 2/500 | LR: 0.000100 | Train Acc: 41.2% | Test Acc: 42.6% | Gap: -1.4%
Epoch 3/500 | LR: 0.000100 | Train Acc: 44.0% | Test Acc: 44.7% | Gap: -0.7%
Epoch 4/500 | LR: 0.000100 | Train Acc: 45.4% | Test Acc: 45.2% | Gap: 0.1%
Epoch 5/500 | LR: 0.000100 | Train Acc: 46.6% | Test Acc: 45.8% | Gap: 0.8%
Epoch 6/500 | LR: 0.000100 | Train Acc: 47.3% | Test Acc: 47.7% | Gap: -0.4%
Epoch 7/500 | LR: 0.000100 | Train Acc: 48.2% | Test Acc: 48.1% | Gap: 0.1%
Epoch 8/500 | LR: 0.000100 | Train Acc: 49.0% | Test Acc: 49.4% | Gap: -0.4%
Epoch 9/500 | LR: 0.000100 | Train Acc: 49.5% | Test Acc: 49.5% | Gap: -0.0%
Epoch 10/500 | LR: 0.000100 | Train Acc: 50.0% | Test Acc: 49.4% | Gap: 0.5%
Epoch 11/500 | LR: 0.000100 | Train Acc: 50.7% | Test Acc: 50.8% | Gap: -0.1%
Epoch 12/500 | LR: 0.000100 | Train Acc: 51.0% | Test Acc: 50.0% | Gap: 1.0%
Epoch 13/500 | LR: 0.000100 | Train Acc: 51.4% | Test Acc: 51.5% | Gap: -0.1%


KeyboardInterrupt: 

# Continue training

In [None]:
# Single source of truth for the checkpoint location: the same directory is
# used to read the saved metrics and is handed to continue_training below.
checkpoint_dir = 'cnn_checkpoints'

# Read the saved metrics to report how far the previous run got.
with open(f'{checkpoint_dir}/training_metrics.json', 'r') as f:
    metrics = json.load(f)
print(f"Last completed epoch: {metrics['current_epoch']}")

# Resume training from that checkpoint directory up to the target epoch count.
# `metrics` is then replaced by the value continue_training returns.
metrics = continue_training(
    model=model,
    train_loader=train_loader,
    test_loader=test_loader,
    model_name='cnn',
    checkpoint_dir=checkpoint_dir,
    target_epochs=2000,
    device=device
)