In [None]:
# ── A. Imports & Reproducibility ────────────────────────────────────────────────
import os
import csv                                                  # For result logging
import random                                               # For seeding
import numpy as np                                          # For numeric ops
import torch                                               # Core PyTorch
import torch.nn as nn                                       # Neural-net modules
import torch.nn.functional as F                             # Functional API
import torch.optim as optim                                 # Optimizers
from torch.optim.lr_scheduler import CosineAnnealingLR      # Scheduler
from torch.utils.data import DataLoader, random_split       # Data loaders & splits
import torchvision                                          # Datasets & transforms
import torchvision.transforms as T                          # Transforms
from torch.utils.tensorboard import SummaryWriter           # TensorBoard logging
import matplotlib.pyplot as plt                             # Plotting

In [None]:
# Fix the seed of every RNG this notebook touches: Python's `random`, NumPy's
# legacy global generator, and PyTorch (default generator plus all CUDA devices).
seed = 42
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    _seed_fn(seed)


In [None]:
# ── B. Device ───────────────────────────────────────────────────────────────────
# Use CUDA when PyTorch reports it as available, otherwise fall back to the CPU.
_backend = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_backend)
print(f"Using device: {device}")  # Confirm GPU vs CPU


Using device: cpu


In [None]:
# ── C. Data Preparation ─────────────────────────────────────────────────────────
# Transforms
# Per-channel normalisation constants shared by both pipelines
# (presumably the CIFAR-100 channel mean / std — TODO confirm the source).
_norm_mean = (0.5071, 0.4867, 0.4408)
_norm_std = (0.2675, 0.2565, 0.2761)

# Training pipeline: random 32x32 crop after 4-pixel padding and a random
# horizontal flip, followed by tensor conversion and normalisation.
transform_train = T.Compose([
    T.RandomCrop(32, padding=4),
    T.RandomHorizontalFlip(),
    T.ToTensor(),
    T.Normalize(_norm_mean, _norm_std),
])

# Evaluation pipeline: no augmentation, only tensor conversion and normalisation.
transform_test = T.Compose([
    T.ToTensor(),
    T.Normalize(_norm_mean, _norm_std),
])


In [None]:
# Download & train/val/test split
# Two views of the same CIFAR-100 training images are created: one with the
# augmenting training transform and one with the plain evaluation transform.
# The split indices are drawn once (seeded) and reused, so the validation
# subset holds the same samples it would have held before, but without
# random crop / flip applied to it.
dataset_full = torchvision.datasets.CIFAR100(
    root='./data', train=True, download=True, transform=transform_train)
dataset_full_eval = torchvision.datasets.CIFAR100(
    root='./data', train=True, download=False, transform=transform_test)

val_size = 5000
train_size = len(dataset_full) - val_size
train_dataset, _val_split = random_split(
    dataset_full, [train_size, val_size],
    generator=torch.Generator().manual_seed(seed))

# Same indices as the random split, but served through the evaluation transform.
val_dataset = torch.utils.data.Subset(dataset_full_eval, _val_split.indices)

test_dataset = torchvision.datasets.CIFAR100(
    root='./data', train=False, download=True, transform=transform_test)

100%|██████████| 169M/169M [00:01<00:00, 90.8MB/s]


In [None]:
# ── D. Model Definition ─────────────────────────────────────────────────────────
class LELeNetCIFAR(nn.Module):
    """LeNet-style CNN for 3-channel 32x32 images.

    Architecture: two 5x5 convolutions with 64 output channels each
    (padding 2, so the spatial size is preserved), each followed by ReLU and
    2x2 max pooling, then three fully connected layers (384 -> 192 ->
    ``num_classes``). ``fc1`` expects 64*8*8 features, i.e. a 32x32 input
    halved twice by the pooling layers.

    Args:
        num_classes: Size of the output layer. Defaults to 100, the value
            that was previously hard-coded.
    """

    def __init__(self, num_classes: int = 100):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=5, padding=2)
        self.pool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=5, padding=2)
        self.pool2 = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(64 * 8 * 8, 384)
        self.fc2 = nn.Linear(384, 192)
        self.fc3 = nn.Linear(192, num_classes)

    def forward(self, x):
        """Return raw (un-normalised) class scores for a batch of images."""
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        # Keep the batch dimension, flatten everything else for the FC head.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

In [None]:
# ── E. Utilities: Train/Eval & Checkpointing ────────────────────────────────────
def train_one_epoch(model, optimizer, criterion, loader, device=None):
    """Run one optimisation pass over ``loader`` and report loss / accuracy.

    Args:
        model: Module to train; it is switched to training mode.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss callable taking ``(outputs, labels)``. The per-batch
            loss is weighted by the batch size, which assumes the criterion
            returns a batch mean — TODO confirm if a different reduction is used.
        loader: Iterable of ``(images, labels)`` tensor batches.
        device: Device the batches are moved to. When omitted, the device of
            the model's first parameter is used (CPU if the model has no
            parameters), so the function no longer relies on a notebook-global
            ``device`` variable.

    Returns:
        Tuple ``(mean_loss, accuracy)`` averaged over all samples seen.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    running_loss = correct = total = 0
    for imgs, lbls in loader:
        imgs, lbls = imgs.to(device), lbls.to(device)
        optimizer.zero_grad()
        out = model(imgs)
        loss = criterion(out, lbls)
        loss.backward()
        optimizer.step()
        # Weight by batch size so the final division gives a per-sample mean.
        running_loss += loss.item() * imgs.size(0)
        correct += out.argmax(1).eq(lbls).sum().item()
        total += lbls.size(0)
    return running_loss / total, correct / total

def eval_model(model, criterion, loader, device=None):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        criterion: Loss callable taking ``(outputs, labels)``. The per-batch
            loss is weighted by the batch size, which assumes the criterion
            returns a batch mean — TODO confirm if a different reduction is used.
        loader: Iterable of ``(images, labels)`` tensor batches.
        device: Device the batches are moved to. When omitted, the device of
            the model's first parameter is used (CPU if the model has no
            parameters), so the function no longer relies on a notebook-global
            ``device`` variable.

    Returns:
        Tuple ``(mean_loss, accuracy)`` averaged over all samples seen.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    running_loss = correct = total = 0
    with torch.no_grad():
        for imgs, lbls in loader:
            imgs, lbls = imgs.to(device), lbls.to(device)
            out = model(imgs)
            loss = criterion(out, lbls)
            # Weight by batch size so the final division gives a per-sample mean.
            running_loss += loss.item() * imgs.size(0)
            correct += out.argmax(1).eq(lbls).sum().item()
            total += lbls.size(0)
    return running_loss / total, correct / total

In [None]:
# Checkpoints bundle the epoch number, the model / optimizer / scheduler state
# dicts and the value of torch.get_rng_state() into a single file.
ckpt_dir = './checkpoints'
os.makedirs(ckpt_dir, exist_ok=True)


def save_checkpoint(model, optimizer, scheduler, epoch, is_best=False):
    """Write a training checkpoint for ``epoch`` into ``ckpt_dir``.

    The file is named ``best_ckpt_epoch_<epoch>.pth`` when ``is_best`` is
    truthy and ``last_ckpt_epoch_<epoch>.pth`` otherwise, so one file is
    produced per call and an existing file with the same name is overwritten.

    Args:
        model, optimizer, scheduler: Objects whose ``state_dict()`` is stored.
        epoch: Epoch number stored in the file and embedded in its name.
        is_best: Selects the ``best`` / ``last`` filename prefix.
    """
    prefix = 'best' if is_best else 'last'
    target = os.path.join(ckpt_dir, f"{prefix}_ckpt_epoch_{epoch}.pth")
    payload = {
        'epoch': epoch,
        'model_state': model.state_dict(),
        'optim_state': optimizer.state_dict(),
        'sched_state': scheduler.state_dict(),
        'rng_state': torch.get_rng_state(),
    }
    torch.save(payload, target)

In [None]:
# ── A. Hyperparameter Grid ─────────────────────────────────────────────────────
# Each entry is one full training configuration consumed by the run loop.
param_grid = [
    dict(lr=0.1, weight_decay=1e-4, batch_size=128, epochs=100),
    # … add more combinations as needed …
]

# Notes on the configurations to test:
#   - every run so far uses batch_size=128 and epochs=100
#   - already tested: lr = 0.001 with weight decay = 1e-5
# The string literal below is a no-op expression used as a block comment
# listing candidate grid values; it is never evaluated as code.
"""# ── Define the values you want to test ────────────────────────────────────────────
learning_rates   = [0.001, 0.01, 0.1]        # typical LR grid for CIFAR-100 :contentReference[oaicite:0]{index=0}
weight_decays    = [1e-5, 1e-4, 1e-3]        # common L2 regs :contentReference[oaicite:1]{index=1}
batch_sizes      = [32, 64, 128]             # fits 6 GB VRAM on Colab GPUs :contentReference[oaicite:2]{index=2}
epoch_counts     = [50, 100]                 # short vs full training"""

# Prepare CSV logging: create the results file with a header row only when it
# does not exist yet, so earlier results are kept across notebook runs.
csv_path = './results_grid.csv'
csv_header = ['lr', 'weight_decay', 'batch_size', 'epochs', 'best_val_acc', 'test_acc']
if not os.path.exists(csv_path):
    with open(csv_path, 'w', newline='') as csv_file:
        csv.writer(csv_file).writerow(csv_header)

# ── B. Loop Over Configurations ────────────────────────────────────────────────
# For every configuration: build loaders, train a fresh model, log per-epoch
# metrics to TensorBoard, checkpoint each epoch, then evaluate on the test set
# and append one row to the results CSV.
for cfg in param_grid:
    lr, wd, bs, epochs = cfg['lr'], cfg['weight_decay'], cfg['batch_size'], cfg['epochs']
    print(f"Running config: lr={lr}, wd={wd}, bs={bs}, epochs={epochs}")

    # Re-create DataLoaders per bs
    train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True, num_workers=2)
    val_loader   = DataLoader(val_dataset,   batch_size=bs, shuffle=False, num_workers=2)
    test_loader  = DataLoader(test_dataset,  batch_size=bs, shuffle=False, num_workers=2)

    # Instantiate fresh model, optimizer, scheduler, writer
    model     = LELeNetCIFAR().to(device)
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=wd)
    scheduler = CosineAnnealingLR(optimizer, T_max=epochs)
    criterion = nn.CrossEntropyLoss()
    writer    = SummaryWriter(log_dir=f'./logs/lr{lr}_wd{wd}_bs{bs}_ep{epochs}')

    best_val_acc = 0.0
    for epoch in range(1, epochs+1):
        train_loss, train_acc = train_one_epoch(model, optimizer, criterion, train_loader)
        val_loss,   val_acc   = eval_model(model, criterion, val_loader)
        scheduler.step()

        # Log to TensorBoard
        writer.add_scalars('Loss', {'train': train_loss, 'val': val_loss}, epoch)
        writer.add_scalars('Acc',  {'train': train_acc,  'val': val_acc},   epoch)

        # Save checkpoints. The "is this the best epoch so far" decision is
        # made once and reused for both the checkpoint label and the tracker.
        # NOTE: checkpoint filenames do not include the configuration, so runs
        # of different configs write to the same files.
        is_best = val_acc > best_val_acc
        save_checkpoint(model, optimizer, scheduler, epoch, is_best=is_best)
        if is_best:
            best_val_acc = val_acc

        print(f"  Epoch {epoch}/{epochs}  train_acc={train_acc:.4f}  val_acc={val_acc:.4f}")

    # Push buffered TensorBoard events to disk. The writer is flushed rather
    # than closed because the summary cell further down logs to `writer` again.
    writer.flush()

    # Final Test Evaluation
    # NOTE: this evaluates the weights from the final epoch, not the epoch
    # that achieved best_val_acc.
    test_loss, test_acc = eval_model(model, criterion, test_loader)
    print(f"Config {cfg} → best_val_acc={best_val_acc:.4f}, test_acc={test_acc:.4f}")

    # Append results to CSV. A dedicated name is used for the CSV writer so the
    # TensorBoard `writer` above is not overwritten.
    with open(csv_path, 'a', newline='') as results_file:
        csv_writer = csv.writer(results_file)
        csv_writer.writerow([lr, wd, bs, epochs, best_val_acc, test_acc])


Running config: lr=0.1, wd=0.0001, bs=128, epochs=100
  Epoch 1/100  train_acc=0.0623  val_acc=0.0950
  Epoch 2/100  train_acc=0.1017  val_acc=0.1318
  Epoch 3/100  train_acc=0.1217  val_acc=0.1360
  Epoch 4/100  train_acc=0.1371  val_acc=0.1506
  Epoch 5/100  train_acc=0.1470  val_acc=0.1336
  Epoch 6/100  train_acc=0.1574  val_acc=0.1580
  Epoch 7/100  train_acc=0.1665  val_acc=0.1746
  Epoch 8/100  train_acc=0.1687  val_acc=0.1504
  Epoch 9/100  train_acc=0.1734  val_acc=0.1584
  Epoch 10/100  train_acc=0.1782  val_acc=0.1836
  Epoch 11/100  train_acc=0.1884  val_acc=0.1862
  Epoch 12/100  train_acc=0.1890  val_acc=0.1930
  Epoch 13/100  train_acc=0.1900  val_acc=0.1904
  Epoch 14/100  train_acc=0.1966  val_acc=0.1952
  Epoch 15/100  train_acc=0.1939  val_acc=0.1882
  Epoch 16/100  train_acc=0.2024  val_acc=0.1790
  Epoch 17/100  train_acc=0.2020  val_acc=0.1990
  Epoch 18/100  train_acc=0.2056  val_acc=0.2122
  Epoch 19/100  train_acc=0.2117  val_acc=0.2072
  Epoch 20/100  train_ac

In [None]:
# ── Configuration Summary Cell ──────────────────────────────────────────────────
import torch, torchvision, sys, platform, time, os
import numpy as np
import random
from torch.utils.tensorboard import SummaryWriter

def summarize_run(cfg, train_loader, val_loader, test_loader, writer=None):
    """
    Print and optionally log a summary of the run configuration and environment.

    Args:
        cfg (dict): Hyperparameter dict with 'lr', 'weight_decay', 'batch_size', 'epochs', etc.
            An optional 'seed' entry is reported in its own section ('not set' when absent).
        train_loader, val_loader, test_loader: Loader objects that support ``len()``
            (number of batches) and expose a ``.dataset`` supporting ``len()``
            (number of samples).
        writer (optional): Object with an ``add_text(tag, text, step)`` method, e.g. a
            TensorBoard SummaryWriter. When given, every summary entry is logged as
            text under the tag 'RunInfo/<key>' at step 0.

    Returns:
        None. The summary is printed to stdout.

    Warns:
        RuntimeWarning: If ``writer`` has no callable ``add_text`` or a call to it
            fails. Logging stays best-effort: the failure is reported, not raised.
    """
    import warnings  # local import keeps this cell independent of the import cell

    # 1. Timestamp
    ts = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

    # 2. Environment
    cuda_ok = torch.cuda.is_available()
    env_info = {
        'python_version': sys.version.split()[0],
        'pytorch_version': torch.__version__,
        'torchvision_version': torchvision.__version__,
        'cuda_available': cuda_ok,
        'cuda_device': torch.cuda.get_device_name(0) if cuda_ok else 'CPU only',
        'device_count': torch.cuda.device_count() if cuda_ok else 0,
        'platform': platform.platform(),
        'cwd': os.getcwd(),
    }

    # 3. Data sizes
    data_info = {
        'train_samples': len(train_loader.dataset),
        'val_samples': len(val_loader.dataset),
        'test_samples': len(test_loader.dataset),
        'batch_size': cfg.get('batch_size'),
        'num_batches_train': len(train_loader),
        'num_batches_val': len(val_loader),
        'num_batches_test': len(test_loader),
    }

    # 4. Seed & Hyperparams ('seed' is shown separately, everything else in cfg
    #    counts as a hyperparameter)
    seed_info = {
        'seed': cfg.get('seed', 'not set'),
    }
    hyperparams = {k: v for k, v in cfg.items() if k not in seed_info}

    # 5. Print Summary
    print(f"{'='*20} RUN SUMMARY ({ts}) {'='*20}\n")
    print("➜ Environment:")
    for k, v in env_info.items():
        print(f"    • {k}: {v}")
    print("\n➜ Data:")
    for k, v in data_info.items():
        print(f"    • {k}: {v}")
    print("\n➜ Seed:")
    for k, v in seed_info.items():
        print(f"    • {k}: {v}")
    print("\n➜ Hyperparameters:")
    for k, v in hyperparams.items():
        print(f"    • {k}: {v}")
    print(f"\n{'='*60}\n")

    # 6. Optional TensorBoard Logging
    if writer is None:
        return

    add_text = getattr(writer, 'add_text', None)
    if not callable(add_text):
        # Typical cause: the name passed in was rebound to a different kind of
        # writer (e.g. a csv writer) before this function was called.
        warnings.warn(
            f"summarize_run: writer of type {type(writer).__name__} has no "
            "add_text(); run info was not logged.",
            RuntimeWarning,
            stacklevel=2,
        )
        return

    for k, v in {**env_info, **data_info, **seed_info, **hyperparams}.items():
        # Every value is stringified and logged as text at step 0.
        try:
            add_text('RunInfo/' + k, str(v), 0)
        except Exception as exc:
            # Best-effort: keep logging the remaining entries, but say what failed.
            warnings.warn(
                f"summarize_run: could not log 'RunInfo/{k}': {exc!r}",
                RuntimeWarning,
                stacklevel=2,
            )

# ── Example Usage ────────────────────────────────────────────────────────────────
# Relies on `cfg`, the three DataLoaders and `writer` left behind by the run
# cell, so that cell must have been executed first.
# NOTE(review): make sure `writer` still refers to the TensorBoard SummaryWriter
# at this point and not to another object bound to the same name.
summarize_run(
    cfg=cfg,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    writer=writer,
)


In [None]:
import pandas as pd
# Load the accumulated grid-search results and show them ranked by test accuracy.
df = pd.read_csv('./results_grid.csv')
ranked_by_test_acc = df.sort_values(by='test_acc', ascending=False)
display(ranked_by_test_acc)


In [None]:
# ── Final Analysis & Plotting ────────────────────────────────────────────────────
import pandas as pd
import matplotlib.pyplot as plt

# 1. Load results
csv_path = './results_grid.csv'
df = pd.read_csv(csv_path)

# 2. Display top 5 configs by test accuracy
top5 = df.sort_values(by='test_acc', ascending=False).iloc[:5]
print("Top 5 hyperparameter configurations:")
display(top5)

# 3. Bar plot of test accuracy for each config
# One multi-line tick label per CSV row, built from its hyperparameter columns.
config_labels = [
    f"lr={lr}\nwd={wd}\nbs={bs}\nep={ep}"
    for lr, wd, bs, ep in zip(df['lr'], df['weight_decay'], df['batch_size'], df['epochs'])
]

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(x=range(len(df)), height=df['test_acc'], tick_label=config_labels)
# Rotate the tick labels and right-align them so they stay readable.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
ax.set_ylabel('Test Accuracy')
ax.set_title('Grid Search Results: Test Accuracy by Hyperparameter Configuration')
fig.tight_layout()
plt.show()
