In [6]:
import torch

torch.cuda.empty_cache()

In [2]:
from omegaconf import OmegaConf
from hydra import initialize, compose

with initialize(config_path="configs", version_base="1.3.2"):
    cfg = compose(config_name="config")
print(OmegaConf.to_yaml(cfg))

model:
  block: BasicBlock
  layers:
  - 3
  - 4
  - 6
  - 3
dataset:
  name: CIFAR10
  root: ./data
  num_classes: 10
  color_channels: 3
scheduler:
  name: reduce_on_plateau
  mode: min
  factor: 0.1
  patience: 3
  threshold: 0.0001
  threshold_mode: rel
  cooldown: 0
  min_lr: 0.0
  eps: 1.0e-08
transforms:
  train:
    RandomCrop:
      size: 32
      padding: 4
    RandomHorizontalFlip: {}
    ToTensor: {}
    Normalize:
      mean:
      - 0.4914
      - 0.4822
      - 0.4465
      std:
      - 0.247
      - 0.2435
      - 0.2616
  val:
    ToTensor: {}
    Normalize:
      mean:
      - 0.4914
      - 0.4822
      - 0.4465
      std:
      - 0.247
      - 0.2435
      - 0.2616
training:
  epochs: 50
  lr: 0.1
  momentum: 0.9
  weight_decay: 0.0005
  batch_size: 64
  num_workers: 4
  seed: 42
  device: cuda
logging:
  log_dir: ./runs/${now:%Y-%m-%d_%H-%M-%S}
  tensorboard: true
  use_notebook: false





In [3]:
from datasets.cifar import get_cifar10_loaders
from models.cnns.resnet import resnet18
from torchinfo import summary

train_loader, val_loader = get_cifar10_loaders(
    root=cfg.dataset.root,
    batch_size=cfg.training.batch_size,
    num_workers=cfg.training.num_workers,
    transform_cfg=cfg.transforms,
)

model = resnet18(num_classes=cfg.dataset.num_classes, color_channels=cfg.dataset.color_channels)
summary(model)

Layer (type:depth-idx)                   Param #
ResNet                                   --
├─Conv2d: 1-1                            9,408
├─BatchNorm2d: 1-2                       128
├─ReLU: 1-3                              --
├─MaxPool2d: 1-4                         --
├─Sequential: 1-5                        --
│    └─BasicBlock: 2-1                   --
│    │    └─Conv2d: 3-1                  36,864
│    │    └─BatchNorm2d: 3-2             128
│    │    └─Conv2d: 3-3                  36,864
│    │    └─BatchNorm2d: 3-4             128
│    │    └─ReLU: 3-5                    --
│    └─BasicBlock: 2-2                   --
│    │    └─Conv2d: 3-6                  36,864
│    │    └─BatchNorm2d: 3-7             128
│    │    └─Conv2d: 3-8                  36,864
│    │    └─BatchNorm2d: 3-9             128
│    │    └─ReLU: 3-10                   --
├─Sequential: 1-6                        --
│    └─BasicBlock: 2-3                   --
│    │    └─Sequential: 3-11             8,448


In [4]:
import torch.nn as nn, torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(),
                      lr=cfg.training.lr,
                      momentum=cfg.training.momentum,
                      weight_decay=cfg.training.weight_decay)

scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode=cfg.scheduler.mode,
    factor=cfg.scheduler.factor,
    patience=cfg.scheduler.patience,
    threshold=cfg.scheduler.threshold,
    threshold_mode=cfg.scheduler.threshold_mode,
    cooldown=cfg.scheduler.cooldown,
    min_lr=cfg.scheduler.min_lr,
    eps=cfg.scheduler.eps
)

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(2):
    model.train()
    train_loss = 0
    for imgs, labels in train_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        preds = model(imgs)
        loss = criterion(preds, labels)
        train_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"Train Loss: {train_loss / len(train_loader):.4f}")

    model.eval()
    correct, val_loss, total = 0, 0, 0
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    print(f"Val Loss: {val_loss / len(val_loader):.4f} Val  Acc : {correct / total:.4f}")
    scheduler.step(val_loss)

Train Loss: 2.0062
Val Loss: 1.6399 Val  Acc : 0.3998
Train Loss: 1.5096
Val Loss: 1.4099 Val  Acc : 0.4873
