In [1]:
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
from torchvision import transforms as T
from torchvision.datasets import CIFAR10
from torch.utils.data import Dataset, DataLoader

In [4]:
# Channel-wise constants handed to T.Normalize in both pipelines below.
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2023, 0.1994, 0.2010)


def _to_normalized_tensor():
    """Build the ToTensor + Normalize tail used by the train and val pipelines.

    Fresh transform objects are created on every call, so the two pipelines
    do not share instances.
    """
    return [
        T.ToTensor(),
        T.Normalize(mean=CIFAR10_MEAN, std=CIFAR10_STD),
    ]


# Training: random 32x32 crop (with padding=4) and random horizontal flip,
# followed by tensor conversion and normalisation.
train_transforms = T.Compose(
    [T.RandomCrop(32, padding=4), T.RandomHorizontalFlip()] + _to_normalized_tensor()
)

# Validation: no augmentation, only tensor conversion and normalisation.
val_transforms = T.Compose(_to_normalized_tensor())

In [15]:
TRAIN_BATCH_SIZE, VAL_BATCH_SIZE = 64, 250

# Arguments common to both CIFAR10 splits and to both loaders.
dataset_kwargs = dict(root='./data', download=True)
loader_kwargs = dict(num_workers=1)

# Datasets: the training split gets the augmenting pipeline, the test split
# (train=False) gets the plain validation pipeline.
train_dataset = CIFAR10(train=True, transform=train_transforms, **dataset_kwargs)
val_dataset = CIFAR10(train=False, transform=val_transforms, **dataset_kwargs)

# Loaders: only the training loader shuffles; validation keeps dataset order.
train_loader = DataLoader(
    train_dataset,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
    **loader_kwargs,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=VAL_BATCH_SIZE,
    shuffle=False,
    **loader_kwargs,
)

Files already downloaded and verified
Files already downloaded and verified


In [12]:
from src.engine import train
from src.groups import P4, P4m
from experiments.CIFAR10 import ResNet, GResNet

### Non-equivariant architecture
---

In [18]:
# One config dict per group of residual blocks: three blocks each, stride 2,
# with the output channel count going 16 -> 32 -> 64.
block_kwargs = [
    {"num_blocks" : 3, "out_channels" : width, "stride" : 2}
    for width in (16, 32, 64)
]

# Plain (non-equivariant) ResNet for 3-channel images and 10 classes.
model = ResNet(num_classes=10, block_kwargs=block_kwargs, img_channels=3, in_channels=16)

# Report the total number of parameter elements in the model.
num_params = sum(weight.numel() for weight in model.parameters())
print(f"Model has {num_params} parameters")

Model has 272762 parameters


In [19]:
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

criterion = nn.CrossEntropyLoss()
model = model.to(device)

# SGD with momentum; MultiStepLR multiplies the learning rate by 0.2 at
# milestones 30 and 40.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.05,
    momentum=0.95,
)
scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer,
    milestones=[30, 40],
    gamma=0.2,
)

In [20]:
NUM_EPOCHS = 50

# Run the project's training loop (src.engine.train) on the train/val loaders.
# The `device` chosen earlier ('cuda' when available, otherwise 'cpu') is passed
# instead of a hard-coded 'cuda', so the call always agrees with where the model
# was moved and the cell also runs on machines without a GPU.
history = train(
    model,
    dataloaders={"train" : train_loader, "val" : val_loader},
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    num_epochs=NUM_EPOCHS,
    device=device
)

Epoch 0/49
----------
train Loss: 1.7589 Acc: 0.3413
  val Loss: 1.4006 Acc: 0.4874
Epoch 1/49
----------
train Loss: 1.3086 Acc: 0.5251
  val Loss: 1.2827 Acc: 0.5596
Epoch 2/49
----------
train Loss: 1.0912 Acc: 0.6102
  val Loss: 1.0531 Acc: 0.6295
Epoch 3/49
----------
train Loss: 0.9676 Acc: 0.6586
  val Loss: 0.9361 Acc: 0.6728
Epoch 4/49
----------
train Loss: 0.8734 Acc: 0.6917
  val Loss: 0.8703 Acc: 0.6980
Epoch 5/49
----------
train Loss: 0.8057 Acc: 0.7183
  val Loss: 0.8827 Acc: 0.6937
Epoch 6/49
----------
train Loss: 0.7527 Acc: 0.7364
  val Loss: 0.7690 Acc: 0.7410
Epoch 7/49
----------
train Loss: 0.7078 Acc: 0.7523
  val Loss: 0.6991 Acc: 0.7608
Epoch 8/49
----------
train Loss: 0.6657 Acc: 0.7665
  val Loss: 0.6896 Acc: 0.7604
Epoch 9/49
----------
train Loss: 0.6382 Acc: 0.7783
  val Loss: 0.6350 Acc: 0.7820
Epoch 10/49
----------
train Loss: 0.6090 Acc: 0.7889
  val Loss: 0.6179 Acc: 0.7901
Epoch 11/49
----------
train Loss: 0.5860 Acc: 0.7963
  val Loss: 0.6402 Ac

### Equivariant architecture
---

In [26]:
# One config dict per group of residual blocks: three blocks each, stride 2,
# with the output channel count going 6 -> 12 -> 24.
block_kwargs = [
    {"num_blocks" : 3, "out_channels" : width, "stride" : 2}
    for width in (6, 12, 24)
]

# Group-equivariant ResNet built over the P4m group, for 3-channel images
# and 10 classes.
model = GResNet(
    group=P4m,
    num_classes=10,
    block_kwargs=block_kwargs,
    img_channels=3,
    in_channels=6,
)

# Report the total number of parameter elements in the model.
num_params = sum(weight.numel() for weight in model.parameters())
print(f"Model has {num_params} parameters")

Model has 302080 parameters


In [27]:
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

criterion = nn.CrossEntropyLoss()
model = model.to(device)

# Adam with a cosine-annealed learning rate.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=2e-3,
)
# NOTE(review): T_max=100 is twice the 50 epochs used for training in this
# notebook; if `train` steps the scheduler once per epoch, the learning rate
# never reaches eta_min. Confirm this is intended.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=100,
    eta_min=1e-5,
)

In [None]:
NUM_EPOCHS = 50

# Run the project's training loop (src.engine.train) on the train/val loaders
# for the equivariant model.
# The `device` chosen earlier ('cuda' when available, otherwise 'cpu') is passed
# instead of a hard-coded 'cuda', so the call always agrees with where the model
# was moved and the cell also runs on machines without a GPU.
history = train(
    model,
    dataloaders={"train" : train_loader, "val" : val_loader},
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    num_epochs=NUM_EPOCHS,
    device=device
)

Epoch 0/49
----------




train Loss: 1.7122 Acc: 0.3539
  val Loss: 1.5372 Acc: 0.4338
Epoch 1/49
----------
train Loss: 1.3592 Acc: 0.5003
  val Loss: 1.4672 Acc: 0.4897
Epoch 2/49
----------
train Loss: 1.1659 Acc: 0.5797
  val Loss: 1.2072 Acc: 0.5736
Epoch 3/49
----------
train Loss: 1.0335 Acc: 0.6328
  val Loss: 1.0988 Acc: 0.5946
Epoch 4/49
----------
train Loss: 0.9546 Acc: 0.6609
  val Loss: 0.9992 Acc: 0.6414
Epoch 5/49
----------
train Loss: 0.8996 Acc: 0.6815
  val Loss: 0.9008 Acc: 0.6776
Epoch 6/49
----------
train Loss: 0.8424 Acc: 0.7041
  val Loss: 1.0829 Acc: 0.6191
Epoch 7/49
----------
train Loss: 0.8078 Acc: 0.7171
  val Loss: 0.8181 Acc: 0.7093
Epoch 8/49
----------
train Loss: 0.7712 Acc: 0.7283
  val Loss: 0.8232 Acc: 0.7141
Epoch 9/49
----------
train Loss: 0.7478 Acc: 0.7384
  val Loss: 0.8379 Acc: 0.7121
Epoch 10/49
----------
train Loss: 0.7143 Acc: 0.7486
  val Loss: 0.8844 Acc: 0.6942
Epoch 11/49
----------
train Loss: 0.6967 Acc: 0.7551
  val Loss: 0.7231 Acc: 0.7453
Epoch 12/49
