# Train CIFAR with the `policies` module

In [1]:
import torch

In [2]:
# Index of the GPU to train on when CUDA is available.
cuda_device = 0

# Select the requested GPU if there is one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda", cuda_device)
else:
    device = torch.device("cpu")

# Load the data

In [3]:
from torchvision import transforms

# Per-channel normalization constants.
# NOTE(review): these values match the widely used ImageNet channel
# statistics, not statistics computed on CIFAR-10 -- confirm this is intended.
_mean = [0.485, 0.456, 0.406]
_std = [0.229, 0.224, 0.225]

# Training pipeline: random flip and color jitter for augmentation, then
# conversion to a tensor and per-channel normalization.
train_transform = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=.3, contrast=.3, saturation=.3),
        transforms.ToTensor(),
        transforms.Normalize(mean=_mean, std=_std),
    ]
)

# Validation pipeline: no augmentation, only tensor conversion and normalization.
val_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=_mean, std=_std),
    ]
)

In [4]:
import torchvision.datasets as datasets

# Directory where the CIFAR-10 archive is stored (downloaded on first use).
root = "data"

# Training split, with augmentation.
train_ds = datasets.CIFAR10(
    root=root,
    train=True,
    download=True,
    transform=train_transform,
)
# Held-out test split, used here for validation (no augmentation).
val_ds = datasets.CIFAR10(
    root=root,
    train=False,
    download=True,
    transform=val_transform,
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz
Files already downloaded and verified


In [5]:
from torch.utils.data import DataLoader

BATCH_SIZE = 1024

# Options shared by both loaders.
_loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=8)

# Only the training loader reshuffles its samples.
train_dl = DataLoader(train_ds, shuffle=True, **_loader_kwargs)
val_dl = DataLoader(val_ds, shuffle=False, **_loader_kwargs)

# The model
We'll train a simple ResNet-18 network.
Training takes a while on CPU but is pretty quick on a GPU.

In [6]:
from torchvision.models import resnet18
import torch.nn as nn
import torch.optim as optim


def get_module(num_classes=10):
    """Build an untrained ResNet-18 with a `num_classes`-way classifier head.

    Args:
        num_classes: Number of outputs of the final linear layer.
            Defaults to 10, the number of CIFAR-10 classes.

    Returns:
        The torchvision ResNet-18 module with its pooling layer and final
        fully connected layer replaced.
    """
    model = resnet18(pretrained=False)
    # Adaptive pooling always yields a 1x1 map per channel, whatever spatial
    # size reaches this layer.
    model.avgpool = nn.AdaptiveAvgPool2d(1)
    # Size the new head from the backbone's own feature width instead of a
    # hard-coded 512.
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    return model

In [7]:
# Number of epochs used by both training runs below; the policy run also uses
# it to compute the total number of scheduled steps.
epochs = 5

# Training without the `policies` module

In [8]:
from poutyne.framework import Model

In [9]:
# Baseline run: plain SGD with a fixed learning rate and no schedule.
pytorch_network = get_module()
pytorch_network = pytorch_network.to(device)

optimizer = optim.SGD(pytorch_network.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

# Wrap the network in a Poutyne Model and move it to the selected device.
model = Model(pytorch_network, optimizer, criterion, batch_metrics=["acc"])
model = model.to(device)

# Train on the training loader, validating on the validation loader.
history = model.fit_generator(train_dl, val_dl, epochs=epochs)

Epoch 1/5 8.24s Step 49/49: loss: 2.092977, acc: 23.660000, val_loss: 1.841767, val_acc: 33.580000
Epoch 2/5 8.14s Step 49/49: loss: 1.772642, acc: 35.740000, val_loss: 1.643668, val_acc: 40.350000
Epoch 3/5 8.25s Step 49/49: loss: 1.631970, acc: 40.464000, val_loss: 1.557429, val_acc: 43.020000
Epoch 4/5 8.23s Step 49/49: loss: 1.549494, acc: 43.436000, val_loss: 1.501173, val_acc: 45.340000
Epoch 5/5 8.09s Step 49/49: loss: 1.485391, acc: 46.050000, val_loss: 1.441768, val_acc: 47.470000


# Training with the `policies` module

In [10]:
# Number of batches the training loader yields per epoch.
steps_per_epoch = len(train_dl)
steps_per_epoch

49

In [11]:
from poutyne.framework import OptimizerPolicy, one_cycle_phases


# Policy run: same network, loss and optimizer as the baseline, but the
# optimizer's hyperparameters are driven step by step by an OptimizerPolicy.
pytorch_network = get_module().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(pytorch_network.parameters(), lr=0.01)

model = Model(
    pytorch_network,
    optimizer,
    criterion,
    # Same keyword as the baseline run, so both runs are configured alike.
    batch_metrics=["acc"],
)
model = model.to(device)

# The schedule spans every optimization step of the whole run.
# NOTE(review): `lr` is given three values here; confirm against the
# `one_cycle_phases` signature how the third one (0.008) is interpreted.
policy = OptimizerPolicy(
    one_cycle_phases(epochs * steps_per_epoch, lr=(0.01, 0.1, 0.008)),
)
history = model.fit_generator(
    train_dl,
    val_dl,
    epochs=epochs,
    callbacks=[policy],
)

Epoch 1/5 8.34s Step 49/49: loss: 1.860783, acc: 32.882000, val_loss: 1.894520, val_acc: 39.270000
Epoch 2/5 8.42s Step 49/49: loss: 1.426773, acc: 48.764000, val_loss: 1.236555, val_acc: 55.940000
Epoch 3/5 8.42s Step 49/49: loss: 1.187993, acc: 57.988000, val_loss: 1.105385, val_acc: 60.670000
Epoch 4/5 8.19s Step 49/49: loss: 1.005736, acc: 64.486000, val_loss: 0.972105, val_acc: 65.840000
Epoch 5/5 8.28s Step 49/49: loss: 0.847678, acc: 70.214000, val_loss: 0.909907, val_acc: 67.970000
