In [16]:
import torchvision.transforms as transforms
import torchvision
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
from torchvision.models import resnet18
import pickle
import os

In [4]:
# Per-channel (R, G, B) mean / std used to normalise the CIFAR-100 images.
# Defined once so the training and evaluation pipelines can never drift apart.
CIFAR100_MEAN = [0.5071, 0.4867, 0.4408]
CIFAR100_STD = [0.2675, 0.2565, 0.2761]

# Training pipeline: light augmentation (makes the model more robust),
# followed by tensor conversion and normalisation.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),  # 32x32 -> padded to 40x40 -> random 32x32 crop
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),  # PIL image -> tensor
    transforms.Normalize(mean=CIFAR100_MEAN, std=CIFAR100_STD),  # apparently helps convergence
])

# Evaluation pipeline: no augmentation, but the same normalisation as training
# so that the model sees the same kind of data at evaluation time.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=CIFAR100_MEAN, std=CIFAR100_STD),
])

In [None]:

# CIFAR-100 train / test splits, downloaded to ./data on first use.
trainset = torchvision.datasets.CIFAR100(
    root='./data',
    train=True,
    download=True,
    transform=transform_train
)

testset = torchvision.datasets.CIFAR100(
    root='./data',
    train=False,
    download=True,
    transform=transform_test
)

# Settings shared by both loaders.
# - persistent_workers keeps the worker processes alive between epochs; the
#   test loader is iterated once per epoch too, so it benefits just as much
#   as the train loader from not re-spawning its workers every time.
# - pin_memory only helps host -> GPU copies, so it is enabled only when CUDA
#   is actually available.
loader_kwargs = dict(
    num_workers=8,
    pin_memory=torch.cuda.is_available(),
    persistent_workers=True,
)

trainloader = DataLoader(trainset, batch_size=512, shuffle=True, **loader_kwargs)
testloader = DataLoader(testset, batch_size=256, shuffle=False, **loader_kwargs)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
# ResNet-18 with randomly initialised weights (not ImageNet-pretrained).
model = resnet18(weights=None)

# Output size of a convolution: H_out = (H + 2p - k) / s + 1.
# With the stock stem (k=7, s=2, p=3) a 32x32 input gives
#   H_out = (32 + 2*3 - 7) / 2 + 1 = 16,
# and the max-pool that follows halves it again to 8.
# That is a real problem: channels encode patterns, not positions, so going
# from 32 to 8 this early would "blur" the image enormously.

# Replace the first layer with a 3x3 / stride-1 convolution suited to 32x32 inputs.
model.conv1 = nn.Conv2d(
    in_channels=3,
    out_channels=64,
    kernel_size=3,
    stride=1,
    padding=1,
    bias=False,
)
# Drop the max-pool: the stem output stays 64x32x32, much better than 64x8x8.
model.maxpool = nn.Identity()

# Replace the last layer so that it outputs 100 classes.
model.fc = nn.Linear(in_features=512, out_features=100)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")
model = torch.compile(model.to(device))

Device: cuda


In [11]:
# Multi-class classification loss, applied to the raw model outputs.
criterion = nn.CrossEntropyLoss()

# SGD with momentum and weight decay over all model parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)

# Step schedule: the learning rate is multiplied by 0.1 once 100 and then 150
# calls to scheduler.step() have been made (0.1 -> 0.01 -> 0.001).
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[100, 150], gamma=0.1)

In [None]:
scaler = torch.amp.GradScaler("cuda")
def train_one_epoch(model, loader):
    model.train()
    correct, total, running_loss = 0, 0, 0

    for inputs, targets in loader:
        inputs = inputs.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True)

        optimizer.zero_grad()

        with torch.amp.autocast("cuda"):
            outputs = model(inputs)
            loss = criterion(outputs, targets)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

    acc = 100. * correct / total
    return running_loss / len(loader), acc


In [13]:
@torch.no_grad()
def evaluate(model, loader):
    model.eval()
    correct, total = 0, 0

    for inputs, targets in loader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = model(inputs)
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

    return 100. * correct / total

In [None]:
import pickle

In [None]:
# Training driver: trains for `num_epochs`, evaluates on the test loader after
# every epoch, writes a full checkpoint every `save_every` epochs, and finally
# saves the weights and the metric history.
num_epochs = 200
train_accs, test_accs, train_losses = [], [], []

save_every = 20
os.makedirs("checkpoints", exist_ok=True)
torch.backends.cudnn.benchmark = True  # let cuDNN auto-tune kernels for the fixed input shapes

# `model` is the wrapper returned by torch.compile: its state_dict keys carry
# an "_orig_mod." prefix and therefore do not load into a plain resnet18.
# Save the underlying module's weights instead (it shares its parameters with
# the compiled wrapper). To restore into a compiled model, load the weights
# into `model._orig_mod`.
raw_model = getattr(model, "_orig_mod", model)

for epoch in range(num_epochs):
    train_loss, train_acc = train_one_epoch(model, trainloader)
    test_acc = evaluate(model, testloader)
    scheduler.step()

    train_accs.append(train_acc)
    test_accs.append(test_acc)
    train_losses.append(train_loss)

    # Periodic checkpoint with everything needed to resume training.
    if (epoch + 1) % save_every == 0:
        checkpoint = {
            "epoch": epoch + 1,
            "model_state": raw_model.state_dict(),
            "optimizer_state": optimizer.state_dict(),
            "scheduler_state": scheduler.state_dict(),
            # AMP loss-scale state, so a resumed run does not restart from the
            # scaler's initial scale.
            "scaler_state": scaler.state_dict(),
            "train_losses": train_losses,
            "train_accs": train_accs,
            "test_accs": test_accs,
        }
        torch.save(checkpoint, f"checkpoints/checkpoint_epoch_{epoch+1}.pth")

    print(f"Epoch {epoch:03d} | Train Acc: {train_acc:.2f}% | Test Acc: {test_acc:.2f}%")

# Final weights, with the same un-prefixed keys as the checkpoints.
torch.save(raw_model.state_dict(), "resnet18_cifar100.pth")

# Per-epoch metrics, kept for later plotting / analysis.
history = {
    "train_losses": train_losses,
    "train_accs": train_accs,
    "test_accs": test_accs
}

with open("training_history.pkl", "wb") as f:
    pickle.dump(history, f)

Epoch 000 | Train Acc: 55.11% | Test Acc: 44.17%
Epoch 001 | Train Acc: 56.96% | Test Acc: 45.73%
Epoch 002 | Train Acc: 58.67% | Test Acc: 53.94%
Epoch 003 | Train Acc: 59.55% | Test Acc: 52.43%
Epoch 004 | Train Acc: 60.94% | Test Acc: 49.49%
Epoch 005 | Train Acc: 61.62% | Test Acc: 51.84%
Epoch 006 | Train Acc: 62.46% | Test Acc: 54.53%
Epoch 007 | Train Acc: 63.31% | Test Acc: 55.51%
Epoch 008 | Train Acc: 63.89% | Test Acc: 52.24%
Epoch 009 | Train Acc: 64.18% | Test Acc: 53.67%
