In [1]:
import torch
from torch import nn
import torchvision
import numpy as np
import torchvision.models as models
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10

## Data

In [3]:
BS = 1024

transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train)
train_loader = torch.utils.data.DataLoader(
    trainset, batch_size=BS, shuffle=True)

testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_test)
valid_loader = torch.utils.data.DataLoader(
    testset, batch_size=BS, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


## Gradient Accumulation

In [5]:
%%time

model = models.resnet18(pretrained=True).cuda(0)
model.fc = torch.nn.Linear(in_features=512, out_features=10).cuda(0)
model.fc.requires_grad_(True)

LR = 0.003
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
cost_function = torch.nn.CrossEntropyLoss()
TRAIN_STEP = len(trainset)/BS
VALID_STEP = len(testset)/BS
EPOCHS = 5 
accumulation_steps = 2 # update weights at 2*Batchsize steps

TRAIN_LOSS = []
VAL_LOSS = []
VAL_ACC = []

for epoch in range(EPOCHS):
  print("Epoch", epoch+1)
  running_loss = 0.0
  model.train()
  for step, (x, y) in enumerate(train_loader):

    x, y = x.cuda(0), y.cuda(0)
    z = model(x)
    loss = cost_function(z, y) / accumulation_steps
    running_loss+=loss.detach()
    loss.backward()
    if (step+1) % accumulation_steps == 0:
        optimizer.step() 
        optimizer.zero_grad()     

  train_l = running_loss/TRAIN_STEP
  TRAIN_LOSS.append(train_l.item())
  correct = 0 
  print("Training loss:", train_l.item())
  running_loss = 0.0

  model.eval()
  for (x,y) in valid_loader:
    
    x, y = x.cuda(0), y.cuda(0)
    z = model(x)
    loss = cost_function(z, y)
    running_loss+=loss.detach()
    _, yhat = torch.max(z,1)
    correct += torch.eq(yhat, y).sum().item()

  valid_l = running_loss/VALID_STEP
  valid_a = (correct / len(testset)) * 100
  print("Valid loss:", valid_l.item())
  VAL_LOSS.append(valid_l)
  print('VAL_Accuracy: %d %%' % valid_a)
  VAL_ACC.append(valid_a)

Epoch 1
Training loss: 0.7538629770278931
Valid loss: 1.1441924571990967
VAL_Accuracy: 63 %
Epoch 2
Training loss: 0.4281767010688782
Valid loss: 1.0574060678482056
VAL_Accuracy: 66 %
Epoch 3
Training loss: 0.34999096393585205
Valid loss: 0.9526497721672058
VAL_Accuracy: 70 %
Epoch 4
Training loss: 0.307162880897522
Valid loss: 0.7776311039924622
VAL_Accuracy: 74 %
Epoch 5
Training loss: 0.2815788686275482
Valid loss: 0.7442079782485962
VAL_Accuracy: 76 %
CPU times: user 6min 10s, sys: 3min 42s, total: 9min 52s
Wall time: 9min 52s
