In [2]:
from torchsummary import summary

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler

import torchvision.datasets as dset
import torchvision.transforms as T
from torchvision import models
from torchvision import utils

import numpy as np

In [3]:
USE_GPU = True
dtype = torch.float32 # We will be using float throughout this tutorial.

# Seed the global torch and numpy RNGs once, up front, so that random draws made
# through them are repeatable after a kernel restart. (GPU kernels may still
# introduce some run-to-run variation.)
SEED = 0
torch.manual_seed(SEED)
np.random.seed(SEED)

# Use the GPU only when it was requested *and* CUDA is actually available;
# otherwise fall back to the CPU.
if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Constant to control how frequently we print train loss.
print_every = 100
print('using device:', device)

using device: cuda


In [5]:
NUM_TRAIN = 49000

# Values shared by every dataset / loader built in this cell.
DATA_ROOT = './cs231n/datasets'
BATCH_SIZE = 64
VAL_END = 50000  # validation uses indices [NUM_TRAIN, VAL_END) of the training split
CHANNEL_MEAN = (0.4914, 0.4822, 0.4465)
CHANNEL_STD = (0.2023, 0.1994, 0.2010)

# Training pipeline: padded random crop and random horizontal flip, then
# tensor conversion and per-channel normalisation.
# Not enabled: RandomResizedCrop(32), Resize(256) + CenterCrop(224),
# AutoAugment(policy=CIFAR10).
transform_train = T.Compose([
    T.RandomCrop(32, padding=4),
    T.RandomHorizontalFlip(),
    T.ToTensor(),
    T.Normalize(CHANNEL_MEAN, CHANNEL_STD),
])

# Evaluation pipeline: no augmentation, same normalisation as training.
transform_test = T.Compose([
    T.ToTensor(),
    T.Normalize(CHANNEL_MEAN, CHANNEL_STD),
])

# Training subset: indices [0, NUM_TRAIN) of the train split, augmented.
cifar10_train = dset.CIFAR10(
    DATA_ROOT, train=True, download=True, transform=transform_train
)
train_sampler = sampler.SubsetRandomSampler(range(NUM_TRAIN))
loader_train = DataLoader(cifar10_train, batch_size=BATCH_SIZE, sampler=train_sampler)

# Validation subset: the remaining train-split indices, without augmentation.
cifar10_val = dset.CIFAR10(
    DATA_ROOT, train=True, download=True, transform=transform_test
)
val_sampler = sampler.SubsetRandomSampler(range(NUM_TRAIN, VAL_END))
loader_val = DataLoader(cifar10_val, batch_size=BATCH_SIZE, sampler=val_sampler)

# Held-out test split, iterated in dataset order.
cifar10_test = dset.CIFAR10(
    DATA_ROOT, train=False, download=True, transform=transform_test
)
loader_test = DataLoader(cifar10_test, batch_size=BATCH_SIZE)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [6]:
# NOTE(review): `torchsummary` is already imported by the first code cell, so on
# a fresh kernel this install has to run before that import cell.
!pip install torchsummary



In [7]:
def check_accuracy(loader, model):
    """Run ``model`` over every batch of ``loader`` and report top-1 accuracy.

    Reads the notebook-level globals ``device`` and ``dtype`` to place and cast
    the inputs. The model is switched to evaluation mode and is left in that
    mode when the function returns.

    Args:
        loader: Iterable yielding ``(x, y)`` batches. If ``loader.dataset`` has a
            truthy ``train`` attribute the run is labelled "validation set";
            otherwise (including when the attribute is missing) "test set".
        model: Module called as ``model(x)``; the predicted class is the index
            of the maximum score along dimension 1.

    Returns:
        float: Fraction of correctly classified samples, or ``0.0`` when the
        loader yields no samples.
    """
    # getattr keeps datasets without a `train` flag from raising.
    if getattr(loader.dataset, 'train', False):
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')
    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            _, preds = scores.max(1)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)
    # Convert the (possibly on-device) tensor counter to a plain int once.
    num_correct = int(num_correct)
    # Guard against an empty loader instead of dividing by zero.
    acc = num_correct / num_samples if num_samples else 0.0
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
    return acc

In [8]:
def train(model, optimizer, scheduler, epochs=1, step_per_batch=True):
    """Train ``model`` on ``loader_train`` with cross-entropy loss.

    Reads the notebook-level globals ``device``, ``dtype``, ``loader_train``,
    ``loader_val``, ``grad_clip`` and ``print_every``, and calls
    ``check_accuracy`` on the validation loader every ``print_every`` iterations.

    Args:
        model: Module to optimise; it is moved to ``device`` first.
        optimizer: Optimizer already bound to ``model``'s parameters.
        scheduler: Learning-rate scheduler wrapping ``optimizer``.
        epochs: Number of full passes over ``loader_train``.
        step_per_batch: When True (default) ``scheduler.step()`` is called after
            every optimizer step. This is what a schedule sized in mini-batches
            needs, e.g. ``OneCycleLR`` built with
            ``steps_per_epoch=len(loader_train)``. Pass False for schedulers
            that must be stepped once per epoch.

    Returns:
        None. Progress is reported through ``print``.
    """
    model = model.to(device=device)
    for e in range(epochs):
        print(f"--------------------  Epoch: {e+1}  --------------------")
        for t, (x, y) in enumerate(loader_train):
            # Re-enter training mode on every iteration: the periodic
            # check_accuracy() call below leaves the model in eval mode.
            model.train()
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)

            scores = model(x)
            loss = F.cross_entropy(scores, y)

            optimizer.zero_grad()
            loss.backward()
            # Gradient clipping (skipped when grad_clip is falsy, e.g. 0 or None)
            if grad_clip:
                nn.utils.clip_grad_value_(model.parameters(), grad_clip)
            optimizer.step()
            # Advance the LR schedule once per optimizer step. Stepping it only
            # once per epoch would leave a per-batch schedule near its start.
            if step_per_batch:
                scheduler.step()

            if t % print_every == 0:
                print('Epoch %d' % (e+1))
                print('Iteration %d, loss = %.4f' % (t, loss.item()))
                check_accuracy(loader_val, model)
                print()

        if not step_per_batch:
            scheduler.step()

In [9]:
# Start from torchvision's pretrained ResNet-50 and adapt it for 32x32 inputs.
model = models.resnet50(pretrained=True)

# Replace the classifier with a 10-output linear layer of matching input width.
num_features = model.fc.in_features
model.fc = nn.Linear(in_features=num_features, out_features=10)

# Swap the stem for a freshly initialised 3x3, stride-1 convolution and turn
# the max-pool into a pass-through, so the stem keeps the input's spatial size.
model.conv1 = nn.Conv2d(
    in_channels=3,
    out_channels=64,
    kernel_size=3,
    stride=1,
    padding=1,
    bias=False,
)
model.maxpool = nn.Identity()

model = model.to(device)
print(model.conv1)
summary(model, (3, 32, 32))

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           1,728
       BatchNorm2d-2           [-1, 64, 32, 32]             128
              ReLU-3           [-1, 64, 32, 32]               0
          Identity-4           [-1, 64, 32, 32]               0
            Conv2d-5           [-1, 64, 32, 32]           4,096
       BatchNorm2d-6           [-1, 64, 32, 32]             128
              ReLU-7           [-1, 64, 32, 32]               0
            Conv2d-8           [-1, 64, 32, 32]          36,864
       BatchNorm2d-9           [-1, 64, 32, 32]             128
             ReLU-10           [-1, 64, 32, 32]               0
           Conv2d-11          [-1, 256, 32, 32]          16,384
      BatchNorm2d-12          [-1, 256, 32, 32]             512
           Conv2d-13      

## hyperparameters

In [10]:
# Run-level settings read by the optimizer, scheduler and training cells.
epochs = 50          # number of training epochs; also sizes the LR schedule
max_lr = 1e-2        # learning rate given to the optimizer and the scheduler
grad_clip = 1e-1     # read as a global by train() for gradient value clipping
weight_decay = 1e-4  # weight decay given to the optimizer

## optimizer

In [11]:
# NOTE: `learning_rate` and `momentum` are defined here but are not passed to
# the optimizer below, which is built from `max_lr` and `weight_decay`.
learning_rate = 0.01
momentum = 0.9

optimizer = optim.Adam(
    params=model.parameters(),
    lr=max_lr,
    weight_decay=weight_decay,
)

## scheduler

In [13]:
# One-cycle schedule spanning epochs * len(loader_train) steps, i.e. sized for
# one scheduler.step() per mini-batch.
steps_per_epoch = len(loader_train)
scheduler = optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=max_lr,
    epochs=epochs,
    steps_per_epoch=steps_per_epoch,
)
# Print the optimizer to show the parameter-group values after the scheduler
# has been attached.
print(optimizer)

Adam (
Parameter Group 0
    amsgrad: False
    base_momentum: 0.85
    betas: (0.95, 0.999)
    eps: 1e-08
    initial_lr: 0.0004
    lr: 0.0003999999999999993
    max_lr: 0.01
    max_momentum: 0.95
    min_lr: 4e-08
    weight_decay: 0.0001
)


In [None]:
# Run the training loop for `epochs` epochs; loss and validation accuracy are
# printed every `print_every` iterations.
train(model, optimizer, scheduler, epochs=epochs)

--------------------  Epoch: 1  --------------------
Epoch 1
Iteration 0, loss = 2.3723
Checking accuracy on validation set
Got 138 / 1000 correct (13.80)

Epoch 1
Iteration 100, loss = 1.2447
Checking accuracy on validation set
Got 624 / 1000 correct (62.40)

Epoch 1
Iteration 200, loss = 1.0124
Checking accuracy on validation set
Got 753 / 1000 correct (75.30)

Epoch 1
Iteration 300, loss = 0.9330
Checking accuracy on validation set
Got 780 / 1000 correct (78.00)

Epoch 1
Iteration 400, loss = 0.8002
Checking accuracy on validation set
Got 788 / 1000 correct (78.80)

Epoch 1
Iteration 500, loss = 0.3745
Checking accuracy on validation set
Got 815 / 1000 correct (81.50)

Epoch 1
Iteration 600, loss = 0.6838
Checking accuracy on validation set
Got 812 / 1000 correct (81.20)

Epoch 1
Iteration 700, loss = 0.4303
Checking accuracy on validation set
Got 823 / 1000 correct (82.30)

--------------------  Epoch: 2  --------------------
Epoch 2
Iteration 0, loss = 0.5229
Checking accuracy on 

In [None]:
# NOTE(review): `best_model` is just another name for the model trained above;
# no separate best-on-validation checkpoint is kept in the cells shown here.
best_model = model
# Final evaluation on the held-out test loader; returns accuracy as a fraction.
test_acc = check_accuracy(loader_test, best_model)