In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F  # useful stateless functions
from torch.utils.data import DataLoader
from torch.utils.data import sampler

import torchvision.datasets as dset
import torchvision.transforms as T

import numpy as np

USE_GPU = True

dtype = torch.float32 # we will be using float throughout this tutorial

if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Constant to control how frequently we print train loss
print_every = 100

print('using device:', device)

using device: cuda


In [2]:
NUM_TRAIN = 49000

transform = T.Compose([
                T.ToTensor(),
                T.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
            ])

cifar10_train = dset.CIFAR10('./cs231n/datasets', train=True, download=True,
                             transform=transform)
loader_train = DataLoader(cifar10_train, batch_size=64, 
                          sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)))

cifar10_val = dset.CIFAR10('./cs231n/datasets', train=True, download=True,
                           transform=transform)
loader_val = DataLoader(cifar10_val, batch_size=64, 
                        sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN, 50000)))

cifar10_test = dset.CIFAR10('./cs231n/datasets', train=False, download=True, 
                            transform=transform)
loader_test = DataLoader(cifar10_test, batch_size=64)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [3]:
def flatten(x):
    N = x.shape[0] # read in N, C, H, W
    return x.view(N, -1)  # "flatten" the C * H * W values into a single vector per image

def check_accuracy_part34(loader, model, verbose=True):
    if verbose:
        if loader.dataset.train:
            print('Checking accuracy on validation set')
        else:
            print('Checking accuracy on test set')   
    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            _, preds = scores.max(1)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)
        acc = float(num_correct) / num_samples
        if verbose:
            print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
        return 100*acc
        
def train_part34(model, optimizer, epochs=1, verbose=True):
    best_model = None
    best_acc = 0
    
    model = model.to(device=device)  # move the model parameters to CPU/GPU
    
    for e in range(epochs):
        for t, (x, y) in enumerate(loader_train):
            model.train()  # put model to training mode
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)

            scores = model(x)
            
            loss = F.cross_entropy(scores, y)
            
            optimizer.zero_grad()
            
            loss.backward()
            optimizer.step()
            
            if t % print_every == 0:
                cur_acc = check_accuracy_part34(loader_val, model, verbose)
                if verbose:
                    print('epochs %d: Iteration %d, loss = %.4f' % (e, t, loss.item()))
                    print()
                if cur_acc > best_acc:
                    best_model = model
                    best_acc = cur_acc
    best_model = best_model.to('cpu')
    return best_model, best_acc

In [4]:
class MyModel(nn.Module):
    def __init__(self, in_channel, channel_1, channel_2, num_classes):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channel, channel_1, (5,5), padding=2)
        self.batchnorm1 = nn.BatchNorm2d(channel_1)
        self.maxpool1 = nn.MaxPool2d((2,2))
        self.conv2 = nn.Conv2d(channel_1, channel_2, (3,3))
        self.batchnorm2 = nn.BatchNorm2d(channel_2)
        self.maxpool2 = nn.MaxPool2d((2,2))
        self.fc1 = nn.Linear(7*7*channel_2, 100)
        self.fc2 = nn.Linear(100, num_classes)


    def forward(self, x):
        x = self.conv1(x)
        x = self.batchnorm1(x)
        x = F.leaky_relu(x)
        x = F.relu(x)
        x = self.maxpool1(x)
        x = self.conv2(x)
        x = self.batchnorm2(x)
        x = F.leaky_relu(x)
        x = self.maxpool2(x)
        x = flatten(x)
#         x = F.relu(F.dropout(self.fc1(x), p=0.5))
#         x = F.relu(self.fc1(x))
        x = F.leaky_relu(F.dropout(self.fc1(x), p=0.5))
        x = self.fc2(x)
        # print(x.shape)
        scores = x

        return scores

In [5]:
# learning_rate = 1e-4, 1e-2
best_acc = 0
best_model = None
for reg in [1e-2, 1e-3, 1e-4, 1e-5]:
    for learning_rate in [1e-2, 1e-3, 1e-4, 1e-5]:
        model = MyModel(in_channel=3, channel_1=64, channel_2=32, num_classes=10)
        optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, nesterov=True, weight_decay=reg)
        # optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=reg)
        cur_model, cur_acc = train_part34(model, optimizer, epochs=10, verbose=False)
        print('reg=%.4f, learning_rate=%.4f, cur_acc=%.4f' % (reg, learning_rate, cur_acc))

reg=0.0100, learning_rate=0.0100, cur_acc=64.2000
reg=0.0100, learning_rate=0.0010, cur_acc=68.3000
reg=0.0100, learning_rate=0.0001, cur_acc=55.3000
reg=0.0100, learning_rate=0.0000, cur_acc=31.6000
reg=0.0010, learning_rate=0.0100, cur_acc=71.0000
reg=0.0010, learning_rate=0.0010, cur_acc=67.8000
reg=0.0010, learning_rate=0.0001, cur_acc=54.9000
reg=0.0010, learning_rate=0.0000, cur_acc=31.4000
reg=0.0001, learning_rate=0.0100, cur_acc=71.3000
reg=0.0001, learning_rate=0.0010, cur_acc=70.4000
reg=0.0001, learning_rate=0.0001, cur_acc=54.9000
reg=0.0001, learning_rate=0.0000, cur_acc=32.7000
reg=0.0000, learning_rate=0.0100, cur_acc=71.0000
reg=0.0000, learning_rate=0.0010, cur_acc=68.2000
reg=0.0000, learning_rate=0.0001, cur_acc=52.8000
reg=0.0000, learning_rate=0.0000, cur_acc=31.1000


In [9]:
for reg in [2e-3, 1e-3, 5e-4]:
    for learning_rate in [2e-2, 1e-2, 5e-3]:
        model = MyModel(in_channel=3, channel_1=64, channel_2=32, num_classes=10)
        optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, nesterov=True, weight_decay=reg)
        # optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=reg)
        cur_model, cur_acc = train_part34(model, optimizer, epochs=10, verbose=False)
        print('reg=%.4f, learning_rate=%.4f, cur_acc=%.4f' % (reg, learning_rate, cur_acc))
        if cur_acc > best_acc:
            best_acc = cur_acc
            best_model = cur_model

reg=0.0020, learning_rate=0.0200, cur_acc=69.4000
reg=0.0020, learning_rate=0.0100, cur_acc=70.2000
reg=0.0020, learning_rate=0.0050, cur_acc=70.0000
reg=0.0010, learning_rate=0.0200, cur_acc=69.4000
reg=0.0010, learning_rate=0.0100, cur_acc=70.4000
reg=0.0010, learning_rate=0.0050, cur_acc=71.4000
reg=0.0005, learning_rate=0.0200, cur_acc=70.0000
reg=0.0005, learning_rate=0.0100, cur_acc=70.2000
reg=0.0005, learning_rate=0.0050, cur_acc=71.3000


## Test set -- run this only once

Now that we've gotten a result we're happy with, we test our final model on the test set (which you should store in best_model). Think about how this compares to your validation set accuracy.

In [10]:
# best_model = model
best_model = best_model.to(device=device)
check_accuracy_part34(loader_test, best_model)

Checking accuracy on test set
Got 7012 / 10000 correct (70.12)


70.12