In [1]:
import torch
#assert '.'.join(torch.__version__.split('.')[:2]) == '1.4'
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler

import torchvision.datasets as dset
import torchvision.transforms as T

import numpy as np
import os
# Default to the fourth V100 (physical GPU index 3), but keep any
# CUDA_VISIBLE_DEVICES value that the user or a job scheduler has already
# exported instead of overwriting it. The variable is read when CUDA is first
# initialised, so it is set here before any CUDA call is made.
os.environ.setdefault('CUDA_VISIBLE_DEVICES', '3')
# Fall back to the CPU when no CUDA device is visible to this process.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Training prints the loss and validation accuracy every `print_every` iterations.
print_every = 100

In [2]:
# Number of images from the CIFAR-10 training split used for optimisation; the
# remaining indices (NUM_TRAIN .. 49999) are held out as the validation set.
NUM_TRAIN = 49000

# Preprocessing shared by every split: convert each image to a tensor, then
# normalise each RGB channel with the hardcoded per-channel mean and standard
# deviation below.
transform = T.Compose([
    T.ToTensor(),
    T.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)),
])

# Each split gets a Dataset (one example at a time) wrapped in a DataLoader
# (minibatches of 64). Train and val both read the CIFAR-10 *training* split;
# a SubsetRandomSampler restricts each loader to its own range of indices.
cifar10_train = dset.CIFAR10(root='./cs231n/datasets', train=True,
                             download=True, transform=transform)
loader_train = DataLoader(
    cifar10_train,
    batch_size=64,
    sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)),
)

cifar10_val = dset.CIFAR10(root='./cs231n/datasets', train=True,
                           download=True, transform=transform)
loader_val = DataLoader(
    cifar10_val,
    batch_size=64,
    sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN, 50000)),
)

# The held-out test split is iterated in order, without a sampler.
cifar10_test = dset.CIFAR10(root='./cs231n/datasets', train=False,
                            download=True, transform=transform)
loader_test = DataLoader(cifar10_test, batch_size=64)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [3]:
def check_accuracy_part34(loader, model):
    """
    Measure classification accuracy of a model over every minibatch of a loader.

    Inputs:
    - loader: An iterable of (x, y) minibatches exposing `loader.dataset.train`;
      that flag only selects the printed label ('validation set' when True,
      'test set' otherwise).
    - model: A PyTorch Module mapping a batch x to per-class scores; the
      prediction is the index of the largest score along dimension 1.

    Returns: The fraction of correctly classified samples as a Python float
    (0.0 when the loader yields no samples). The same figure is printed as a
    percentage.

    Side effects: the model is switched to evaluation mode and left there.
    Batches are moved to the module-level `device`.
    """
    if loader.dataset.train:
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')
    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=torch.float)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            _, preds = scores.max(1)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)
    # The running count is a 0-dim tensor once a batch has been seen (and the
    # plain int 0 otherwise); convert it to a Python int exactly once.
    num_correct = int(num_correct)
    # Guard the division so an empty loader reports 0.00 instead of raising.
    acc = float(num_correct) / num_samples if num_samples > 0 else 0.0
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
    return acc

In [4]:
def train_part34(model, optimizer, epochs=1):
    """
    Train a model on CIFAR-10 using the PyTorch Module API.

    Minibatches come from the module-level `loader_train`; every `print_every`
    iterations the current loss is printed and the model is evaluated on
    `loader_val` via `check_accuracy_part34`.

    Inputs:
    - model: A PyTorch Module giving the model to train.
    - optimizer: An Optimizer object we will use to train the model
    - epochs: (Optional) A Python integer giving the number of epochs to train for

    Returns: Nothing, but prints model accuracies during training.
    """
    model = model.to(device=device)  # move the model parameters to CPU/GPU
    for e in range(epochs):
        for t, (x, y) in enumerate(loader_train):
            # Re-enable training mode on every iteration: the periodic accuracy
            # check below switches the model to evaluation mode.
            model.train()
            x = x.to(device=device, dtype=torch.float)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)

            scores = model(x)
            # Go through `nn.functional` (already imported with `nn`) so this
            # function does not rely on an `F` alias imported by a later cell.
            loss = nn.functional.cross_entropy(scores, y)

            # Zero out all of the gradients for the variables which the optimizer
            # will update.
            optimizer.zero_grad()

            # This is the backwards pass: compute the gradient of the loss with
            # respect to each parameter of the model.
            loss.backward()

            # Actually update the parameters of the model using the gradients
            # computed by the backwards pass.
            optimizer.step()

            if t % print_every == 0:
                print('Iteration %d, loss = %.4f' % (t, loss.item()))
                check_accuracy_part34(loader_val, model)
                print()

In [6]:
################################################################################
# TODO:                                                                        #         
# Experiment with any architectures, optimizers, and hyperparameters.          #
# Achieve AT LEAST 70% accuracy on the *validation set* within 10 epochs.      #
#                                                                              #
# Note that you can use the check_accuracy_part34 function to evaluate on      #
# either the test set or the validation set, by passing either loader_test     #
# or loader_val as its first argument. You should not touch the test set       #
# until you have finished your architecture and hyperparameter tuning, and     #
# only run the test set once at the end to report a final value.               #
################################################################################
# Template placeholders: both names start out unset in this cell.
model = optimizer = None

# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

import torch.nn.functional as F

class BasicBlock(nn.Module):
    """
    Residual block built from two 3x3 convolutions, each followed by batch norm.

    Inputs to the constructor:
    - in_planes: number of channels of the incoming feature map.
    - planes: number of channels produced by both convolutions.
    - stride: stride of the first convolution (the second always uses 1).

    The block output is relu(main_path(x) + shortcut(x)). The shortcut is an
    empty Sequential unless the stride or the channel count changes, in which
    case it is a strided 1x1 convolution followed by batch norm.
    """

    # The block emits expansion * planes channels.
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        shape_changes = stride != 1 or in_planes != out_planes
        if shape_changes:
            # Project the input so it can be added to the main path.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Apply the two conv/bn stages, add the shortcut, and apply the final ReLU."""
        hidden = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(hidden))
        out += self.shortcut(x)
        return F.relu(out)


class Bottleneck(nn.Module):
    """
    Residual block built from a 1x1, a 3x3 and a 1x1 convolution, each followed
    by batch norm.

    Inputs to the constructor:
    - in_planes: number of channels of the incoming feature map.
    - planes: number of channels of the first two convolutions; the last
      convolution widens this to expansion * planes.
    - stride: stride of the 3x3 convolution (the 1x1 convolutions use 1).

    The block output is relu(main_path(x) + shortcut(x)). The shortcut is an
    empty Sequential unless the stride or the channel count changes, in which
    case it is a strided 1x1 convolution followed by batch norm.
    """

    # The block emits expansion * planes channels.
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        shape_changes = stride != 1 or in_planes != out_planes
        if shape_changes:
            # Project the input so it can be added to the main path.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Apply the three conv/bn stages, add the shortcut, and apply the final ReLU."""
        hidden = F.relu(self.bn1(self.conv1(x)))
        hidden = F.relu(self.bn2(self.conv2(hidden)))
        out = self.bn3(self.conv3(hidden))
        out += self.shortcut(x)
        return F.relu(out)


class ResNet(nn.Module):
    """
    Residual network: a 3x3 stem convolution, four stages of residual blocks
    (64, 128, 256 and 512 planes; stages 2-4 start with a stride-2 block),
    global average pooling and a linear classifier.

    Inputs to the constructor:
    - block: residual block class. It must expose an `expansion` class
      attribute and be constructible as block(in_planes, planes, stride).
    - num_blocks: sequence of four integers, the number of blocks per stage.
    - num_classes: (Optional) number of output scores of the linear layer.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        # Channel count fed to the next block; advanced by _make_layer.
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """
        Build one stage as a Sequential of blocks. Only the first block uses
        `stride`; the following ones use stride 1. `self.in_planes` is updated
        after each block so the next block receives the right input width.
        """
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        # A distinct loop name avoids shadowing the `stride` argument.
        for block_stride in strides:
            layers.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        """Map a batch of 3-channel images to a (batch, num_classes) score tensor."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # Pool to 1x1 whatever the spatial size is. For 32x32 inputs the final
        # feature map is 4x4, so this equals the former fixed 4x4 average pool,
        # while other input resolutions no longer break the linear layer.
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def ResNet18(num_classes=10):
    """
    Build a ResNet of BasicBlocks with [2, 2, 2, 2] blocks per stage.

    Inputs:
    - num_classes: (Optional) number of output scores, forwarded to ResNet.
    """
    return ResNet(BasicBlock, [2, 2, 2, 2], num_classes=num_classes)

def ResNet34(num_classes=10):
    """
    Build a ResNet of BasicBlocks with [3, 4, 6, 3] blocks per stage.

    Inputs:
    - num_classes: (Optional) number of output scores, forwarded to ResNet.
    """
    return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes)

def ResNet50(num_classes=10):
    """
    Build a ResNet of Bottleneck blocks with [3, 4, 6, 3] blocks per stage.

    Inputs:
    - num_classes: (Optional) number of output scores, forwarded to ResNet.
    """
    return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes)

def ResNet101(num_classes=10):
    """
    Build a ResNet of Bottleneck blocks with [3, 4, 23, 3] blocks per stage.

    Inputs:
    - num_classes: (Optional) number of output scores, forwarded to ResNet.
    """
    return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes)

def ResNet152(num_classes=10):
    """
    Build a ResNet of Bottleneck blocks with [3, 8, 36, 3] blocks per stage.

    Inputs:
    - num_classes: (Optional) number of output scores, forwarded to ResNet.
    """
    return ResNet(Bottleneck, [3, 8, 36, 3], num_classes=num_classes)

# model = ResNet34()
# optimizer = optim.Adam(model.parameters(), lr=1e-3)

# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
################################################################################
#                                 END OF YOUR CODE                             
################################################################################

# You should get at least 70% accuracy
# train_part34(model, optimizer, epochs=10)

In [7]:
# test which ResNet has the highest accuracy
#
# Every architecture is trained from scratch under the same recipe (Adam,
# lr=1e-3, 10 epochs) so the validation accuracies printed during training are
# comparable. One loop replaces five copy-pasted stanzas; the printed messages
# are unchanged because each constructor's __name__ matches the old label.
for build_model in (ResNet18, ResNet34, ResNet50, ResNet101, ResNet152):
    model_name = build_model.__name__
    print('Training %s...' % model_name)
    model = build_model()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    train_part34(model, optimizer, epochs=10)
    print('%s has been trained' % model_name)

Training ResNet18...
Iteration 0, loss = 2.4753
Checking accuracy on validation set
Got 137 / 1000 correct (13.70)

Iteration 100, loss = 1.5830
Checking accuracy on validation set
Got 347 / 1000 correct (34.70)

Iteration 200, loss = 1.4220
Checking accuracy on validation set
Got 457 / 1000 correct (45.70)

Iteration 300, loss = 1.3352
Checking accuracy on validation set
Got 500 / 1000 correct (50.00)

Iteration 400, loss = 1.1313
Checking accuracy on validation set
Got 539 / 1000 correct (53.90)

Iteration 500, loss = 1.0814
Checking accuracy on validation set
Got 564 / 1000 correct (56.40)

Iteration 600, loss = 1.1486
Checking accuracy on validation set
Got 594 / 1000 correct (59.40)

Iteration 700, loss = 1.3395
Checking accuracy on validation set
Got 606 / 1000 correct (60.60)

Iteration 0, loss = 0.6734
Checking accuracy on validation set
Got 638 / 1000 correct (63.80)

Iteration 100, loss = 1.1218
Checking accuracy on validation set
Got 620 / 1000 correct (62.00)

Iteration 200

Got 467 / 1000 correct (46.70)

Iteration 500, loss = 1.2854
Checking accuracy on validation set
Got 486 / 1000 correct (48.60)

Iteration 600, loss = 1.1649
Checking accuracy on validation set
Got 521 / 1000 correct (52.10)

Iteration 700, loss = 1.2122
Checking accuracy on validation set
Got 587 / 1000 correct (58.70)

Iteration 0, loss = 1.0497
Checking accuracy on validation set
Got 603 / 1000 correct (60.30)

Iteration 100, loss = 1.0906
Checking accuracy on validation set
Got 640 / 1000 correct (64.00)

Iteration 200, loss = 1.1163
Checking accuracy on validation set
Got 605 / 1000 correct (60.50)

Iteration 300, loss = 1.0584
Checking accuracy on validation set
Got 642 / 1000 correct (64.20)

Iteration 400, loss = 0.8926
Checking accuracy on validation set
Got 638 / 1000 correct (63.80)

Iteration 500, loss = 1.0433
Checking accuracy on validation set
Got 660 / 1000 correct (66.00)

Iteration 600, loss = 0.7156
Checking accuracy on validation set
Got 670 / 1000 correct (67.00)



Iteration 100, loss = 0.9851
Checking accuracy on validation set
Got 579 / 1000 correct (57.90)

Iteration 200, loss = 1.1819
Checking accuracy on validation set
Got 633 / 1000 correct (63.30)

Iteration 300, loss = 1.0304
Checking accuracy on validation set
Got 581 / 1000 correct (58.10)

Iteration 400, loss = 0.7835
Checking accuracy on validation set
Got 674 / 1000 correct (67.40)

Iteration 500, loss = 1.0577
Checking accuracy on validation set
Got 589 / 1000 correct (58.90)

Iteration 600, loss = 0.9911
Checking accuracy on validation set
Got 647 / 1000 correct (64.70)

Iteration 700, loss = 0.7584
Checking accuracy on validation set
Got 665 / 1000 correct (66.50)

Iteration 0, loss = 0.7834
Checking accuracy on validation set
Got 713 / 1000 correct (71.30)

Iteration 100, loss = 0.8303
Checking accuracy on validation set
Got 706 / 1000 correct (70.60)

Iteration 200, loss = 0.7497
Checking accuracy on validation set
Got 726 / 1000 correct (72.60)

Iteration 300, loss = 0.8128
Che

Got 448 / 1000 correct (44.80)

Iteration 600, loss = 1.0933
Checking accuracy on validation set
Got 512 / 1000 correct (51.20)

Iteration 700, loss = 1.3337
Checking accuracy on validation set
Got 497 / 1000 correct (49.70)

Iteration 0, loss = 1.2264
Checking accuracy on validation set
Got 498 / 1000 correct (49.80)

Iteration 100, loss = 1.2261
Checking accuracy on validation set
Got 520 / 1000 correct (52.00)

Iteration 200, loss = 1.4569
Checking accuracy on validation set
Got 607 / 1000 correct (60.70)

Iteration 300, loss = 0.9405
Checking accuracy on validation set
Got 610 / 1000 correct (61.00)

Iteration 400, loss = 1.1342
Checking accuracy on validation set
Got 621 / 1000 correct (62.10)

Iteration 500, loss = 1.0395
Checking accuracy on validation set
Got 594 / 1000 correct (59.40)

Iteration 600, loss = 0.9356
Checking accuracy on validation set
Got 606 / 1000 correct (60.60)

Iteration 700, loss = 1.1859
Checking accuracy on validation set
Got 598 / 1000 correct (59.80)



Iteration 200, loss = 1.1682
Checking accuracy on validation set
Got 581 / 1000 correct (58.10)

Iteration 300, loss = 1.0978
Checking accuracy on validation set
Got 615 / 1000 correct (61.50)

Iteration 400, loss = 0.8460
Checking accuracy on validation set
Got 581 / 1000 correct (58.10)

Iteration 500, loss = 0.7462
Checking accuracy on validation set
Got 628 / 1000 correct (62.80)

Iteration 600, loss = 1.0297
Checking accuracy on validation set
Got 654 / 1000 correct (65.40)

Iteration 700, loss = 1.0411
Checking accuracy on validation set
Got 681 / 1000 correct (68.10)

Iteration 0, loss = 0.8302
Checking accuracy on validation set
Got 663 / 1000 correct (66.30)

Iteration 100, loss = 1.1100
Checking accuracy on validation set
Got 681 / 1000 correct (68.10)

Iteration 200, loss = 0.9073
Checking accuracy on validation set
Got 694 / 1000 correct (69.40)

Iteration 300, loss = 1.0113
Checking accuracy on validation set
Got 704 / 1000 correct (70.40)

Iteration 400, loss = 0.6775
Che

In [None]:
# ResNet18: 84.50
# ResNet34: 86.30
# ResNet50: 83.00
# ResNet101: 84.10
# ResNet152: 81.00