In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler

import torchvision.datasets as dset
import torchvision.transforms as T

import numpy as np

# Global notebook configuration: tensor dtype, logging cadence and the
# device every later cell moves its tensors/models to.
USE_GPU = True
dtype = torch.float32
print_every = 100

# Use CUDA only when it is both requested and actually available.
device = torch.device('cuda' if USE_GPU and torch.cuda.is_available() else 'cpu')

print('using device:', device)

using device: cpu


In [15]:
import torch.nn.functional as F

In [6]:
NUM_TRAIN = 49000

# Pass 1: load the raw (un-normalized) images to measure per-channel statistics.
cifar10_train = dset.CIFAR10('./cs231n/datasets', train = True,
                             download = True, transform = T.ToTensor())
# Restrict the statistics to the first NUM_TRAIN images so the validation
# images (indices NUM_TRAIN and up) do not influence the normalization.
# Sums are order-independent, so the random subset sampler is fine here.
loader = DataLoader(cifar10_train, batch_size = 64,
                    sampler = sampler.SubsetRandomSampler(range(NUM_TRAIN)),
                    num_workers=2)

# Accumulate per-channel sum and sum of squares over every pixel.
# Averaging per-image standard deviations (the previous approach) does not
# give the dataset standard deviation, because it ignores the variation
# between images.
channel_sum = 0.
channel_sq_sum = 0.
nb_pixels = 0

for data, _ in loader:
    # Collapse the spatial dims: (batch, channels, pixels); float64 keeps the
    # running sums accurate over the whole split.
    data = data.view(data.size(0), data.size(1), -1).double()
    channel_sum += data.sum(2).sum(0)
    channel_sq_sum += (data ** 2).sum(2).sum(0)
    nb_pixels += data.size(0) * data.size(2)

mean = channel_sum / nb_pixels
# Var[x] = E[x^2] - E[x]^2, computed independently for each channel.
std = (channel_sq_sum / nb_pixels - mean ** 2).sqrt()


# Pass 2: rebuild the datasets with per-channel normalization applied.
transform = T.Compose([
    T.ToTensor(),
    T.Normalize(mean.tolist(),std.tolist())
])

cifar10_train = dset.CIFAR10('./cs231n/datasets', train = True,
                             download= True, transform= transform)
loader_train = DataLoader(cifar10_train, batch_size=64,
                          sampler= sampler.SubsetRandomSampler(range(NUM_TRAIN)))

# Validation uses the tail of the official training split (train=True), i.e.
# every index from NUM_TRAIN up to the end of the dataset.
cifar10_val = dset.CIFAR10('./cs231n/datasets', train = True, download = True,
                           transform=transform)
loader_val = DataLoader(cifar10_val, batch_size=64,
                        sampler = sampler.SubsetRandomSampler(range(NUM_TRAIN, len(cifar10_val))))

cifar10_test = dset.CIFAR10('./cs231n/datasets', train = False, download = True,
                            transform=transform)
loader_test = DataLoader(cifar10_test,batch_size=64)




Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [14]:
def flatten(x):
    """Collapse every dimension after the first into a single one.

    Args:
        x: tensor whose first dimension is the batch dimension.

    Returns:
        A 2-D tensor with x.shape[0] rows holding the same values as x.
    """
    N = x.shape[0]
    # reshape behaves like view for contiguous tensors but also accepts
    # non-contiguous inputs (e.g. after permute), where view raises.
    return x.reshape(N,-1)

x = torch.arange(12).view(2,1,3,2)
print('After flattening: \n', flatten(x))

After flattening: 
 tensor([[ 0,  1,  2,  3,  4,  5],
        [ 6,  7,  8,  9, 10, 11]])


In [12]:
# Built-in equivalent: merge dimensions 1 through the last one of x.
torch.flatten(x, start_dim=1, end_dim=-1)

tensor([[ 0,  1,  2,  3,  4,  5],
        [ 6,  7,  8,  9, 10, 11]])

In [16]:
def two_layer_fc(x, params):
    """Forward pass of a bias-free two-layer network: matmul -> ReLU -> matmul.

    Args:
        x: input batch; it is passed through flatten() before the first matmul.
        params: sequence of exactly two weight matrices (w1, w2).

    Returns:
        The result of relu(flatten(x) @ w1) @ w2.
    """
    flat = flatten(x)
    first_weight, second_weight = params
    hidden = F.relu(flat.mm(first_weight))
    return hidden.mm(second_weight)

In [21]:
def three_layer_convnet(x, params):
    """Forward pass: conv -> ReLU -> conv -> ReLU -> fully connected.

    Args:
        x: input batch passed to F.conv2d.
        params: sequence (conv_w1, conv_b1, conv_w2, conv_b2, fc_w, fc_b).
            The first conv is applied with padding=2 and the second with
            padding=1; fc_w must have as many rows as the flattened output
            of the second ReLU has columns.

    Returns:
        scores: 2-D tensor, one row per input sample.
    """
    conv_w1, conv_b1, conv_w2, conv_b2, fc_w, fc_b = params

    conv1 = F.conv2d(x, conv_w1, conv_b1, padding=2)
    relu1 = F.relu(conv1)
    # The second conv must consume the *activated* output; feeding it conv1
    # would silently drop the first non-linearity.
    conv2 = F.conv2d(relu1, conv_w2, conv_b2, padding=1)
    relu2 = F.relu(conv2)
    relu2_flat = torch.flatten(relu2, 1, -1)
    scores = relu2_flat.mm(fc_w) + fc_b

    return scores

In [22]:
def three_layer_convnet_test():
    """Shape smoke test: run three_layer_convnet on all-zero inputs and print the score size."""
    # minibatch size 64, image size [3, 32, 32]
    batch = torch.zeros((64, 3, 32, 32), dtype=dtype)

    # Conv weights are [out_channel, in_channel, kernel_H, kernel_W]; biases are [out_channel].
    # The convs keep the 32x32 spatial size here, so the fully-connected layer
    # sees 9 * 32 * 32 features per image.
    zero_params = [
        torch.zeros((6, 3, 5, 5), dtype=dtype),
        torch.zeros((6,)),
        torch.zeros((9, 6, 3, 3), dtype=dtype),
        torch.zeros((9,)),
        torch.zeros((9 * 32 * 32, 10)),
        torch.zeros(10),
    ]

    result = three_layer_convnet(batch, zero_params)
    print(result.size())  # you should see [64, 10]
three_layer_convnet_test()

torch.Size([64, 10])


In [23]:
def random_weight(shape):
    """Create a trainable tensor of normal samples scaled by sqrt(2 / fan_in).

    For a 2-D shape fan_in is shape[0]; for any other shape it is the
    product of all dimensions after the first.

    Args:
        shape: sequence of ints giving the tensor shape.

    Returns:
        Tensor on the notebook-wide `device` with dtype `dtype` and
        requires_grad set to True.
    """
    fan_in = shape[0] if len(shape) == 2 else np.prod(shape[1:])
    scale = np.sqrt(2. / fan_in)
    weight = torch.randn(shape, device=device, dtype=dtype) * scale
    # Enable gradients only after scaling so the returned tensor is a leaf.
    weight.requires_grad = True
    return weight
def zero_weight(shape):
    """Return an all-zero trainable tensor of `shape` on the notebook-wide device/dtype."""
    zeros = torch.zeros(shape, dtype=dtype, device=device, requires_grad=True)
    return zeros

    

In [31]:
def check_accuracy_part2(loader, model_fn, params):
    """Print the classification accuracy of a functional model on `loader`.

    Args:
        loader: DataLoader yielding (inputs, labels) batches; its dataset's
            `train` flag decides whether the split is reported as 'val' or 'test'.
        model_fn: callable taking (x, params) and returning per-class scores.
        params: parameters forwarded unchanged to model_fn.

    Returns:
        None; the result is printed.
    """
    split = 'val' if loader.dataset.train else 'test'
    print(f'Checking accuracy on the {split} set')
    num_correct, num_samples = 0, 0
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=dtype)
            # Labels are class indices: keep them integral, matching the dtype
            # used by the training loop, instead of casting them to float.
            y = y.to(device=device, dtype=torch.int64)
            scores = model_fn(x, params)
            _, preds = torch.max(scores, dim=1)
            # .item() keeps the running count a plain Python int.
            num_correct += (preds == y).sum().item()
            num_samples += preds.size(0)
        # An empty loader reports 0% instead of raising ZeroDivisionError.
        acc = float(num_correct) / num_samples if num_samples else 0.0
        print('Got %d / %d correct (%.2f%%)' %(num_correct, num_samples, 100* acc))

In [36]:
def train_part2(model_fn, params, learning_rate):
    """Run one pass over loader_train with plain SGD on a functional model.

    Args:
        model_fn: callable taking (x, params) and returning class scores.
        params: iterable of tensors with requires_grad=True; updated in place.
        learning_rate: step size for the gradient-descent update.

    Every `print_every` iterations the current loss is printed and the
    validation accuracy is reported via check_accuracy_part2.
    """
    for step, batch in enumerate(loader_train):
        images, labels = batch
        images = images.to(device=device, dtype=dtype)
        labels = labels.to(device=device, dtype=torch.int64)

        loss = F.cross_entropy(model_fn(images, params), labels)
        loss.backward()

        # The update itself must not be recorded by autograd.
        with torch.no_grad():
            for weight in params:
                weight -= learning_rate * weight.grad
                # Reset so the next backward() does not accumulate on top.
                weight.grad.zero_()

        if step % print_every != 0:
            continue
        print('Iteration %d, loss = %.4f' %(step, loss.item()))
        check_accuracy_part2(loader_val, model_fn, params)
        print()
    


In [37]:
# Train the functional two-layer network for one pass over the training data.
hidden_layer_size = 4000
learning_rate = 1e-2

# Flattened image features -> hidden units -> 10 class scores.
w1 = random_weight((3 * 32 * 32, hidden_layer_size))
w2 = random_weight((hidden_layer_size, 10))

train_part2(model_fn=two_layer_fc, params=[w1, w2], learning_rate=learning_rate)

Iteration 0, loss = 4.3042
Checking accuracy on the val set
Got 139 / 1000 correct (13.90%)

Iteration 100, loss = 2.4231
Checking accuracy on the val set
Got 363 / 1000 correct (36.30%)

Iteration 200, loss = 1.7054
Checking accuracy on the val set
Got 318 / 1000 correct (31.80%)

Iteration 300, loss = 1.9891
Checking accuracy on the val set
Got 399 / 1000 correct (39.90%)

Iteration 400, loss = 1.6583
Checking accuracy on the val set
Got 406 / 1000 correct (40.60%)

Iteration 500, loss = 1.9078
Checking accuracy on the val set
Got 435 / 1000 correct (43.50%)

Iteration 600, loss = 1.8850
Checking accuracy on the val set
Got 456 / 1000 correct (45.60%)

Iteration 700, loss = 1.8756
Checking accuracy on the val set
Got 451 / 1000 correct (45.10%)



In [38]:
# Another pass with the same w1/w2 tensors: train_part2 updates them in place,
# so this continues from the weights left by the previous run.
train_part2(two_layer_fc, [w1,w2],learning_rate)

Iteration 0, loss = 1.2595
Checking accuracy on the val set
Got 437 / 1000 correct (43.70%)

Iteration 100, loss = 1.4550
Checking accuracy on the val set
Got 480 / 1000 correct (48.00%)

Iteration 200, loss = 1.3386
Checking accuracy on the val set
Got 469 / 1000 correct (46.90%)

Iteration 300, loss = 1.1238
Checking accuracy on the val set
Got 463 / 1000 correct (46.30%)

Iteration 400, loss = 1.5193
Checking accuracy on the val set
Got 474 / 1000 correct (47.40%)

Iteration 500, loss = 1.5016
Checking accuracy on the val set
Got 456 / 1000 correct (45.60%)

Iteration 600, loss = 1.3309
Checking accuracy on the val set
Got 480 / 1000 correct (48.00%)

Iteration 700, loss = 1.3966
Checking accuracy on the val set
Got 466 / 1000 correct (46.60%)



In [40]:
# Train the functional three-layer ConvNet for one pass over the training data.
learning_rate = 3e-3

channel_1 = 32
channel_2 = 16

# Conv weights are (out_channels, in_channels, kernel_H, kernel_W); every bias
# starts at zero. All shapes are passed as tuples for consistency.
conv_w1 = random_weight((channel_1, 3, 5, 5))
conv_b1 = zero_weight((channel_1,))
conv_w2 = random_weight((channel_2, channel_1, 3, 3))
conv_b2 = zero_weight((channel_2,))
# three_layer_convnet pads its convolutions so the 32x32 spatial size is kept.
fc_w = random_weight((channel_2 * 32 * 32, 10))
fc_b = zero_weight((10,))

params = [conv_w1, conv_b1, conv_w2, conv_b2, fc_w, fc_b]
train_part2(three_layer_convnet, params, learning_rate)

Iteration 0, loss = 3.5002
Checking accuracy on the val set
Got 122 / 1000 correct (12.20%)

Iteration 100, loss = 2.1151
Checking accuracy on the val set
Got 385 / 1000 correct (38.50%)

Iteration 200, loss = 1.8424
Checking accuracy on the val set
Got 405 / 1000 correct (40.50%)

Iteration 300, loss = 1.7020
Checking accuracy on the val set
Got 455 / 1000 correct (45.50%)

Iteration 400, loss = 1.4370
Checking accuracy on the val set
Got 468 / 1000 correct (46.80%)

Iteration 500, loss = 1.8262
Checking accuracy on the val set
Got 448 / 1000 correct (44.80%)

Iteration 600, loss = 1.3755
Checking accuracy on the val set
Got 463 / 1000 correct (46.30%)

Iteration 700, loss = 1.4467
Checking accuracy on the val set
Got 501 / 1000 correct (50.10%)



In [42]:
class TwoLayerFC(nn.Module):
    """Two-layer fully connected classifier: Linear -> ReLU -> Linear.

    Both weight matrices are re-initialized with Kaiming-normal values.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """Build the two linear layers.

        Args:
            input_size: number of features per flattened input sample.
            hidden_size: width of the hidden layer.
            num_classes: number of output scores.
        """
        super().__init__()
        # Each layer is initialized right after it is created.
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        nn.init.kaiming_normal_(self.fc1.weight)
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=num_classes)
        nn.init.kaiming_normal_(self.fc2.weight)

    def forward(self, x):
        """Flatten x with flatten() and return the class scores."""
        flat = flatten(x)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)


        

In [43]:
class ThreeLayerConvNet(nn.Module):
    """Conv(5x5, pad 2) -> ReLU -> Conv(3x3, pad 1) -> ReLU -> Linear.

    The linear layer is sized for channel_2 * 32 * 32 features, i.e. for
    32x32 inputs (both convolutions preserve the spatial size).
    """

    def __init__(self, in_channel, channel_1, channel_2, num_classes):
        """Create the layers and apply Kaiming-normal init to every weight.

        Args:
            in_channel: number of channels of the input images.
            channel_1: output channels of the first convolution.
            channel_2: output channels of the second convolution.
            num_classes: number of output scores.
        """
        super().__init__()
        # Each layer is initialized right after it is created.
        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=channel_1,
                               kernel_size=5, padding=2)
        nn.init.kaiming_normal_(self.conv1.weight)
        self.conv2 = nn.Conv2d(in_channels=channel_1, out_channels=channel_2,
                               kernel_size=3, padding=1)
        nn.init.kaiming_normal_(self.conv2.weight)
        self.fc = nn.Linear(in_features=channel_2 * 32 * 32, out_features=num_classes)
        nn.init.kaiming_normal_(self.fc.weight)

    def forward(self, x):
        """Return class scores for the batch x."""
        hidden1 = F.relu(self.conv1(x))
        hidden2 = F.relu(self.conv2(hidden1))
        # One row of features per sample for the linear layer.
        features = hidden2.view(hidden2.size(0), -1)
        return self.fc(features)

In [44]:
def test_ThreeLayerConvNet():
    """Shape smoke test: push a random batch through ThreeLayerConvNet and print the score size."""
    net = ThreeLayerConvNet(in_channel=3, channel_1=32, channel_2=16, num_classes=10)
    # minibatch size 64, 3 channels (RGB), 32x32 images
    batch = torch.randn(64, 3, 32, 32)
    print(net(batch).size())  # you should see [64, 10]

test_ThreeLayerConvNet()

torch.Size([64, 10])


In [77]:
def check_accuracy_part34(loader, model):
    """Print the classification accuracy of an nn.Module on `loader`.

    Args:
        loader: DataLoader yielding (inputs, labels) batches; its dataset's
            `train` flag decides whether the split is reported as 'val' or 'test'.
        model: module returning per-class scores; it is switched to eval mode.

    Returns:
        None; the result is printed.
    """
    if loader.dataset.train:
        split = 'val'
    else:
        split = 'test'
    print('Checking accuracy on the %s set' % split)

    correct = 0
    total = 0
    model.eval()  # left in eval mode; callers switch back to train mode themselves
    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device=device, dtype=dtype)
            labels = labels.to(device=device, dtype=torch.long)

            # max over dim 1 returns (values, indices); the indices are the predicted classes
            predictions = model(images).max(1)[1]
            correct += (predictions == labels).sum()
            total += predictions.size(0)
        accuracy = float(correct) / total
        print('Got %d / %d correct %.2f%%' %(correct, total, 100*accuracy))

In [81]:
def train_part34(model, optimizer, epochs =1):
    """Train `model` on loader_train with the given optimizer.

    Args:
        model: nn.Module to train; it is moved to the notebook-wide device.
        optimizer: optimizer whose step() is called once per mini-batch.
        epochs: number of full passes over loader_train.

    Every `print_every` iterations the loss is printed and the validation
    accuracy is reported via check_accuracy_part34.
    """
    model = model.to(device=device)
    for _ in range(epochs):
        for step, (images, labels) in enumerate(loader_train):
            # The accuracy check leaves the model in eval mode, so train mode
            # is re-enabled on every iteration.
            model.train()
            images = images.to(device=device, dtype=dtype)
            labels = labels.to(device=device, dtype=torch.long)

            loss = F.cross_entropy(model(images), labels)

            # Clear old gradients, backpropagate, then update the weights.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if step % print_every != 0:
                continue
            print('Iteration %d, loss = %.4f' %(step, loss.item()))
            check_accuracy_part34(loader_val, model)
            print()

In [66]:
# Train the nn.Module version of the two-layer network with plain SGD.
hidden_layer_size = 4000
learning_rate = 1e-2

model = TwoLayerFC(input_size=3 * 32 * 32,
                   hidden_size=hidden_layer_size,
                   num_classes=10)
optimizer = optim.SGD(params=model.parameters(), lr=learning_rate)

train_part34(model, optimizer)


Iteration 0, loss = 3.3899
Checking accuracy on the val set
Got 143 / 1000 correct 14.30%

Iteration 100, loss = 2.0971
Checking accuracy on the val set
Got 354 / 1000 correct 35.40%

Iteration 200, loss = 1.9965
Checking accuracy on the val set
Got 358 / 1000 correct 35.80%

Iteration 300, loss = 2.1168
Checking accuracy on the val set
Got 361 / 1000 correct 36.10%

Iteration 400, loss = 1.7581
Checking accuracy on the val set
Got 371 / 1000 correct 37.10%

Iteration 500, loss = 1.8092
Checking accuracy on the val set
Got 412 / 1000 correct 41.20%

Iteration 600, loss = 1.5403
Checking accuracy on the val set
Got 427 / 1000 correct 42.70%

Iteration 700, loss = 1.5010
Checking accuracy on the val set
Got 444 / 1000 correct 44.40%



In [72]:
# Keep training the existing `model` with the same `optimizer` for five more
# epochs; nothing is re-initialized here.
train_part34(model, optimizer, epochs=5)

Iteration 0, loss = 0.1101
Checking accuracy on the val set
Got 552 / 1000 correct 55.20%

Iteration 100, loss = 0.0923
Checking accuracy on the val set
Got 542 / 1000 correct 54.20%

Iteration 200, loss = 0.1455
Checking accuracy on the val set
Got 539 / 1000 correct 53.90%

Iteration 300, loss = 0.0727
Checking accuracy on the val set
Got 550 / 1000 correct 55.00%

Iteration 400, loss = 0.0737
Checking accuracy on the val set
Got 561 / 1000 correct 56.10%

Iteration 500, loss = 0.0764
Checking accuracy on the val set
Got 552 / 1000 correct 55.20%

Iteration 600, loss = 0.0771
Checking accuracy on the val set
Got 555 / 1000 correct 55.50%

Iteration 700, loss = 0.0742
Checking accuracy on the val set
Got 550 / 1000 correct 55.00%

Iteration 0, loss = 0.1016
Checking accuracy on the val set
Got 542 / 1000 correct 54.20%

Iteration 100, loss = 0.0550
Checking accuracy on the val set
Got 555 / 1000 correct 55.50%

Iteration 200, loss = 0.0598
Checking accuracy on the val set
Got 556 / 10

In [82]:
# Train the nn.Module three-layer ConvNet with plain SGD.
learning_rate = 3e-3
channel_1 = 32
channel_2 = 16

# Use the channel settings defined above instead of repeating the literals,
# so that editing them actually changes the model.
model = ThreeLayerConvNet(in_channel=3, channel_1=channel_1, channel_2=channel_2, num_classes=10)
optimizer = optim.SGD(model.parameters(), lr = learning_rate)

train_part34(model, optimizer)



Iteration 0, loss = 2.7688
Checking accuracy on the val set
Got 116 / 1000 correct 11.60%

Iteration 100, loss = 1.7705
Checking accuracy on the val set
Got 343 / 1000 correct 34.30%

Iteration 200, loss = 1.7883
Checking accuracy on the val set
Got 380 / 1000 correct 38.00%

Iteration 300, loss = 1.4381
Checking accuracy on the val set
Got 421 / 1000 correct 42.10%

Iteration 400, loss = 1.3594
Checking accuracy on the val set
Got 443 / 1000 correct 44.30%

Iteration 500, loss = 1.6429
Checking accuracy on the val set
Got 476 / 1000 correct 47.60%

Iteration 600, loss = 1.5164
Checking accuracy on the val set
Got 484 / 1000 correct 48.40%

Iteration 700, loss = 1.5828
Checking accuracy on the val set
Got 483 / 1000 correct 48.30%



In [85]:
class Flatten(nn.Module):
    """Module wrapper that flattens each sample so it can be used inside nn.Sequential."""

    def forward(self, x):
        """Return x with every dimension after the first merged into one."""
        # reshape matches view on contiguous input and additionally handles
        # non-contiguous tensors, where view raises.
        return x.reshape(x.size(0),-1)
    
# Two-layer fully connected network expressed with nn.Sequential, trained
# with SGD plus Nesterov momentum.
hidden_layer_size = 4000
learning_rate = 1e-2

model = nn.Sequential(
    Flatten(),
    nn.Linear(in_features=3 * 32 * 32, out_features=hidden_layer_size),
    nn.ReLU(),
    nn.Linear(in_features=hidden_layer_size, out_features=10),
)

optimizer = optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=0.9,
    nesterov=True,
)

train_part34(model, optimizer)

    

Iteration 0, loss = 2.3536
Checking accuracy on the val set
Got 169 / 1000 correct 16.90%

Iteration 100, loss = 1.7509
Checking accuracy on the val set
Got 393 / 1000 correct 39.30%

Iteration 200, loss = 1.7583
Checking accuracy on the val set
Got 418 / 1000 correct 41.80%

Iteration 300, loss = 1.9626
Checking accuracy on the val set
Got 418 / 1000 correct 41.80%

Iteration 400, loss = 1.6779
Checking accuracy on the val set
Got 424 / 1000 correct 42.40%

Iteration 500, loss = 2.0461
Checking accuracy on the val set
Got 438 / 1000 correct 43.80%

Iteration 600, loss = 1.8734
Checking accuracy on the val set
Got 445 / 1000 correct 44.50%

Iteration 700, loss = 1.5013
Checking accuracy on the val set
Got 444 / 1000 correct 44.40%



In [87]:
# NOTE(review): this duplicates the Flatten class defined in an earlier cell;
# this later definition is the one in effect from here on.
class Flatten(nn.Module):
    """Module wrapper that flattens each sample so it can be used inside nn.Sequential."""

    def forward(self, x):
        """Return x with every dimension after the first merged into one."""
        # reshape matches view on contiguous input and additionally handles
        # non-contiguous tensors, where view raises.
        return x.reshape(x.size(0),-1)
    
# Three-layer ConvNet expressed with nn.Sequential, trained with SGD plus
# Nesterov momentum.
channel_1 = 32
channel_2 = 16
learning_rate = 1e-2

model = nn.Sequential(
    nn.Conv2d(in_channels=3, out_channels=channel_1, kernel_size=5, padding=2),
    nn.ReLU(),
    nn.Conv2d(in_channels=channel_1, out_channels=channel_2, kernel_size=3, padding=1),
    nn.ReLU(),
    Flatten(),
    nn.Linear(channel_2 *32 *32,10)
)

# Use the learning_rate defined above rather than a second hard-coded copy,
# so changing it in one place is enough.
optimizer = optim.SGD(model.parameters(), lr = learning_rate, momentum=0.9, nesterov=True)

train_part34(model, optimizer)

Iteration 0, loss = 2.3282
Checking accuracy on the val set
Got 137 / 1000 correct 13.70%

Iteration 100, loss = 1.6690
Checking accuracy on the val set
Got 429 / 1000 correct 42.90%

Iteration 200, loss = 1.5154
Checking accuracy on the val set
Got 468 / 1000 correct 46.80%

Iteration 300, loss = 1.5724
Checking accuracy on the val set
Got 500 / 1000 correct 50.00%

Iteration 400, loss = 1.3488
Checking accuracy on the val set
Got 520 / 1000 correct 52.00%

Iteration 500, loss = 1.2996
Checking accuracy on the val set
Got 533 / 1000 correct 53.30%

Iteration 600, loss = 1.1008
Checking accuracy on the val set
Got 579 / 1000 correct 57.90%

Iteration 700, loss = 1.3165
Checking accuracy on the val set
Got 588 / 1000 correct 58.80%



In [88]:
# `model` is whatever the most recently executed training cell left behind;
# best_model is just another name for that same object (no copy is made).
best_model = model
# Report accuracy on the held-out test split.
check_accuracy_part34(loader_test, best_model)

Checking accuracy on the test set
Got 5719 / 10000 correct 57.19%
