In [8]:
import torch as t
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler

import torchvision.datasets as dset
import torchvision.transforms as T

import numpy as np

In [5]:
NUM_TRAIN = 49000
transform = T.Compose([
    T.ToTensor(),
    T.Normalize((0.4914,0.4822,0.4465),(0.2023,0.1994,0.2010))#三个维度归一化
])

cifar10_train = dset.CIFAR10('./cs231n/datasets',train=True,download=False,transform=transform)
loader_train = DataLoader(cifar10_train,batch_size=64,sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)))

cifar10_val = dset.CIFAR10('./cs231n/datasets',train=True,download=False,transform=transform)
loader_val = DataLoader(cifar10_val,batch_size=64,sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN,50000)))

cifar10_test = dset.CIFAR10('./cs231n/datasets',train=False,download=False,transform=transform)
loader_test = DataLoader(cifar10_test,batch_size=64)

In [9]:
USE_GPU = True
dtype = torch.float32

if USE_GPU and torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device('cpu')

print_every = 100
print('using device:',device)

using device: cuda


In [10]:
def flatten(x):
    N = x.shape[0]
    return x.view(N,-1)

def test_flatten():
    x = torch.arange(12).view(2,1,3,2)
    print('Before flattening: ',x)
    print('After flattening: ',flatten(x))

test_flatten()

Before flattening:  tensor([[[[ 0,  1],
          [ 2,  3],
          [ 4,  5]]],


        [[[ 6,  7],
          [ 8,  9],
          [10, 11]]]])
After flattening:  tensor([[ 0,  1,  2,  3,  4,  5],
        [ 6,  7,  8,  9, 10, 11]])


In [11]:
import torch.nn.functional as F

def two_layer_fc(x,params):
    x = flatten(x)
    w1,w2 = params
    x = F.relu(x.mm(w1))
    x = x.mm(w2)
    return x

def two_layer_fc_test():
    hidden_layer_size = 42
    x = torch.zeros((64,50),dtype=dtype)
    w1 = torch.zeros((50,hidden_layer_size),dtype=dtype)
    w2 = torch.zeros((hidden_layer_size,10),dtype=dtype)
    scores = two_layer_fc(x,[w1,w2])
    print(scores.size())

two_layer_fc_test()

torch.Size([64, 10])


In [59]:
#三层卷积网络
def three_layer_convnet(x,params):
    conv_w1,conv_b1,conv_w2,conv_b2,fc_w,fc_b = params
    out1 = F.conv2d(x,conv_w1,bias=conv_b1,stride=1,padding=(2,2))
    relu1 = F.relu(out1)
    out2 = F.conv2d(relu1,conv_w2,bias=conv_b2,stride=1,padding=(1,1))
    relu2 = F.relu(out2)
    scores = t.mm(flatten(relu2),fc_w) + fc_b
    return scores

In [60]:
def three_layer_convnet_test():
    x = t.zeros((64,3,32,32),dtype=dtype)
    conv_w1 = t.zeros((6,3,5,5),dtype=dtype)
    conv_b1 = t.zeros((6,))
    conv_w2 = t.zeros((9,6,3,3),dtype=dtype)
    conv_b2 = t.zeros((9,))
    
    fc_w = t.zeros((9*32*32,10))
    fc_b = t.zeros(10)
    
    scores = three_layer_convnet(x,[conv_w1,conv_b1,conv_w2,conv_b2,fc_w,fc_b ])
    print(scores.size())
three_layer_convnet_test()

torch.Size([64, 10])


In [12]:
def random_weight(shape):
    if len(shape) == 2:
        fan_in = shape[0]
    else:
        fan_in = np.prod(shape[1:])
    w = torch.randn(shape,device=device,dtype=dtype)*np.sqrt(2./fan_in)
    w.requires_grad = True
    return w

def zero_weight(shape):
    return t.zeros(shape,device=device,dtype=dtype,requires_grad=True)

random_weight((3,5))

tensor([[ 0.2425, -1.5738, -0.5799,  0.2961, -0.2024],
        [-0.4393, -1.3839, -0.9638, -1.3439,  0.2409],
        [ 0.2264, -0.5587,  0.3888,  0.3330, -0.9121]], device='cuda:0',
       requires_grad=True)

In [13]:
def check_accuracy_part2(loader, model_fn, params):
    split = 'val' if loader.dataset.train else 'test'
    print('Checking accuracy on the %s set' % split)
    num_correct, num_samples = 0, 0
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device,dtype=dtype)
            y = y.to(device=device,dtype=torch.int64)
            scores = model_fn(x,params)
            _,preds = scores.max(1)
            num_correct += (preds==y).sum()
            num_samples += preds.size(0)
        acc = float(num_correct)/num_samples
        print("Got %d / %d correct (%.2f%%)" % (num_correct,num_samples,100*acc))

In [14]:
def train_part2(model_fn,params,learning_rate):
    for t,(x,y) in enumerate(loader_train):
        x = x.to(device=device,dtype=dtype)
        y = y.to(device=device,dtype=torch.long)
        
        scores = model_fn(x,params)
        loss = F.cross_entropy(scores,y)
        
        loss.backward()
        
        with torch.no_grad():
            for w in params:
                w -= learning_rate*w.grad
                w.grad.zero_()
        
        if t%print_every == 0:
            print("Iteration %d, loss = %.4f"%(t,loss.item()))
            check_accuracy_part2(loader_val,model_fn,params)
            print()
        

In [36]:
hidden_layer_size = 4000
learning_rate = 1e-2

w1 = random_weight((3*32*32,hidden_layer_size))
w2 = random_weight((hidden_layer_size,10))

train_part2(two_layer_fc,[w1,w2],learning_rate)

Iteration 0, loss = 3.6966
Checking accuracy on the val set
Got 149 / 1000 correct (14.90%)

Iteration 100, loss = 3.2287
Checking accuracy on the val set
Got 287 / 1000 correct (28.70%)

Iteration 200, loss = 2.0060
Checking accuracy on the val set
Got 386 / 1000 correct (38.60%)

Iteration 300, loss = 1.9690
Checking accuracy on the val set
Got 427 / 1000 correct (42.70%)

Iteration 400, loss = 2.1167
Checking accuracy on the val set
Got 366 / 1000 correct (36.60%)

Iteration 500, loss = 1.7700
Checking accuracy on the val set
Got 431 / 1000 correct (43.10%)

Iteration 600, loss = 1.9205
Checking accuracy on the val set
Got 439 / 1000 correct (43.90%)

Iteration 700, loss = 1.8867
Checking accuracy on the val set
Got 446 / 1000 correct (44.60%)



In [49]:
learning_rate = 3e-3
channel_1 = 32
channel_2 = 16

conv_w1 = random_weight((32,3,5,5))
conv_b1 = zero_weight((32,))
conv_w2 = random_weight((16,32,3,3))
conv_b2 = zero_weight((16))
fc_w = random_weight((16*32*32,10))
fc_b = zero_weight((10,))

params = [conv_w1,conv_b1,conv_w2,conv_b2,fc_w,fc_b]
train_part2(three_layer_convnet,params,learning_rate)

Iteration 0, loss = 3.5141
Checking accuracy on the val set
Got 142 / 1000 correct (14.20%)

Iteration 100, loss = 1.8537
Checking accuracy on the val set
Got 334 / 1000 correct (33.40%)

Iteration 200, loss = 1.4647
Checking accuracy on the val set
Got 380 / 1000 correct (38.00%)

Iteration 300, loss = 1.7131
Checking accuracy on the val set
Got 430 / 1000 correct (43.00%)

Iteration 400, loss = 1.6919
Checking accuracy on the val set
Got 434 / 1000 correct (43.40%)

Iteration 500, loss = 1.5352
Checking accuracy on the val set
Got 447 / 1000 correct (44.70%)

Iteration 600, loss = 1.4985
Checking accuracy on the val set
Got 470 / 1000 correct (47.00%)

Iteration 700, loss = 1.6987
Checking accuracy on the val set
Got 474 / 1000 correct (47.40%)



In [24]:
class TwoLayerFC(nn.Module):
    def __init__(self,input_size,hidden_size,num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size,hidden_size)
        nn.init.kaiming_normal_(self.fc1.weight)
        self.fc2 = nn.Linear(hidden_size,num_classes)
        nn.init.kaiming_normal_(self.fc2.weight)
    def forward(self,x):
        x = flatten(x)
        scores = self.fc2(F.relu(self.fc1(x)))
        return scores
def test_TwoLayerFC():
    input_size = 50
    x = torch.zeros((64,input_size),dtype=dtype)
    model = TwoLayerFC(input_size,42,10)
    scores = model(x)
    print(scores.size())
test_TwoLayerFC()

torch.Size([64, 10])


In [67]:
class ThreeLayerConvNet(nn.Module):
    def __init__(self,in_channel,channel_1,channel_2,num_classes):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channel,channel_1,stride=1,kernel_size=5,padding=2)
        nn.init.kaiming_normal_(self.conv1.weight)
        self.conv2 = nn.Conv2d(channel_1,channel_2,stride=1,kernel_size=3,padding=1)
        nn.init.kaiming_normal_(self.conv2.weight)
        self.fc = nn.Linear(channel_2*32*32,num_classes)
        nn.init.kaiming_normal_(self.fc.weight)
        
    def forward(self,x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = flatten(x)
        x = F.relu(x)
        scores = self.fc(x)
        pass
    
        return scores 

def test_ThreeLayerConvNet():
    x = torch.zeros((64,3,32,32),dtype=dtype)
    model = ThreeLayerConvNet(in_channel=3,channel_1=12,channel_2=8,num_classes=10)
    scores = model(x)
    print(scores.size())
    
test_ThreeLayerConvNet()

torch.Size([64, 10])


In [25]:
def check_accuracy_part34(loader,model):
    if loader.dataset.train:
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')
    num_correct = 0
    num_samples = 0
    model.eval()
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device,dtype=dtype)
            y = y.to(device=device,dtype=torch.long)
            scores = model(x)
            _, pre = scores.max(1)
            num_correct += (pre==y).sum()
            num_samples += pre.size(0)
        acc = float(num_correct)/num_samples
        print('Got %d / %d correct(%.2f%%)'%(num_correct,num_samples,100*acc))

In [26]:
def train_part34(model,optimizer,epochs=1):
    model = model.to(device=device)
    for e in range(epochs):
        for t, (x,y) in enumerate(loader_train):
            model.train()
            x = x.to(device=device,dtype=dtype)
            y = y.to(device=device,dtype=torch.long)
            
            scores = model(x)
            print(scores.shape,y.shape)
            loss = F.cross_entropy(scores,y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            if t%print_every==0:
                print('Iteration %d,loss=%.4f'%(t,loss.item()))
                check_accuracy_part34(loader_val,model)
                print()

In [27]:
#训练两层网络
hidden_layer_size = 4000
learning_rate = 1e-2
model = TwoLayerFC(3*32*32,hidden_layer_size,10)
optimizer = optim.SGD(model.parameters(),lr=learning_rate)

train_part34(model,optimizer)

torch.Size([64, 10]) torch.Size([64])
Iteration 0,loss=3.8682
Checking accuracy on validation set
Got 156 / 1000 correct(15.60%)

torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([6

torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([

torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([

torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([

In [74]:
learning_rate = 3e-3
channel_1 = 32
channel_2 = 16
model = ThreeLayerConvNet(3,channel_1,channel_2,10)
optimizer = optim.SGD(model.parameters(),lr=learning_rate)
train_part34(model,optimizer)

Iteration 0,loss=3.3078
Checking accuracy on validation set
Got 112 / 1000 correct(11.20%)

Iteration 100,loss=2.1316
Checking accuracy on validation set
Got 349 / 1000 correct(34.90%)

Iteration 200,loss=1.8675
Checking accuracy on validation set
Got 403 / 1000 correct(40.30%)

Iteration 300,loss=1.6428
Checking accuracy on validation set
Got 414 / 1000 correct(41.40%)

Iteration 400,loss=1.4439
Checking accuracy on validation set
Got 432 / 1000 correct(43.20%)

Iteration 500,loss=1.6245
Checking accuracy on validation set
Got 463 / 1000 correct(46.30%)

Iteration 600,loss=1.4438
Checking accuracy on validation set
Got 477 / 1000 correct(47.70%)

Iteration 700,loss=1.7814
Checking accuracy on validation set
Got 459 / 1000 correct(45.90%)



In [75]:
#序贯模型
class Flatten(nn.Module):
    def forward(self, x):
        return flatten(x)

hidden_layer_size = 4000
learning_rate = 1e-2

model = nn.Sequential(
       Flatten(),
       nn.Linear(3*32*32,hidden_layer_size),
       nn.ReLU(),
       nn.Linear(hidden_layer_size,10),
)

optimizer = optim.SGD(model.parameters(),lr=learning_rate,
                     momentum=0.9,nesterov=True)
train_part34(model,optimizer)

Iteration 0,loss=2.3296
Checking accuracy on validation set
Got 150 / 1000 correct(15.00%)

Iteration 100,loss=1.7277
Checking accuracy on validation set
Got 402 / 1000 correct(40.20%)

Iteration 200,loss=1.6539
Checking accuracy on validation set
Got 419 / 1000 correct(41.90%)

Iteration 300,loss=2.0933
Checking accuracy on validation set
Got 423 / 1000 correct(42.30%)

Iteration 400,loss=1.4858
Checking accuracy on validation set
Got 435 / 1000 correct(43.50%)

Iteration 500,loss=1.2619
Checking accuracy on validation set
Got 414 / 1000 correct(41.40%)

Iteration 600,loss=1.7588
Checking accuracy on validation set
Got 461 / 1000 correct(46.10%)

Iteration 700,loss=1.7702
Checking accuracy on validation set
Got 438 / 1000 correct(43.80%)



In [85]:
class ThreeLayerConvNet2(nn.Module):
    def __init__(self,in_channel,channel1,channel2,num_classes):
        super().__init__()
        self.conv1 = nn.Conv2d(3,32,5,padding=2,bias=True)
        #nn.init.kaiming_normal_(self.conv1.weight)
        #nn.init.constant_(self.conv1.bias,0)
        self.conv2 = nn.Conv2d(32,16,3,padding=1,bias=True)
        #nn.init.kaiming_normal_(self.conv2.weight)
        #nn.init.constant_(self.conv2.bias,0)
        self.fc = nn.Linear(16*32*32,num_classes,bias=True)
        #nn.init.kaiming_normal_(self.fc.weight)
       #nn.init.constant_(self.fc.bias,0)
        self.feature = nn.Sequential(
            self.conv1,
            nn.ReLU(),
            self.conv2,
            nn.ReLU(),
            Flatten(),
            self.fc,
        )
        
    def forward(self,x):
        x = self.feature(x)
        return x

channel_1 = 32
channel_2 = 16
learning_rate = 1e-3
model = ThreeLayerConvNet2(3,channel_1,channel_2,10)
optimizer = optim.SGD(model.parameters(),lr=learning_rate,momentum=0.9,nesterov=True)
train_part34(model,optimizer)

Iteration 0,loss=2.3092
Checking accuracy on validation set
Got 108 / 1000 correct(10.80%)

Iteration 100,loss=1.8043
Checking accuracy on validation set
Got 464 / 1000 correct(46.40%)

Iteration 200,loss=1.5400
Checking accuracy on validation set
Got 488 / 1000 correct(48.80%)

Iteration 300,loss=1.3004
Checking accuracy on validation set
Got 516 / 1000 correct(51.60%)

Iteration 400,loss=1.2861
Checking accuracy on validation set
Got 555 / 1000 correct(55.50%)

Iteration 500,loss=1.1440
Checking accuracy on validation set
Got 555 / 1000 correct(55.50%)

Iteration 600,loss=1.2523
Checking accuracy on validation set
Got 591 / 1000 correct(59.10%)

Iteration 700,loss=1.2331
Checking accuracy on validation set
Got 607 / 1000 correct(60.70%)



In [19]:
class Flatten(nn.Module):
    def forward(self, x):
        return flatten(x)


class ThreeConvNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.feature = nn.Sequential(
            nn.Conv2d(3,32,5,padding=2,bias=True),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32,64,3,padding=1,bias=True),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(64,128,3,padding=1,bias=True),
            nn.ReLU(),
            nn.MaxPool2d(2),
            Flatten(),
            nn.Linear(128*4*4,20,bias=True),
            nn.Linear(20,10,bias=True),
        )
    def forward(self,x):
        x = self.feature(x)
        return x
model = ThreeConvNetwork()
learning_rate = 1e-3
#学习率不能设置太
optimizer = optim.Adam(model.parameters(),lr=learning_rate)
train_part34(model,optimizer,epochs=10)

Iteration 0,loss=2.3195
Checking accuracy on validation set
Got 115 / 1000 correct(11.50%)

Iteration 100,loss=1.5465
Checking accuracy on validation set
Got 432 / 1000 correct(43.20%)

Iteration 200,loss=1.6579
Checking accuracy on validation set
Got 437 / 1000 correct(43.70%)

Iteration 300,loss=1.3772
Checking accuracy on validation set
Got 520 / 1000 correct(52.00%)

Iteration 400,loss=1.3333
Checking accuracy on validation set
Got 559 / 1000 correct(55.90%)

Iteration 500,loss=1.1405
Checking accuracy on validation set
Got 580 / 1000 correct(58.00%)

Iteration 600,loss=1.4264
Checking accuracy on validation set
Got 610 / 1000 correct(61.00%)

Iteration 700,loss=1.0799
Checking accuracy on validation set
Got 621 / 1000 correct(62.10%)

Iteration 0,loss=1.0310
Checking accuracy on validation set
Got 647 / 1000 correct(64.70%)

Iteration 100,loss=1.0998
Checking accuracy on validation set
Got 670 / 1000 correct(67.00%)

Iteration 200,loss=1.0505
Checking accuracy on validation set
Go

In [20]:
check_accuracy_part34(loader_test,model)

Checking accuracy on test set
Got 7421 / 10000 correct(74.21%)
