In [50]:
import time 
import math 

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader

import torchvision.datasets as dsets
import torchvision.transforms as transforms

In [51]:
# Mini-batch size used when building the DataLoaders in this notebook.
BATCH_SIZE = 100

# Pick the fastest available backend: CUDA first, then Apple MPS, then CPU.
# `torch.backends.mps` is absent on older PyTorch builds, so look it up
# defensively instead of assuming the attribute exists.
_mps_backend = getattr(torch.backends, 'mps', None)
if torch.cuda.is_available():
    device = torch.device('cuda')
elif _mps_backend is not None and _mps_backend.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')

In [52]:
# Per-channel normalisation constants, written on the 0-255 pixel scale and
# divided by 255 so they match the [0, 1] range produced by ToTensor().
mean = [x/255 for x in [125.3, 123.0, 113.9]]
std = [x/255 for x in [63.0, 62.1, 66.7]]
n_train_samples = 5000  # only the first n_train_samples training images are kept

# Training pipeline uses data augmentation: a random horizontal flip followed
# by a random 32x32 crop taken after padding every side by 4 pixels.
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])
train_set = dsets.CIFAR10(root='./data', train=True, transform=train_transform, download=True)

# Truncate the dataset BEFORE creating the DataLoader, so the loader is built
# against the final dataset size instead of relying on the length being
# re-read after the loader already exists.
train_set.data = train_set.data[:n_train_samples]
train_set.targets = train_set.targets[:n_train_samples]
train_dl = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=6)

# The test set gets the same tensor conversion and normalisation, but no
# augmentation and no shuffling.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])
test_set = dsets.CIFAR10(root='./data', train=False, transform=test_transform, download=True)
test_dl = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=6)
    

Files already downloaded and verified
Files already downloaded and verified


In [53]:
# Helper functions for training and evaluation
def eval(model, criterion, dataloader):
    """Evaluate ``model`` on every batch of ``dataloader``.

    Note: this function shadows Python's builtin ``eval``; the name is kept
    because other cells call it.

    Args:
        model: module mapping a batch of inputs to per-class scores (logits).
        criterion: loss callable invoked as ``criterion(logits, targets)``.
            It is assumed to return the *mean* loss over the batch (the
            default reduction of ``nn.CrossEntropyLoss``), because each batch
            loss is re-weighted by the batch size.
        dataloader: iterable yielding ``(inputs, targets)`` pairs.

    Returns:
        ``(loss, accuracy)`` as plain Python floats: the per-sample mean loss
        and the percentage (0-100) of samples whose arg-max prediction equals
        the target.

    Raises:
        ValueError: if ``dataloader`` yields no samples.
    """
    model.eval()
    total_loss, n_correct, n_samples = 0.0, 0, 0

    # torch.no_grad() disables gradient tracking; nothing is back-propagated here.
    with torch.no_grad():
        for batch_x, batch_y in dataloader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            n_batch = batch_y.size(0)

            logits = model(batch_x)
            # Weight by batch size so a smaller final batch does not count
            # as much as a full one.
            total_loss += criterion(logits, batch_y).item() * n_batch

            # Count correct samples (not per-batch ratios) and convert to a
            # Python int so the result never stays on the accelerator.
            n_correct += (logits.argmax(dim=1) == batch_y).sum().item()
            n_samples += n_batch

    if n_samples == 0:
        raise ValueError('eval() received a dataloader that yields no samples')

    return total_loss / n_samples, 100.0 * n_correct / n_samples
    
def train_epoch(model, criterion, optimizer, dataloader):
    """Run one optimisation pass over ``dataloader``.

    Puts ``model`` in training mode, then for every ``(inputs, targets)``
    batch: moves both tensors to the notebook-level ``device``, clears the
    accumulated gradients, computes ``criterion(model(inputs), targets)``,
    back-propagates it and applies one ``optimizer.step()``.

    Returns nothing; ``model`` and ``optimizer`` are updated in place.
    """
    model.train()
    for inputs, targets in dataloader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Gradients accumulate across backward() calls, so reset them first.
        optimizer.zero_grad()
        batch_loss = criterion(model(inputs), targets)
        batch_loss.backward()
        optimizer.step()

In [54]:
class ResidualBlock(nn.Module):
    """Residual block computing ``F(x) + shortcut(x)``.

    ``F`` is conv3x3(stride) -> BatchNorm -> ReLU -> conv3x3 -> BatchNorm.
    No activation is applied after the addition.

    The shortcut is a plain pass-through when the block keeps the input
    shape. Whenever the shape changes (``stride != 1`` or
    ``in_channels != out_channels``) it becomes a 1x1 strided convolution
    followed by BatchNorm so both branches can be added.

    Args:
        in_channels: number of channels of the input feature map.
        out_channels: number of channels produced by the block.
        stride: stride of the first 3x3 convolution and of the projection
            shortcut; values other than 1 down-sample spatially.
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.stride = stride
        self.F = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
        )

        # A projection is needed whenever F(x) and x differ in shape. Checking
        # only the stride would leave a stride-1 block that changes the
        # channel count trying to add mismatched tensors.
        if stride != 1 or in_channels != out_channels:
            self.identity = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride),
                nn.BatchNorm2d(out_channels),
            )
        else:
            # nn.Identity has no parameters or buffers, so the state_dict
            # keys of shape-preserving blocks are unaffected.
            self.identity = nn.Identity()

    def forward(self, x):
        """Return ``F(x) + shortcut(x)`` for an ``(N, in_channels, H, W)`` input."""
        return self.F(x) + self.identity(x)

In [55]:
# Smoke test: one shape-preserving block and one block that doubles the
# channels while halving the spatial resolution, both fed the same input.
rbk1 = ResidualBlock(32, 32)
rbk2 = ResidualBlock(32, 64, 2)

x = torch.rand(5, 32, 32, 32)
y1, y2 = rbk1(x), rbk2(x)

# Print the output size of each block.
for block_output in (y1, y2):
    print(block_output.size())


torch.Size([5, 32, 32, 32])
torch.Size([5, 64, 16, 16])


In [56]:
class ResNet(nn.Module):
    """Small ResNet: a 3x3 stem, three stages of three ResidualBlocks, 8x8 average pooling.

    The stages use 16, 32 and 64 channels; the second and third stage start
    with a stride-2 block. For 3x32x32 inputs the pooled map is 64x1x1, so
    the flattened feature vector has 64 entries.

    Args:
        num_classes: if ``None`` (default) ``forward`` returns the flattened
            pooled features unchanged, i.e. there is no classification layer.
            If an int, a final ``nn.Linear(64, num_classes)`` maps the
            features to class logits. The linear layer assumes the
            64-entry feature vector obtained from 32x32 inputs.
    """

    def __init__(self, num_classes=None):
        super().__init__()
        self.feature = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1), nn.BatchNorm2d(16), nn.ReLU(),
            ResidualBlock(16, 16), ResidualBlock(16, 16), ResidualBlock(16, 16),
            ResidualBlock(16, 32, 2), ResidualBlock(32, 32), ResidualBlock(32, 32),
            ResidualBlock(32, 64, 2), ResidualBlock(64, 64), ResidualBlock(64, 64),
            nn.AvgPool2d(8),
        )
        # Optional classification head. Left as None by default so existing
        # callers and saved state_dicts see exactly the same module.
        self.classifier = nn.Linear(64, num_classes) if num_classes is not None else None

    def forward(self, x):
        """Return flattened features, or class logits when a head was requested."""
        x = self.feature(x)
        x = x.view(x.size(0), -1)  # (N, C, H, W) -> (N, C*H*W)
        if self.classifier is not None:
            x = self.classifier(x)
        return x

In [57]:
nepochs = 50

# NOTE(review): ResNet.forward returns the flattened pooled feature map with
# no final linear layer, so for 32x32 inputs CrossEntropyLoss receives 64
# scores per sample although CIFAR-10 has 10 classes. Confirm this is intended.
net = ResNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(),
                            lr=0.2,momentum=0.9,nesterov=True)
# Multiply the learning rate by 0.1 once 40 epochs have been completed.
scheduler = lr_scheduler.MultiStepLR(optimizer,milestones=[40],gamma=0.1)
# One [tr_loss, tr_acc, te_loss, te_acc] row per epoch.
learning_history = []

print('Start Training')
for epoch in range(nepochs):
    since = time.time()

    # Learning rate in effect for this epoch. get_last_lr() is the accessor
    # meant for use outside scheduler.step(); get_lr() can report a value
    # that was never applied when called from user code.
    current_lr = scheduler.get_last_lr()[0]

    train_epoch(net,criterion,optimizer,train_dl)
    # Advance the schedule only AFTER the epoch's optimizer steps. Stepping
    # first skips the initial value of the schedule and moves the milestone
    # one epoch early.
    scheduler.step()

    tr_loss,tr_acc = eval(net,criterion,train_dl)
    te_loss,te_acc = eval(net,criterion,test_dl)
    now = time.time()

    learning_history.append([tr_loss,tr_acc,te_loss,te_acc])
    print('[%3d/%d, %.0f seconds] |\t tr_err: %.1e, tr_acc: %.2f |\t te_err: %.1e, te_acc: %.2f' % (
            epoch+1, nepochs, now-since, tr_loss, tr_acc, te_loss, te_acc))

Start Training
[  1/50, 114 seconds] |	 tr_err: 2.0e+00, tr_acc: 23.96 |	 te_err: 2.0e+00, te_acc: 25.29
[  2/50, 114 seconds] |	 tr_err: 1.8e+00, tr_acc: 31.98 |	 te_err: 1.8e+00, te_acc: 33.21
[  3/50, 114 seconds] |	 tr_err: 1.7e+00, tr_acc: 34.10 |	 te_err: 1.8e+00, te_acc: 33.64
[  4/50, 114 seconds] |	 tr_err: 1.8e+00, tr_acc: 31.40 |	 te_err: 1.9e+00, te_acc: 31.92
[  5/50, 114 seconds] |	 tr_err: 1.6e+00, tr_acc: 41.42 |	 te_err: 1.6e+00, te_acc: 40.67
[  6/50, 114 seconds] |	 tr_err: 1.7e+00, tr_acc: 37.78 |	 te_err: 1.8e+00, te_acc: 37.64
[  7/50, 114 seconds] |	 tr_err: 1.6e+00, tr_acc: 41.34 |	 te_err: 1.6e+00, te_acc: 41.16
[  8/50, 114 seconds] |	 tr_err: 1.6e+00, tr_acc: 43.48 |	 te_err: 1.6e+00, te_acc: 41.72
[  9/50, 114 seconds] |	 tr_err: 1.5e+00, tr_acc: 42.16 |	 te_err: 1.6e+00, te_acc: 40.89
[ 10/50, 114 seconds] |	 tr_err: 1.4e+00, tr_acc: 49.88 |	 te_err: 1.4e+00, te_acc: 48.34
[ 11/50, 114 seconds] |	 tr_err: 1.7e+00, tr_acc: 42.02 |	 te_err: 1.9e+00, te_acc: 3