In [22]:
%matplotlib inline
import time
import torch
from torch import nn, optim
import torch.nn.functional as F
import torchvision
from PIL import Image

import sys
sys.path.append('..')
import d2lzh_pytorch as d2l

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
data_dir = './Datasets/CIFAR10'
all_images = torchvision.datasets.CIFAR10(train=True, root=data_dir, download=False)

In [8]:
class Residual(nn.Module):
    """
    构造残差块
    """
    def __init__(self, in_channels, out_channels, use_1x1conv=False, stride=1):
    
        super(Residual, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, stride=stride)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        if use_1x1conv:
            self.conv3 = nn.Conv2d(in_channels, out_channels, kernel_size=1,stride=stride)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
    def forward(self, X):
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        return F.relu(Y + X)   

def resnet_block(in_channels, out_channels, num_residuals, first_block=False):
    if first_block:
        assert in_channels == out_channels
    blk = []
    for i in range(num_residuals):
        if i == 0 and not first_block:
            # 除第一个模块外的其他模块，在第一个残差块里将上一个模块的通道数翻倍，并将高和宽减半
            blk.append(Residual(in_channels, out_channels, use_1x1conv=True, stride=2))
        else:
            blk.append(Residual(out_channels, out_channels))
    return nn.Sequential(*blk)    

In [17]:
def resnet18(num_classes):
    # 第一个卷积层
    net = nn.Sequential(
        nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    )
    # 依次加入所有残差块，一共四个模块，每个模块使用两个残差块
    net.add_module('resnet_block1', resnet_block(64, 64, 2, first_block=True))
    net.add_module('resnet_block2', resnet_block(64, 128, 2))
    net.add_module('resnet_block3', resnet_block(128, 256, 2))
    net.add_module('resnet_block4', resnet_block(256, 512, 2))
    # 加入全局平均池化层
    net.add_module('global_avg_pool', d2l.GlobalAvgPool2d()) # output shape (batch_size, 512, 1, 1)
    # 加入全连接层
    net.add_module('fc', nn.Sequential(d2l.FlattenLayer(),nn.Linear(512, num_classes)))
    
    return net

In [13]:
num_workers = 4
flip_aug = torchvision.transforms.Compose([
    torchvision.transforms.R andomHorizontalFlip(),
    torchvision.transforms.ToTensor()
])
no_aug = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor()
])
def load_cifar10(is_train, augs, batch_size, root=data_dir):
    dataset = torchvision.datasets.CIFAR10(root=root, train=is_train, transform=augs, download=False)
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=is_train, num_workers=num_workers)

In [24]:
def train(train_iter, test_iter, net, loss, optimizer, device, num_epochs):
    net = net.to(device)
    print('training on ', device)
    batch_count = 0
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start_time = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = d2l.evaluate_accuracy(test_iter, net)
        print('Epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'%
             (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, time.time() - start_time))

In [15]:
def train_with_data_aug(train_augs, test_augs, lr=0.001):
    batch_size = 256
    net = resnet18(10)
    optimizer = optim.Adam(net.parameters(), lr=lr)
    loss = nn.CrossEntropyLoss()
    train_iter = load_cifar10(True, train_augs, batch_size)
    test_iter = load_cifar10(False, test_augs, batch_size)
    train(train_iter, test_iter, net, loss, optimizer, device, num_epochs=10)

In [25]:
train_with_data_aug(flip_aug, no_aug)

training on  cuda
Epoch 1, loss 1.3555, train acc 0.507, test acc 0.431, time 33.4 sec
Epoch 2, loss 0.5052, train acc 0.641, test acc 0.583, time 32.9 sec
Epoch 3, loss 0.2852, train acc 0.697, test acc 0.611, time 34.8 sec
Epoch 4, loss 0.1899, train acc 0.735, test acc 0.653, time 36.7 sec
Epoch 5, loss 0.1365, train acc 0.759, test acc 0.619, time 36.9 sec
Epoch 6, loss 0.1035, train acc 0.783, test acc 0.727, time 33.8 sec
Epoch 7, loss 0.0805, train acc 0.804, test acc 0.730, time 34.1 sec
Epoch 8, loss 0.0647, train acc 0.819, test acc 0.704, time 33.8 sec
Epoch 9, loss 0.0533, train acc 0.833, test acc 0.745, time 34.0 sec
Epoch 10, loss 0.0433, train acc 0.850, test acc 0.744, time 33.9 sec
