In [718]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torchvision import transforms, datasets
from torchvision.transforms import Lambda
import torchvision
from torch.utils.data import DataLoader

In [719]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
device

## 加载数据

In [720]:
# Per-channel mean / std handed to Normalize (presumably CIFAR-10 training-set
# statistics -- TODO confirm). Defined once so every pipeline uses the same values.
CIFAR10_MEAN = [0.4914, 0.4822, 0.4465]
CIFAR10_STD = [0.2023, 0.1994, 0.2010]

# Training-time augmentation. There is deliberately no ToTensor step: train_loop
# applies this pipeline to batches that are already tensors.
train_transform = transforms.Compose([
    transforms.Resize(40),
    transforms.RandomResizedCrop(32, scale=(0.64, 1.0), ratio=(1.0, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
])
# No image augmentation at validation / test time.
# valid_transform is applied to tensor batches inside validate_loop.
valid_transform = transforms.Compose([
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
])
# test_transform is attached to the test dataset, so it converts to a tensor first.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
])
# One-hot encode an integer class index into a float vector of length 10.
# The label is used directly; the earlier lookup through `label_map` referenced a
# name that is not defined in this notebook and raised NameError when called.
# NOTE: this transform is not passed to any dataset in this notebook.
target_transform = Lambda(
    lambda y: torch.zeros(10, dtype=torch.float).scatter_(dim=0, index=torch.tensor(y), value=1)
)

In [721]:
# The training images are only converted to tensors here; train_loop and
# validate_loop apply their own transforms to each batch.
train_data = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
# The test split gets its full preprocessing (test_transform) at load time.
test_data = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=test_transform,
)

In [722]:
# Split sizes: `ratio` of the training set is used for training, the rest for validation.
ratio = 0.8
num_all = len(train_data)
train_num = int(ratio * num_all)
valid_num = num_all - train_num
print(train_num, valid_num, sep='\n')

In [723]:
# Seed the split so the train/validation partition is identical on every
# Restart & Run All; without a generator the partition changes between runs.
train_split_data, valid_split_data = torch.utils.data.random_split(
    train_data,
    [train_num, valid_num],
    generator=torch.Generator().manual_seed(0),
)

In [724]:
# Sanity check: number of samples in the training subset.
len(train_split_data)

In [725]:
# Sanity check: number of samples in the validation subset.
len(valid_split_data)

In [726]:
# Hyperparameters shared by the data loaders, the optimizer and the training loop.
batch_size = 32
lr = 1e-3
weight_decay = 1e-3
num_epochs = 40

In [727]:
# Loader over the complete training set (used for the final run without a hold-out split).
final_train_iter = DataLoader(
    train_data,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)
# Loader over the training part of the split.
train_iter = DataLoader(
    train_split_data,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)
# Evaluation loaders keep a fixed order (no shuffling).
valid_iter = DataLoader(
    valid_split_data,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)
test_iter = DataLoader(
    test_data,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)

## 定义模型

In [728]:
from torch.nn import functional as F

In [729]:
def weight_init(layer):
    """Apply Xavier (Glorot) normal initialization to the weight of linear layers.

    Meant to be passed to ``nn.Module.apply``, which calls it once for every
    submodule. Modules that are not ``nn.Linear`` are left untouched.

    Args:
        layer: the module to (possibly) re-initialize in place.
    """
    # Comparing the type object with the string 'nn.Linear' is never true, so the
    # check has to be done against the class itself.
    if isinstance(layer, nn.Linear):
        # The in-place initializer operates on the weight tensor, not on the module.
        nn.init.xavier_normal_(layer.weight)

In [730]:
class LeNet(nn.Module):
    """LeNet-style CNN: two conv/pool stages followed by three linear layers (10 outputs)."""

    def __init__(self):
        super().__init__()
        conv_stages = [
            nn.Conv2d(3, 6, kernel_size=5, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(2, stride=2),
            nn.Conv2d(6, 16, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(2, stride=2),
        ]
        # 16 * 5 * 5 is the flattened feature size the conv stages produce for 32x32 inputs.
        head = [
            nn.Flatten(),
            nn.Linear(16 * 5 * 5, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, 10),
        ]
        self.net = nn.Sequential(*conv_stages, *head)
        # Run the shared initializer over every submodule.
        self.net.apply(weight_init)

    def forward(self, X):
        """Return the 10 class scores for the image batch ``X``."""
        return self.net(X)

In [731]:
class AlexNet(nn.Module):
    """AlexNet-style CNN scaled down for small images: four conv layers, three linear layers."""

    def __init__(self):
        super().__init__()
        conv_stages = [
            nn.Conv2d(3, 96, kernel_size=5, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(96, 256, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(256, 384, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        # 3 * 3 * 256 is the flattened feature size the conv stages produce for 32x32 inputs.
        head = [
            nn.Flatten(),
            nn.Linear(3 * 3 * 256, 3 * 256),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(3 * 256, 256),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(256, 10),
        ]
        self.net = nn.Sequential(*conv_stages, *head)
        # Run the shared initializer over every submodule.
        self.net.apply(weight_init)

    def forward(self, X):
        """Return the 10 class scores for the image batch ``X``."""
        return self.net(X)

In [732]:
class VggBlock(nn.Module):
    """``num_conv`` 3x3 conv + ReLU pairs followed by one 2x2 max-pool.

    Args:
        num_conv: number of conv/ReLU pairs in the block.
        in_channels: channels of the block input.
        out_channels: channels produced by every conv in the block.
    """

    def __init__(self, num_conv, in_channels, out_channels):
        super().__init__()
        stages = []
        channels = in_channels
        for _ in range(num_conv):
            stages += [
                nn.Conv2d(channels, out_channels, kernel_size=3, padding=1),
                nn.ReLU(),
            ]
            # Every conv after the first one maps out_channels -> out_channels.
            channels = out_channels
        stages.append(nn.MaxPool2d(kernel_size=2, stride=2))
        self.net = nn.Sequential(*stages)
        self.net.apply(weight_init)

    def forward(self, X):
        """Run ``X`` through the conv stack and the pooling layer."""
        return self.net(X)

In [733]:
class VGGNet(nn.Module):
    """VGG-style network: a chain of ``VggBlock`` stages and a three-layer classifier.

    Args:
        conv_arch: iterable of ``(num_conv, out_channels)`` tuples, one per block.
    """

    def __init__(self, conv_arch):
        super().__init__()
        blocks = []
        prev_channels = 3
        for num_conv, out_channels in conv_arch:
            blocks.append(VggBlock(num_conv, prev_channels, out_channels))
            prev_channels = out_channels
        # Each block halves the spatial size; the side length is computed for a 32-pixel input.
        out_len = 32 // (2 ** len(blocks))
        flat_features = out_channels * out_len * out_len
        classifier = [
            nn.Flatten(),
            nn.Linear(flat_features, 2048),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 10),
        ]
        self.net = nn.Sequential(*blocks, *classifier)

    def forward(self, X):
        """Return the 10 class scores for the image batch ``X``."""
        return self.net(X)

In [734]:
class NiNBlock(nn.Module):
    """One spatial convolution followed by two 1x1 convolutions, each with a ReLU.

    Args:
        in_channels: channels of the block input.
        out_channels: channels produced by all three convolutions.
        kernel_size, stride, padding: settings of the first (spatial) convolution.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding):
        super().__init__()
        stages = [
            nn.Conv2d(
                in_channels,
                out_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=padding,
            ),
            nn.ReLU(),
        ]
        # Two 1x1 convolutions that keep the channel count.
        for _ in range(2):
            stages += [nn.Conv2d(out_channels, out_channels, kernel_size=1), nn.ReLU()]
        self.net = nn.Sequential(*stages)

    def forward(self, X):
        """Run ``X`` through the three conv/ReLU pairs."""
        return self.net(X)

In [735]:
class NiN(nn.Module):
    """Network-in-Network classifier with 10 outputs.

    Args:
        net_arch: iterable of ``(in_channels, out_channels, kernel_size, stride,
            padding)`` tuples; each one becomes a ``NiNBlock`` followed by a
            2x2 max-pool.
    """

    def __init__(self, net_arch):
        super().__init__()
        stages = []
        for in_channels, out_channels, kernel_size, stride, padding in net_arch:
            stages += [
                NiNBlock(in_channels, out_channels, kernel_size, stride, padding),
                nn.MaxPool2d(kernel_size=2, stride=2),
            ]
        # The final block reuses the channel count and conv settings of the last
        # net_arch entry and maps to the 10 class channels.
        stages += [
            nn.Dropout(p=0.5),
            NiNBlock(out_channels, 10, kernel_size=kernel_size, stride=stride, padding=padding),
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
        ]
        self.net = nn.Sequential(*stages)
        self.net.apply(weight_init)

    def forward(self, X):
        """Return the 10 class scores for the image batch ``X``."""
        return self.net(X)

In [736]:
class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 -> 3x3 -> 1x1 convolutions plus a shortcut.

    The block outputs ``expansion * zip_channels`` channels.

    Args:
        in_channels: channels of the block input.
        zip_channels: channels used by the two inner convolutions.
        stride: stride of the 3x3 convolution (and of the projection shortcut).
    """

    expansion = 4

    def __init__(self, in_channels, zip_channels, stride=1):
        super().__init__()
        out_channels = zip_channels * self.expansion
        main_path = [
            nn.Conv2d(in_channels, zip_channels, kernel_size=1, bias=False),
            nn.BatchNorm2d(zip_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(zip_channels, zip_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(zip_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(zip_channels, out_channels, kernel_size=1, bias=False),
            nn.BatchNorm2d(out_channels),
        ]
        self.features = nn.Sequential(*main_path)

        # A projection is only needed when the shortcut must change shape;
        # otherwise the empty Sequential passes the input through unchanged.
        needs_projection = stride != 1 or in_channels != out_channels
        if needs_projection:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``relu(features(x) + shortcut(x))``."""
        out = self.features(x)
        out += self.shortcut(x)
        return torch.relu(out)

In [737]:
class ResNet(nn.Module):
    """ResNet backbone with a 3x3 stem, four residual stages and a linear classifier.

    Args:
        block: residual block class; it must expose an ``expansion`` attribute
            and accept ``(in_channels, out_channels, stride)``.
        num_blocks: sequence whose first four entries give the number of blocks
            in each stage.
        num_classes: size of the output layer.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        # Running channel count; _make_layer updates it after every block it adds.
        self.in_channels = 64
        stem = [
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
        ]
        self.features = nn.Sequential(*stem)

        # (width, stride of the first block) for layer1 .. layer4.
        stage_plan = ((64, 1), (128, 2), (256, 2), (512, 2))
        for idx, (width, first_stride) in enumerate(stage_plan):
            stage = self._make_layer(block, width, num_blocks[idx], stride=first_stride)
            setattr(self, f"layer{idx + 1}", stage)

        self.avg_pool = nn.AvgPool2d(kernel_size=4)
        self.classifer = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, out_channels, num_blocks, stride):
        """Build one stage: the first block uses ``stride``, the remaining ones stride 1."""
        stage_blocks = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            stage_blocks.append(block(self.in_channels, out_channels, block_stride))
            # The next block consumes what this one produces.
            self.in_channels = out_channels * block.expansion
        return nn.Sequential(*stage_blocks)

    def forward(self, x):
        """Return the class scores for the image batch ``x``."""
        out = self.features(x)
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            out = stage(out)
        out = self.avg_pool(out)
        # Flatten everything except the batch dimension before the linear layer.
        out = out.view(out.size(0), -1)
        return self.classifer(out)

In [738]:
class BasicBlock(nn.Module):
    """Two-convolution residual block used by the shallow ResNet variants.

    Defined here because ``ResNet18`` needs it and no other cell of the notebook
    provides it (calling ``ResNet18`` on a fresh kernel raised NameError).

    Args:
        in_channels: channels of the block input.
        out_channels: channels produced by both 3x3 convolutions.
        stride: stride of the first convolution (and of the projection shortcut).
    """

    # The block outputs exactly ``out_channels`` channels.
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
        )
        # Identity shortcut unless the spatial size or channel count changes.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels * self.expansion:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels * self.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * self.expansion),
            )

    def forward(self, x):
        """Return ``relu(features(x) + shortcut(x))``."""
        out = self.features(x)
        out += self.shortcut(x)
        return torch.relu(out)


def ResNet18():
    """Build a ResNet with two ``BasicBlock`` units in each of the four stages."""
    return ResNet(BasicBlock, [2, 2, 2, 2])

In [739]:
# Instantiate every candidate architecture and place it on the selected device.
resnet = ResNet18().to(device)
lenet = LeNet().to(device)
alex = AlexNet().to(device)
# (num_conv, out_channels) per VGG block.
conv_arch = ((1, 64), (1, 128), (2, 256), (2, 512))
vggNet = VGGNet(conv_arch).to(device)
# (in_channels, out_channels, kernel_size, stride, padding) per NiN block.
net_arch = ((3, 256, 3, 1, 1), (256, 512, 3, 1, 1), (512, 1024, 3, 1, 1), (1024, 2048, 3, 1, 1))
# Moved to the device like the other models (it was the only one left on the CPU).
ninNet = NiN(net_arch).to(device)

In [740]:
# Model used by the optimizer, the training loop and the final test below.
net = resnet

## 训练

In [741]:
def train_loop(dataloader, model, loss_fn, optimizer):
    """Train ``model`` for one pass over ``dataloader``.

    Each batch is moved to the global ``device``, passed through the global
    ``train_transform``, and used for one optimizer step. The loss is printed
    every 100 batches.

    Args:
        dataloader: iterable of ``(X, y)`` batches; ``len(dataloader.dataset)``
            is used for the progress message.
        model: network to train; it is moved to ``device``.
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: optimizer that updates the parameters of ``model``.
    """
    size = len(dataloader.dataset)
    model = model.to(device)
    # Switching to training mode does not depend on the batch, so do it once
    # instead of on every iteration.
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        # NOTE(review): the transform receives the whole batch tensor, so the
        # random augmentation is presumably drawn once per batch rather than
        # once per image -- confirm this is intended.
        X = train_transform(X)

        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            # Separate name so the loss tensor is not rebound to a float.
            loss_value, current = loss.item(), batch * len(X)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")

In [742]:
def validate_loop(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` and print accuracy and average loss.

    Each batch is moved to the global ``device`` and passed through the global
    ``valid_transform`` before the forward pass.

    Args:
        dataloader: iterable of ``(X, y)`` batches; ``len(dataloader.dataset)``
            is the accuracy denominator and ``len(dataloader)`` the loss
            denominator.
        model: network to evaluate; it is moved to ``device``.
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar tensor.
    """
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    validate_loss, correct = 0, 0
    model = model.to(device)
    # Evaluate in eval mode so Dropout is disabled and BatchNorm uses (and does
    # not update) its running statistics; restore the caller's mode afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for X, y in dataloader:
                X, y = X.to(device), y.to(device)
                X = valid_transform(X)
                pred = model(X)
                validate_loss += loss_fn(pred, y).item()
                correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    finally:
        model.train(was_training)
    validate_loss /= num_batches
    correct /= size
    print(f"Validate Error: \n Accuracy : {(100*correct):>0.1f}%, Avg loss: {validate_loss:>8f} \n")

In [743]:
def test_loop(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` and print accuracy and average loss.

    Batches are moved to the global ``device`` and fed to the model as they
    come; no transform is applied here.

    Args:
        dataloader: iterable of ``(X, y)`` batches; ``len(dataloader.dataset)``
            is the accuracy denominator and ``len(dataloader)`` the loss
            denominator.
        model: network to evaluate; it is moved to ``device``.
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar tensor.
    """
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0
    model = model.to(device)
    # Evaluate in eval mode so Dropout is disabled and BatchNorm uses (and does
    # not update) its running statistics; restore the caller's mode afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for X, y in dataloader:
                X, y = X.to(device), y.to(device)
                pred = model(X)
                test_loss += loss_fn(pred, y).item()
                correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    finally:
        model.train(was_training)
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy : {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [744]:
# Cross-entropy loss on the class scores produced by the network.
loss_func = nn.CrossEntropyLoss()
# Adam over the parameters of the selected network, using the shared hyperparameters.
optimizer = torch.optim.Adam(
    net.parameters(),
    lr=lr,
    weight_decay=weight_decay,
)

In [745]:
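# Hold-out validation run, disabled: uncomment to train on the training split
# and validate after every epoch instead of training on all data.
# for epoch in range(num_epochs):
    # print('------------------epoch',epoch+1 , '------------------')
    # train_loop(train_iter, net, loss_func, optimizer)
    # validate_loop(valid_iter, net, loss_func)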

## 所有数据训练

In [746]:
# Final run: train the selected network on the complete training set for num_epochs epochs.
for epoch in range(num_epochs):
    print('------------------epoch', epoch + 1, '------------------')
    train_loop(
        dataloader=final_train_iter,
        model=net,
        loss_fn=loss_func,
        optimizer=optimizer,
    )

## 看看测试集上的表现

In [747]:
# Report accuracy and average loss of the trained network on the test set.
test_loop(test_iter, net, loss_func)