### 1 在MNIST 数据集上测试

In [1]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torch.nn.functional as F

from torch.utils.data import DataLoader

import time 

In [10]:
# 定义网络结构

cfg = {
    'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M',
          512, 512, 'M'],
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512,
          512, 512, 'M', 512, 512, 512, 'M'],
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512,
          512, 'M', 512, 512, 512, 512, 'M'],
}

class VGG(nn.Module):
    
    def __init__(self, features, num_classes=10):
        super(VGG, self).__init__()
        self.features = features
        self.classifier = nn.Linear(512, num_classes)
        self._initialize_weights()
        
    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x
        
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight.data)
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
    
def make_layers(cfg, batch_norm=False):
    layers = []
    in_channels = 1   # MNIST为灰度图像    通道数为1   若为彩色图像 通道数为 3
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
    return nn.Sequential(*layers)

def vgg11(**kwargs):
    return VGG(make_layers(cfg['A'], batch_norm=True), **kwargs)


def vgg13(**kwargs):
    return VGG(make_layers(cfg['B'], batch_norm=True), **kwargs)


def vgg16(**kwargs):
    return VGG(make_layers(cfg['D'], batch_norm=True), **kwargs)


def vgg19(**kwargs):
    return VGG(make_layers(cfg['E'], batch_norm=True), **kwargs)

In [11]:
# 定义超参数
batch_size = 100
lr = 1e-3
momentum = 0.9

# 数据增强
transform = transforms.Compose(
[
    transforms.RandomHorizontalFlip(),   # 对图片进行概率为0.5 随机翻转
    transforms.RandomGrayscale(),         # 随机调整图片的亮度
    transforms.ToTensor()
])

# 加载数据
trainset = torchvision.datasets.MNIST(root='data/', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True)
testset = torchvision.datasets.MNIST(root='data/', train=False, download=True, transform=transform)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False)

In [12]:
# load model

vgg_model = vgg11()   # num_classes 默认为 10  

In [13]:
criterian = nn.CrossEntropyLoss()
optimizer = optim.SGD(vgg_model.parameters(), lr=lr, momentum=momentum)

# device: GPU  or CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

vgg_model.to(device)

print("Start Training!")

num_epoches = 20

for epoch in range(num_epoches):
    running_loss = 0.
    running_acc = 0.
    batch_size = 100
    start = time.time()
    
    for (img, label) in trainloader:
        img = img.to(device)
        label = label.to(device)
        
        outputs = vgg_model(img)
        loss = criterian(outputs, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        _, pred = outputs.max(1)
        num_correct = (pred == label).sum().item()
        running_acc += num_correct
    
    running_loss /= len(trainset)
    running_acc /= len(trainset)
    print("[%d/%d] Loss: %.5f, Acc: %.2f, Time: %.1f s" %(epoch+1, num_epoches, running_loss, 100*running_acc, time.time()-start))

print("Finished Training!")

Start Training!
[1/20] Loss: 0.00469, Acc: 87.98, Time: 44.5 s
[2/20] Loss: 0.00111, Acc: 96.84, Time: 33.1 s
[3/20] Loss: 0.00071, Acc: 98.02, Time: 33.3 s
[4/20] Loss: 0.00050, Acc: 98.64, Time: 33.6 s
[5/20] Loss: 0.00038, Acc: 98.95, Time: 33.3 s
[6/20] Loss: 0.00029, Acc: 99.22, Time: 33.3 s
[7/20] Loss: 0.00022, Acc: 99.45, Time: 33.2 s
[8/20] Loss: 0.00018, Acc: 99.59, Time: 33.2 s
[9/20] Loss: 0.00014, Acc: 99.68, Time: 33.5 s
[10/20] Loss: 0.00011, Acc: 99.77, Time: 33.4 s
[11/20] Loss: 0.00009, Acc: 99.84, Time: 33.4 s
[12/20] Loss: 0.00007, Acc: 99.89, Time: 33.5 s
[13/20] Loss: 0.00007, Acc: 99.89, Time: 33.4 s
[14/20] Loss: 0.00005, Acc: 99.94, Time: 33.7 s
[15/20] Loss: 0.00004, Acc: 99.95, Time: 33.5 s
[16/20] Loss: 0.00004, Acc: 99.96, Time: 33.6 s
[17/20] Loss: 0.00004, Acc: 99.96, Time: 33.6 s
[18/20] Loss: 0.00003, Acc: 99.98, Time: 33.6 s
[19/20] Loss: 0.00002, Acc: 99.98, Time: 34.8 s
[20/20] Loss: 0.00002, Acc: 99.99, Time: 34.2 s
Finished Training!


In [14]:
# 保存模型
torch.save(vgg_model, 'model/vgg11_MNIST.pkl')
net = torch.load('model/vgg11_MNIST.pkl')

  "type " + obj.__name__ + ". It won't be checked "


In [15]:
# 评价
testloss = 0.
testacc = 0.
start = time.time()
for (img, label) in testloader:
    img = img.to(device)
    label = label.to(device)
    
    outputs = net(img)
    loss = criterian(outputs, label)
    
    testloss += loss.item()
    _, pred = outputs.max(1)
    num_correct = (pred == label).sum().item()
    testacc += num_correct
    
testloss /= len(testset)
testacc /= len(testset)
print("Test: Loss: %.5f, Acc: %.2f, Time: %.1f s" %(testloss, 100*testacc, time.time()-start))

Test: Loss: 0.00048, Acc: 98.62, Time: 2.6 s


#### 相对于AlexNet  test_loss=97.09%   vgg11的test_loss=98.62%有了明显的提升  说明VGG还是更优秀些

### 2  在CIFAR 数据集上测试

## CIFAR 10
cifar 10 这个数据集一共有 50000 张训练集，10000 张测试集，两个数据集里面的图片都是 png 彩色图片，图片大小是 32 x 32 x 3，一共是 10 分类问题，分别为飞机、汽车、鸟、猫、鹿、狗、青蛙、马、船和卡车。这个数据集是对网络性能测试一个非常重要的指标，可以说如果一个网络在这个数据集上超过另外一个网络，那么这个网络性能上一定要比另外一个网络好，目前这个数据集最好的结果是 **95%** 左右的测试集准确率。

![](https://ws1.sinaimg.cn/large/006tNc79ly1fmpjxxq7wcj30db0ae7ag.jpg)

你能用肉眼对这些图片进行分类吗？

cifar 10 已经被 pytorch 内置了，使用非常方便，只需要调用 `torchvision.datasets.CIFAR10` 就可以了

In [17]:
print(vgg_model)

VGG(
  (features): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU(inplace)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU(inplace)
    (11): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (13): ReLU(inplace)
    (14): MaxPool2d(kernel_size=2, stride=

In [46]:
# 定义网络结构

cfg = {
    'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M',
          512, 512, 'M'],
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512,
          512, 512, 'M', 512, 512, 512, 'M'],
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512,
          512, 'M', 512, 512, 512, 512, 'M'],
}

class VGG(nn.Module):
    
    def __init__(self, features, num_classes=10):
        super(VGG, self).__init__()
        self.features = features
        self.classifier = nn.Linear(512, num_classes)
        self._initialize_weights()
        
    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x
        
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight.data)
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
    
def make_layers(cfg, batch_norm=False):
    layers = []
    in_channels = 3   # CIFAR为彩色图像 通道数为 3
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
    return nn.Sequential(*layers)

def vgg11(**kwargs):
    return VGG(make_layers(cfg['A'], batch_norm=True), **kwargs)


def vgg13(**kwargs):
    return VGG(make_layers(cfg['B'], batch_norm=True), **kwargs)


def vgg16(**kwargs):
    return VGG(make_layers(cfg['D'], batch_norm=True), **kwargs)


def vgg19(**kwargs):
    return VGG(make_layers(cfg['E'], batch_norm=True), **kwargs)

In [50]:
# 数据增强
# transform = transforms.Compose(
# [
#     transforms.RandomHorizontalFlip(),   # 对图片进行概率为0.5 随机翻转
#     transforms.RandomGrayscale(),         # 随机调整图片的亮度
#     transforms.ToTensor()
# ])
import numpy as np

def data_tf(x):
    x = np.array(x, dtype='float32') / 255
    x = (x - 0.5) / 0.5    # 标准化，这个技巧常用到
    x = x.transpose((2, 0, 1)) # transpose 为转置函数    将 channel 放到第一维，只是 pytorch 要求的输入方式
    x = torch.from_numpy(x)
    return x


# 加载数据
trainset = torchvision.datasets.CIFAR10(root='data/', train=True, download=True, transform=data_tf)
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)
testset = torchvision.datasets.CIFAR10(root='data/', train=False, download=True, transform=data_tf)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


In [54]:
# vgg_model = vgg11() 
vgg_model = vgg19()

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(vgg_model.parameters(), lr=0.1)

In [52]:
# 定义训练函数  免得每次都得写一大坨
def train(net, train_data, valid_data, num_epochs, optimizer, criterion):
    
    # device: GPU  or CPU
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    vgg_model.to(device)
    print("Start...")

    for epoch in range(num_epochs):
        
        train_loss = 0.
        train_acc = 0.
        start = time.time()       
        for (img, label) in train_data:
            img = img.to(device)
            label = label.to(device)

            outputs = vgg_model(img)
            loss = criterion(outputs, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, pred = outputs.max(1)
            num_correct = (pred == label).sum().item()
            train_acc += num_correct

        train_loss /= len(train_data)
        train_acc /= len(train_data)
        
        
        eval_loss = 0.
        eval_acc = 0.
        for (img, label) in valid_data:
            img = img.to(device)
            label = label.to(device)
            
            outputs = vgg_model(img)
            loss = criterion(outputs, label)
            eval_loss += loss.item()
            _, pred = outputs.max(1)
            num_correct = (pred == label).sum().item()
            eval_acc += num_correct
            
        eval_loss /= len(valid_data)
        eval_acc /= len(valid_data)
        print('epoch: {}, Train Loss: {:.6f}, Train Acc: {:.6f}, Eval Loss: {:.6f}, Eval Acc: {:.6f}, Time: {:.1f} s'
          .format(epoch+1, train_loss, train_acc, eval_loss, eval_acc, time.time()-start))    
       
    print("Finished!")

In [55]:
# 训练网络
train(vgg_model, trainloader, testloader, 20, optimizer, criterion)   # num_epoches = 20

Start...
epoch: 0, Train Loss: 1.825938, Train Acc: 22.932225, Eval Loss: 2.024197, Eval Acc: 31.394904, Time: 66.7 s
epoch: 1, Train Loss: 0.981903, Train Acc: 41.923274, Eval Loss: 1.335038, Eval Acc: 36.159236, Time: 67.3 s
epoch: 2, Train Loss: 0.691300, Train Acc: 48.723785, Eval Loss: 0.793959, Eval Acc: 46.363057, Time: 68.8 s
epoch: 3, Train Loss: 0.521801, Train Acc: 52.501279, Eval Loss: 0.934406, Eval Acc: 44.993631, Time: 71.3 s
epoch: 4, Train Loss: 0.401812, Train Acc: 55.028133, Eval Loss: 0.936934, Eval Acc: 46.210191, Time: 71.2 s
epoch: 5, Train Loss: 0.312005, Train Acc: 57.101023, Eval Loss: 2.135915, Eval Acc: 42.923567, Time: 70.7 s
epoch: 6, Train Loss: 0.251198, Train Acc: 58.432225, Eval Loss: 0.723369, Eval Acc: 50.433121, Time: 69.5 s
epoch: 7, Train Loss: 0.184134, Train Acc: 59.881074, Eval Loss: 0.807630, Eval Acc: 48.834395, Time: 68.2 s
epoch: 8, Train Loss: 0.145900, Train Acc: 60.760870, Eval Loss: 0.837917, Eval Acc: 50.044586, Time: 68.3 s
epoch: 9, 

VGG11 训练20轮的效果大概为 Train Loss: 0.020202, Train Acc: 63.524297, Eval Loss: 1.317068, Eval Acc: 49.878981, Time: 35.7 s
<br>
VGG19 训练20轮的效果大概为 Train Loss: 0.025375, Train Acc: 63.390026, Eval Loss: 0.875148, Eval Acc: 52.509554, Time: 68.4 s

<br>
效果都不咋好  需要重构网络 或者调参试试。。。。

In [57]:
# 保存模型
torch.save(vgg_model, 'model/VGG19_CAFIR.pkl')
net = torch.load('model/VGG19_CAFIR.pkl')

  "type " + obj.__name__ + ". It won't be checked "
