In [1]:
import mxnet as mx
from mxnet import ndarray as nd
from mxnet import autograd as ag
from mxnet import init
from mxnet import gluon
from mxnet.gluon.data import vision
from mxnet.gluon.data.vision import transforms
from mxnet.gluon import nn
from mxnet.gluon.data import DataLoader
import datetime
%matplotlib inline
import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 120
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

  import OpenSSL.SSL


In [2]:
# Per-channel mean / std shared by the training and validation pipelines
# (presumably the CIFAR-10 channel statistics -- TODO confirm).
norm_mean = [0.4914, 0.4822, 0.4465]
norm_std = [0.2023, 0.1994, 0.2010]

# Training pipeline: upscale, random crop + horizontal flip for augmentation,
# then convert to a tensor and normalise.
train_augs = transforms.Compose([
    transforms.Resize(224),
    transforms.RandomResizedCrop(224),
    transforms.RandomFlipLeftRight(),
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std),
])

# Validation pipeline: deterministic -- upscale, convert, normalise.
valid_augs = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std),
])

In [3]:
data_dir = 'data/cifar10/'
batch_size = 128

# One sub-folder per split; flag=1 is passed through to ImageFolderDataset
# (colour loading according to the MXNet docs -- verify for your version).
train_ds = vision.ImageFolderDataset(root=data_dir + 'train', flag=1)
valid_ds = vision.ImageFolderDataset(root=data_dir + 'valid', flag=1)

# Options common to both loaders; 'keep' retains the final, possibly smaller batch.
loader_opts = dict(batch_size=batch_size, last_batch='keep')

# Transforms are applied to the image only (first element of each sample).
train_data = DataLoader(train_ds.transform_first(train_augs),
                        shuffle=True, **loader_opts)
valid_data = DataLoader(valid_ds.transform_first(valid_augs),
                        shuffle=False, **loader_opts)

In [4]:
# Sanity check: show the shapes and labels of the first training batch only.
for data, label in train_data:
    for item in (data.shape, label.shape, label):
        print(item)
    break

(128, 3, 224, 224)
(128,)

[3 0 0 1 0 1 3 7 4 3 5 7 5 1 7 7 7 1 7 7 4 9 3 4 4 6 8 7 8 3 4 1 1 2 7 6 8
 4 1 5 5 6 4 6 1 6 3 9 3 8 7 6 4 3 9 6 9 2 1 4 1 1 7 0 5 2 8 5 8 2 8 2 7 5
 6 6 3 7 7 7 8 3 9 4 6 3 7 1 3 6 5 2 5 0 9 9 5 9 3 3 9 3 3 3 7 5 2 3 0 5 8
 2 7 2 7 5 8 6 4 1 8 6 7 1 3 2 6 0]
<NDArray 128 @cpu(0)>


## 设计模型 --GoogLeNet

In [5]:
class inception_block(nn.HybridBlock):
    """Inception module with four parallel branches concatenated on axis 1.

    Parameters
    ----------
    channels : sequence of int
        Output channel counts, indexed as:
        ``[0]`` 1x1 conv (branch 1),
        ``[1]`` 1x1 reduce and ``[2]`` 3x3 conv (branch 2),
        ``[3]`` 1x1 reduce and ``[4]`` 5x5 conv (branch 3),
        ``[5]`` 1x1 conv after the 3x3 max-pool (branch 4).
    **kwargs
        Forwarded to ``nn.HybridBlock``.
    """

    def __init__(self, channels, **kwargs):
        super(inception_block, self).__init__(**kwargs)

        def add_conv_bn_relu(tower, out_channels, kernel, pad):
            # Append a stride-1 Conv2D -> BatchNorm -> ReLU triplet to `tower`.
            tower.add(nn.Conv2D(channels=out_channels, kernel_size=kernel,
                                strides=1, padding=pad))
            tower.add(nn.BatchNorm())
            tower.add(nn.Activation(activation='relu'))

        # Layers are created in the same order as before so that the
        # auto-generated parameter names stay the same.
        with self.name_scope():
            # Branch 1: 1x1 convolution.
            tower_1x1 = nn.HybridSequential()
            add_conv_bn_relu(tower_1x1, channels[0], 1, 0)

            # Branch 2: 1x1 reduction followed by a 3x3 convolution.
            tower_3x3 = nn.HybridSequential()
            add_conv_bn_relu(tower_3x3, channels[1], 1, 0)
            add_conv_bn_relu(tower_3x3, channels[2], 3, 1)

            # Branch 3: 1x1 reduction followed by a 5x5 convolution.
            tower_5x5 = nn.HybridSequential()
            add_conv_bn_relu(tower_5x5, channels[3], 1, 0)
            add_conv_bn_relu(tower_5x5, channels[4], 5, 2)

            # Branch 4: 3x3 max-pool then a bare 1x1 convolution
            # (no BatchNorm / ReLU on this branch).
            tower_pool = nn.HybridSequential()
            tower_pool.add(nn.MaxPool2D(pool_size=3, strides=1, padding=1))
            tower_pool.add(nn.Conv2D(channels=channels[5], kernel_size=1,
                                     strides=1, padding=0))

        self.branch_1 = tower_1x1
        self.branch_2 = tower_3x3
        self.branch_3 = tower_5x5
        self.branch_4 = tower_pool

    def hybrid_forward(self, F, x):
        """Run `x` through all four branches and concatenate along axis 1."""
        branch_outputs = [
            self.branch_1(x),
            self.branch_2(x),
            self.branch_3(x),
            self.branch_4(x),
        ]
        return F.concat(*branch_outputs, dim=1)

In [6]:
class GoogLeNet(nn.HybridBlock):
    """GoogLeNet-style classifier built from `inception_block` modules.

    The stem and the four stride-2 max-pools reduce a 224x224 input to a
    7x7 feature map (224 -> 112 -> 56 -> 28 -> 14 -> 7), which the final
    ``AvgPool2D(pool_size=7)`` collapses to 1x1 before the dense layer.

    Parameters
    ----------
    num_classes : int
        Number of units in the final ``Dense`` layer.
    verbose : bool, default False
        If True, print the output shape after every top-level layer during
        the forward pass. The trace is only emitted when the intermediate
        result exposes a ``shape`` attribute (see `hybrid_forward`).
    **kwargs
        Forwarded to ``nn.HybridBlock``.
    """

    def __init__(self, num_classes, verbose=False, **kwargs):
        super(GoogLeNet, self).__init__(**kwargs)
        self.verbose = verbose
        with self.name_scope():
            net = nn.HybridSequential()
            # Stage 1: 7x7/2 conv + 3x3/2 max-pool
            net.add(nn.Conv2D(channels=64, kernel_size=7, 
                              strides=2, padding=3))
            net.add(nn.BatchNorm())
            net.add(nn.Activation(activation='relu'))
            net.add(nn.MaxPool2D(pool_size=3, strides=2, padding=1))
            # Stage 2: 1x1 conv, 3x3 conv, 3x3/2 max-pool
            net.add(nn.Conv2D(channels=64, kernel_size=1, 
                              strides=1, padding=0))
            net.add(nn.BatchNorm())
            net.add(nn.Activation(activation='relu'))
            net.add(nn.Conv2D(channels=192, kernel_size=3, 
                              strides=1, padding=1))
            net.add(nn.BatchNorm())
            net.add(nn.Activation(activation='relu'))
            net.add(nn.MaxPool2D(pool_size=3, strides=2, padding=1))
            # Stage 3
            # channels = [1x1, 3x3-reduce, 3x3, 5x5-reduce, 5x5, pool-proj]
            net.add(inception_block(channels=[64, 96, 128, 16, 32, 32]))
            net.add(inception_block(channels=[128, 128, 192, 32, 96, 64]))
            net.add(nn.MaxPool2D(pool_size=3, strides=2, padding=1))
            # Stage 4
            net.add(inception_block(channels=[192, 96, 208, 16, 48, 64]))
            net.add(inception_block(channels=[160, 112, 224, 24, 64, 64]))
            net.add(inception_block(channels=[128, 128, 256, 24, 64, 64]))
            net.add(inception_block(channels=[112, 144, 288, 32, 64, 64]))
            net.add(inception_block(channels=[256, 160, 320, 32, 128, 128]))
            net.add(nn.MaxPool2D(pool_size=3, strides=2, padding=1))
            # Stage 5
            net.add(inception_block(channels=[256, 160, 320, 32, 128, 128]))
            net.add(inception_block(channels=[384, 192, 384, 48, 128, 128]))
            # 7x7 window: sized for the 7x7 map produced by a 224x224 input
            net.add(nn.AvgPool2D(pool_size=7, strides=1, padding=0))
            # classification
            net.add(nn.Dropout(0.4))
            net.add(nn.Flatten())
            net.add(nn.Dense(num_classes))
        self.net = net
        
    def hybrid_forward(self, F, x):
        """Apply every layer of ``self.net`` in order and return the result.

        When ``self.verbose`` is set, the shape after each layer is printed.
        Symbolic inputs (seen while a hybridized graph is being built) may
        not carry a concrete ``shape``; the trace is skipped for them rather
        than raising -- NOTE(review): confirm against the MXNet version used.
        """
        out = x
        for i, layer in enumerate(self.net):
            out = layer(out)
            if self.verbose and hasattr(out, 'shape'):
                print('Block %d, Output: %s' % (i+1, out.shape))
        return out

In [7]:
def get_net(ctx, num_classes=10):
    """Create a GoogLeNet with `num_classes` outputs, Xavier-initialised on `ctx`.

    Parameters
    ----------
    ctx : context (or list of contexts) passed to ``initialize``.
    num_classes : int, default 10
        Size of the network's output layer.

    Returns
    -------
    The initialised ``GoogLeNet`` instance.
    """
    model = GoogLeNet(num_classes=num_classes)
    model.initialize(init=init.Xavier(), ctx=ctx)
    return model

In [8]:
# net = GoogLeNet(num_classes=10, verbose=True)
# net.initialize()
# net

In [9]:
# for data, label in train_data:
#     output = net(data)
#     break

In [10]:
# Module-level loss object; `train` below uses it for both the training
# and the validation batches.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

In [11]:
def train(train_data, valid_data, net, ctx, num_epochs, 
          lr, lr_decay, lr_period, wd, 
          cost_period, print_cost=False):
    """Train `net` with SGD (momentum 0.9) and report per-epoch metrics.

    Parameters
    ----------
    train_data : sized iterable of ``(data, label)`` batches.
    valid_data : sized iterable of ``(data, label)`` batches, or None to
        skip validation.
    net : network; called as ``net(data)`` and must provide ``collect_params()``.
    ctx : context that every batch is copied to before the forward pass.
    num_epochs : int
        Number of passes over `train_data`.
    lr : float
        Initial learning rate.
    lr_decay : float
        Factor the current learning rate is multiplied by at each epoch
        listed in `lr_period`.
    lr_period : container of int
        1-based epoch numbers at which the decay is applied.
    wd : float
        Weight decay passed to the optimizer.
    cost_period : int
        When `print_cost` is True, losses are recorded every `cost_period`
        epochs.
    print_cost : bool, default False
        If True, plot the recorded losses on a log-scale y axis after training.

    Returns
    -------
    None. Progress is printed once per epoch.
    """
    has_valid = valid_data is not None
    train_costs = []
    valid_costs = []
    cost_epochs = []  # 1-based epochs at which the costs above were sampled
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr, 
                                                          'momentum': 0.9, 
                                                          'wd': wd})
    for epoch in range(num_epochs):
        pre_time = datetime.datetime.now()
        train_loss = 0
        train_acc = 0
        valid_loss = 0
        valid_acc = 0
        if (epoch+1) in lr_period:
            trainer.set_learning_rate(trainer.learning_rate*lr_decay)
        for data, label in train_data:
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx).astype('float32')
            with ag.record():
                output = net(data)
                loss = softmax_cross_entropy(output, label)
            loss.backward()
            # Normalise by the size of THIS batch: the final batch can be
            # smaller than the nominal size (e.g. with last_batch='keep'),
            # and the function should not depend on a notebook global.
            trainer.step(data.shape[0])
            train_loss += nd.mean(loss).asscalar()
            train_acc += nd.mean(output.argmax(axis=1) == label).asscalar()
        
        # Time covers the training pass only (measured before validation).
        cur_time = datetime.datetime.now()
        # total_seconds() rather than .seconds, which drops whole days.
        elapsed = int((cur_time-pre_time).total_seconds())
        h, remainder = divmod(elapsed, 3600)
        m, s = divmod(remainder, 60)
        time_str = 'Time %02d:%02d:%02d, ' % (h, m, s)
        
        if has_valid:
            # Outside ag.record(), so no gradients are tracked here.
            for data, label in valid_data:
                data = data.as_in_context(ctx)
                label = label.as_in_context(ctx).astype('float32')
                output = net(data)
                loss = softmax_cross_entropy(output, label)
                valid_loss += nd.mean(loss).asscalar()
                valid_acc += nd.mean(output.argmax(axis=1) == label).asscalar()
            print_str = 'Epoch %d, train_loss: %f, train_acc: %f, valid_acc: %f, ' % (epoch+1, 
                                                                                      train_loss/len(train_data), 
                                                                                      train_acc/len(train_data), 
                                                                                      valid_acc/len(valid_data))
        else:
            print_str = 'Epoch %d, train_loss: %f, train_acc: %f, ' % (epoch+1, 
                                                                   train_loss/len(train_data), 
                                                                   train_acc/len(train_data))
        print(print_str+time_str+'lr: %f' % trainer.learning_rate)
        
        if print_cost and (epoch+1) % cost_period == 0:
            cost_epochs.append(epoch+1)
            train_costs.append(train_loss/len(train_data))
            # len(None) would raise; only track validation cost when it exists.
            if has_valid:
                valid_costs.append(valid_loss/len(valid_data))
        
    # Nothing to draw if no epoch hit a cost_period boundary.
    if print_cost and train_costs:
        handles = []
        names = []
        # Plot against the epochs at which the costs were actually recorded.
        l1, = plt.semilogy(cost_epochs, train_costs)
        handles.append(l1)
        names.append('train')
        if has_valid:
            l2, = plt.semilogy(cost_epochs, valid_costs)
            handles.append(l2)
            names.append('valid')
        plt.xlabel('epoch')
        plt.ylabel('loss')
        plt.legend(handles, names, loc='upper right')
        plt.show()

In [12]:
# Build the network on GPU 0 and hybridize it before training.
ctx = mx.gpu(0)
net = get_net(ctx=ctx, num_classes=10)
net.hybridize()

# Hyper-parameters consumed by `train`.
num_epochs = 100
# Initial learning rate.
lr = 0.01
# Multiplier applied to the learning rate at each epoch in lr_period.
lr_decay = 0.1
# 1-based epochs at which the learning rate is decayed.
lr_period = [35, 75]
# Weight decay.
wd = 5e-4
# Record losses for the final plot every `cost_period` epochs.
cost_period = 10
# Plot the recorded losses after training.
print_cost = True

In [13]:
# Launch training with the hyper-parameters configured above.
train(
    train_data=train_data,
    valid_data=valid_data,
    net=net,
    ctx=ctx,
    num_epochs=num_epochs,
    lr=lr,
    lr_decay=lr_decay,
    lr_period=lr_period,
    wd=wd,
    cost_period=cost_period,
    print_cost=print_cost,
)

Epoch 1, train_loss: 1.841722, train_acc: 0.349175, valid_acc: 0.462500, Time 000243, lr: 0.010000
Epoch 2, train_loss: 1.506475, train_acc: 0.474424, valid_acc: 0.508398, Time 000240, lr: 0.010000
Epoch 3, train_loss: 1.316293, train_acc: 0.533605, valid_acc: 0.461914, Time 000240, lr: 0.010000
Epoch 4, train_loss: 1.204649, train_acc: 0.577415, valid_acc: 0.618164, Time 000240, lr: 0.010000
Epoch 5, train_loss: 1.101322, train_acc: 0.613538, valid_acc: 0.655078, Time 000240, lr: 0.010000
Epoch 6, train_loss: 1.022485, train_acc: 0.639727, valid_acc: 0.721289, Time 000240, lr: 0.010000
Epoch 7, train_loss: 0.972765, train_acc: 0.658669, valid_acc: 0.722266, Time 000240, lr: 0.010000
Epoch 8, train_loss: 0.924787, train_acc: 0.677374, valid_acc: 0.752734, Time 000240, lr: 0.010000
Epoch 9, train_loss: 0.872614, train_acc: 0.692370, valid_acc: 0.721680, Time 000240, lr: 0.010000
Epoch 10, train_loss: 0.835013, train_acc: 0.707707, valid_acc: 0.783984, Time 000240, lr: 0.010000
Epoch 11,

KeyboardInterrupt: 