# AlexNet 的实现及在 CIFAR-10 上的应用

## 导入数据集

### 导入包

In [1]:
import mxnet as mx
from mxnet import gluon
from mxnet import ndarray as nd
from mxnet import autograd as ag
from mxnet import init
from mxnet.gluon.data import vision
from mxnet.gluon.data.vision import transforms
from mxnet.gluon import nn
import datetime
%matplotlib inline
import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 120
import matplotlib.pyplot as plt
import numpy as np

  import OpenSSL.SSL


### 数据增广

In [2]:
# Per-channel normalisation constants, shared by both pipelines so that the
# training and validation inputs are always scaled the same way.
# NOTE(review): presumably the CIFAR-10 channel statistics -- confirm.
norm_mean = [0.4914, 0.4822, 0.4465]
norm_std = [0.2023, 0.1994, 0.2010]

# Training pipeline: resize, random augmentation, tensor conversion, normalisation.
transform_train = transforms.Compose([
    transforms.Resize(227),
    # Randomly crop according to `scale` and `ratio`, then rescale the crop
    # to a 227x227 square.
    transforms.RandomResizedCrop(227, scale=(0.08, 1.0), ratio=(3.0/4.0, 4.0/3.0)),
    transforms.RandomFlipLeftRight(),
    # Scale pixel values into the unit interval and change the layout from
    # height x width x channel to channel x height x width.
    transforms.ToTensor(),
    # Standardise each channel: subtract the mean, divide by the standard
    # deviation (not the variance).
    transforms.Normalize(mean=norm_mean, std=norm_std),
])

# Validation pipeline: same resize and normalisation, no random augmentation.
transform_test = transforms.Compose([
    transforms.Resize(227),
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std),
])

### 读取数据集

In [3]:
# Folder that contains the 'train' and 'valid' image sub-folders.
data_dir = 'data/cifar10/'

# Number of samples per mini-batch.
batch_size = 128

# Read the raw image files; flag=1 means the images have three channels (colour).
train_ds = vision.ImageFolderDataset(data_dir + 'train', flag=1)
valid_ds = vision.ImageFolderDataset(data_dir + 'valid', flag=1)

loader = gluon.data.DataLoader

# Training batches are shuffled; the transform is applied to the first element
# of each sample only (transform_first).
train_data = loader(
    train_ds.transform_first(transform_train),
    batch_size=batch_size,
    shuffle=True,
    last_batch='keep',
)

# Validation batches keep the dataset order.
valid_data = loader(
    valid_ds.transform_first(transform_test),
    batch_size=batch_size,
    shuffle=False,
    last_batch='keep',
)

In [4]:
# Number of samples in the training and validation datasets.
for dataset in (train_ds, valid_ds):
    print(len(dataset))

# Number of mini-batches the training loader yields per pass.
print(len(train_data))

# Look at the first mini-batch only: its shapes, and its labels copied to GPU 0.
for data, label in train_data:
    print(data.shape, label.shape)
    labels_on_gpu = label.as_in_context(mx.gpu(0))
    print(labels_on_gpu)
    break

45000
5000
352
(128, 3, 227, 227) (128,)

[4 1 7 7 1 6 0 8 2 8 2 2 7 7 2 7 4 8 0 9 6 1 1 4 2 1 9 4 8 2 2 8 8 1 4 1 2
 2 1 9 8 2 5 1 7 5 4 2 7 3 5 4 5 0 7 7 6 5 1 0 1 3 2 7 9 1 4 8 3 3 2 2 3 0
 0 7 7 9 1 7 3 9 1 7 9 0 6 0 7 5 9 7 8 7 5 1 0 0 7 5 3 0 2 9 6 3 0 4 5 8 0
 0 3 6 2 7 6 8 7 5 1 2 9 9 1 9 1 6]
<NDArray 128 @gpu(0)>


## 交叉熵损失函数

In [5]:
# Loss object used by train(); it is called there as
# softmax_cross_entropy(output, label).
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

## 设计模型 —— AlexNet

In [6]:
class AlexNet(nn.HybridBlock):
    """AlexNet-style image classifier with batch normalisation.

    Five convolution stages (Conv2D -> BatchNorm -> ReLU, with max pooling
    after stages 1, 2 and 5) are followed by three fully connected layers.
    The two hidden fully connected layers use ReLU; without it the classifier
    head would be a purely linear map apart from dropout.

    Parameters
    ----------
    num_classes : int
        Number of units of the final Dense layer.
    verbose : bool, default False
        If True, print the output shape after every layer during the forward
        pass. Shapes are only printed for tensors that expose ``shape``.
    **kwargs
        Forwarded to ``nn.HybridBlock``.
    """

    def __init__(self, num_classes, verbose=False, **kwargs):
        super(AlexNet, self).__init__(**kwargs)
        self.verbose = verbose
        # (channels, kernel_size, strides, padding, followed_by_max_pool)
        conv_stages = [
            (96, 11, 4, 0, True),     # conv1
            (256, 5, 1, 2, True),     # conv2
            (384, 3, 1, 1, False),    # conv3
            (384, 3, 1, 1, False),    # conv4
            (256, 3, 1, 1, True),     # conv5
        ]
        with self.name_scope():
            net = self.net = nn.HybridSequential()
            for channels, kernel_size, strides, padding, pooled in conv_stages:
                net.add(nn.Conv2D(channels=channels, kernel_size=kernel_size,
                                  strides=strides, padding=padding))
                net.add(nn.BatchNorm())
                net.add(nn.Activation(activation='relu'))
                if pooled:
                    net.add(nn.MaxPool2D(pool_size=3, strides=2))

            # Fully connected classifier head.
            net.add(nn.Flatten())
            net.add(nn.Dense(4096, activation='relu'))
            net.add(nn.Dropout(0.5))
            net.add(nn.Dense(4096, activation='relu'))
            net.add(nn.Dropout(0.5))
            net.add(nn.Dense(num_classes))

    def hybrid_forward(self, F, x):
        """Run ``x`` through every layer of ``self.net`` in order.

        ``F`` is not used: the child layers are called directly. The layers
        are applied one at a time (instead of calling ``self.net(x)``) so that
        the intermediate shapes can be reported when ``verbose`` is set.
        """
        out = x
        for i, f in enumerate(self.net):
            out = f(out)
            # After hybridize() the intermediate values may be symbolic and
            # carry no concrete shape; skip the report instead of failing.
            if self.verbose and hasattr(out, 'shape'):
                print('Block %d Output: %s' % (i+1, out.shape))
        return out

In [7]:
def get_net(ctx, num_classes=10):
    """Create an AlexNet and initialise its parameters on ``ctx``.

    Parameters
    ----------
    ctx : device context passed to ``initialize``.
    num_classes : int, default 10
        Forwarded to the ``AlexNet`` constructor.

    Returns
    -------
    The initialised ``AlexNet`` instance (Xavier initialiser).
    """
    model = AlexNet(num_classes)
    xavier = init.Xavier()
    model.initialize(init=xavier, ctx=ctx)
    return model

In [8]:
def train(train_data, valid_data, net, ctx, num_epoches, 
          learning_rate=0.01, lr_decay=0.1, lr_period=50, 
          momentum = 0.9, weight_decay=0, cost_period = 10, 
          print_cost=False):
    """Train ``net`` with SGD (momentum) and print metrics after every epoch.

    Parameters
    ----------
    train_data : iterable of (data, label) mini-batches; must support ``len``.
    valid_data : iterable of (data, label) mini-batches, or None to skip
        validation.
    net : the model; called as ``net(data)``.
    ctx : device context the batches are copied to.
    num_epoches : int
        Number of passes over ``train_data``.
    learning_rate : float
        Initial learning rate.
    lr_decay : float
        Factor the learning rate is multiplied by every ``lr_period`` epochs.
    lr_period : int
        Number of epochs between learning-rate decays.
    momentum : float
        SGD momentum.
    weight_decay : float
        Passed to the trainer as ``wd``.
    cost_period : int
        When ``print_cost`` is set, the mean training loss is recorded every
        ``cost_period`` epochs.
    print_cost : bool
        If True, record the losses and plot the training loss (log scale)
        against the epoch number once training has finished.

    Returns
    -------
    None. Progress is printed; the loss curve is shown via matplotlib.
    """
    costs = []
    valid_costs = []
    cost_epochs = []   # epoch numbers at which `costs` entries were recorded
    trainer = gluon.Trainer(net.collect_params(), 'sgd', 
                            {'learning_rate': learning_rate, 
                             'momentum': momentum, 
                             'wd': weight_decay})
    pre_time = datetime.datetime.now()
    for epoch in range(num_epoches):
        train_loss = 0
        train_acc = 0
        if (epoch+1) % lr_period == 0:
            trainer.set_learning_rate(trainer.learning_rate * lr_decay)
        for data, label in train_data:
            data = data.as_in_context(ctx)
            # Labels are cast to float32 before being compared with the
            # argmax of the network output below.
            label = label.astype('float32').as_in_context(ctx)
            with ag.record():
                output = net(data)
                loss = softmax_cross_entropy(output, label)
            loss.backward()
            # Normalise by the size of this batch: the final batch of an epoch
            # can be smaller than the nominal batch size.
            trainer.step(data.shape[0])
            train_loss += nd.mean(loss).asscalar()
            train_acc += nd.mean(output.argmax(axis=1) == label).asscalar()
        cur_time = datetime.datetime.now()
        # total_seconds() keeps whole days, which `.seconds` would drop.
        elapsed = int((cur_time - pre_time).total_seconds())
        h, remainder = divmod(elapsed, 3600)
        m, s = divmod(remainder, 60)
        time_str = "Time %02d:%02d:%02d" % (h, m, s)
        if valid_data is not None:
            valid_loss = 0
            valid_acc = 0
            for data, label in valid_data:
                data = data.as_in_context(ctx)
                label = label.astype('float32').as_in_context(ctx)
                output = net(data)
                valid_loss += nd.mean(softmax_cross_entropy(output, label)).asscalar()
                valid_acc += nd.mean(output.argmax(axis=1) == label).asscalar()
            epoch_str = "Epoch %d, train_loss: %f, train_acc: %f, valid_acc %f, " % (epoch+1, 
                                                                                   train_loss/len(train_data), 
                                                                                   train_acc/len(train_data), 
                                                                                   valid_acc/len(valid_data))
        else:
            epoch_str = "Epoch %d, train_loss: %f, train_acc: %f, " % (epoch+1, 
                                                                     train_loss/len(train_data), 
                                                                     train_acc/len(train_data))
        if print_cost and (epoch+1) % cost_period == 0:
            cost_epochs.append(epoch+1)
            costs.append(train_loss/len(train_data))
            # The validation loss only exists when a validation set was given.
            if valid_data is not None:
                valid_costs.append(valid_loss/len(valid_data))
        print(epoch_str + time_str + ', lr: %f' % trainer.learning_rate)
        pre_time = cur_time
    if print_cost:
        # Plot each recorded loss at the epoch it was actually measured.
        plt.semilogy(cost_epochs, costs)
        plt.xlabel('epoch')
        plt.ylabel('loss')
        plt.show()

In [9]:
# Hyper-parameters for the training run; these are passed to get_net() and train().

# Device the model and the batches are placed on (first GPU).
ctx = mx.gpu(0)
# Number of passes over the training set.
num_epoches = 300
# Initial SGD learning rate.
learning_rate = 0.003
# Every `lr_period` epochs the learning rate is multiplied by `lr_decay`.
lr_decay = 0.1
lr_period = 50
# SGD momentum.
momentum = 0.9
# Weight decay, handed to the trainer as 'wd'.
weight_decay = 0 
# With print_cost=True the training loss is recorded every `cost_period` epochs.
cost_period = 10

In [44]:
# Build and initialise the network on `ctx`, then hybridize it before training.
net = get_net(ctx, num_classes=10)
net.hybridize()

# Run the training loop; print_cost=True also plots the loss curve at the end.
train(
    train_data, valid_data, net, ctx, num_epoches,
    learning_rate=learning_rate,
    lr_decay=lr_decay,
    lr_period=lr_period,
    momentum=momentum,
    weight_decay=weight_decay,
    cost_period=cost_period,
    print_cost=True,
)

NameError: name 'get_net' is not defined

In [116]:
# Shell escape: run nvidia-smi to show the current GPU status.
!nvidia-smi

Wed Jun 20 22:01:52 2018       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 384.130                Driver Version: 384.130                   |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce GTX 108...  Off  | 00000000:29:00.0  On |                  N/A |
| 15%   56C    P2    65W / 280W |   3670MiB / 11169MiB |     28%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage    

In [81]:
# Scratch experiment on rebinding a list name.
a = 'abcbdefg'
b = list(a)      # list of the characters of `a`
c = list('igk')
b = c            # `b` now refers to the same list object as `c`
b = []           # `b` is rebound to a new empty list; `c` is not affected

# Indexing an empty list raises IndexError. Show the message instead of letting
# the cell fail, so the notebook still runs from top to bottom.
try:
    print(b[0])
except IndexError as err:
    print('IndexError:', err)

IndexError: list index out of range

In [78]:
# Scratch experiment: changing the loop counter inside a while loop.
# When the counter is about to become 2 it jumps to 4 instead, so the loop
# prints 1, 4, 5.
i = 0
while i < 5:
    step = 3 if i == 1 else 1
    i += step
    print(i)

1
4
5


In [None]:
def is_valid_brackets(s):
    """Return True if the bracket string ``s`` is balanced and properly nested.

    The draft this replaces could not run: it used ``return`` at module level
    and read ``s`` and ``s_map`` without defining them. It also located the
    partner of a closing bracket with a fixed offset, which fails on inputs
    such as ``'(()())'``. The signed-code idea is kept: an opener has a
    positive code and its closer the negated code, so a pair matches when the
    two codes sum to zero.

    :type s: str
    :rtype: bool
    :raises KeyError: if ``s`` contains a character other than ``()[]{}``.
    """
    s_map = {'(': 1, '[': 2, '{': 3, ')': -1, ']': -2, '}': -3}
    open_codes = []   # codes of the openers that are still waiting for a closer
    for ch in s:
        code = s_map[ch]
        if code > 0:
            open_codes.append(code)
        elif not open_codes or open_codes.pop() + code != 0:
            # A closer with nothing open, or one that closes the wrong kind.
            return False
    # Valid only if every opener has been closed.
    return not open_codes

In [103]:
class Solution:
    def isValid(self, s):
        """
        Check whether the bracket string ``s`` is balanced and properly nested.

        Each opener has a positive code and its closer the negated code. An
        adjacent pair is removed only when the left element is an opener and
        the two codes sum to zero. Testing the sum alone would also accept a
        closer followed by an opener, e.g. ``')('``. Keeping the unmatched
        codes on a stack removes pairs in a single pass.

        :type s: str
        :rtype: bool
        :raises KeyError: if an even-length ``s`` contains a character other
            than ``()[]{}``.
        """
        s_map = {'(': 1, '[': 2, '{': 3, ')': -1, ']': -2, '}': -3}
        # An odd number of brackets can never pair up completely.
        if len(s) % 2 != 0:
            return False
        pending = []   # codes that have not been cancelled yet
        for ch in s:
            code = s_map[ch]
            if pending and pending[-1] > 0 and pending[-1] + code == 0:
                pending.pop()
            else:
                pending.append(code)
        return not pending

In [105]:
# Try the checker on a string with one surplus closing brace at the end.
x = Solution()
x.isValid('[]{[()]{([])}}{}}')

False

In [108]:
class Solution:
    def isValid(self, s):
        """
        Check whether the bracket string ``s`` is balanced and properly nested.

        Openers (and any other character) are pushed onto a stack. A closer
        must match the opener on top of the stack, which is then removed. The
        empty string is valid, and a closer that arrives while the stack is
        empty makes the string invalid rather than raising IndexError.

        :type s: str
        :rtype: bool
        """
        pairs = {']':'[', ')':'(', '}':'{'}
        stack = []
        for ch in s:
            if ch in pairs:
                # Closer: nothing open, or the wrong kind on top -> invalid.
                if not stack or stack[-1] != pairs[ch]:
                    return False
                stack.pop()
            else:
                stack.append(ch)
        # Anything left on the stack was never closed.
        return not stack

In [112]:
# Try the checker on a string that ends with an unclosed opening brace.
x = Solution()
x.isValid('[]{[()]{([])}}{')

False