# MXNet实现正则化

In [1]:
import mxnet as mx
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from mxnet import gluon,init,autograd,nd
from mxnet.gluon import data as gdata,loss as gloss

## 定义一个dropout层

In [8]:
def dropout(X,drop_prob):
    """Apply inverted dropout to ``X``.

    Each element is zeroed when its uniform random draw is not below
    ``1 - drop_prob``; the surviving elements are divided by ``1 - drop_prob``.

    Parameters
    ----------
    X : NDArray
        Input activations.
    drop_prob : float
        Probability of dropping an element; must lie in [0, 1].

    Returns
    -------
    NDArray
        Array with the same shape as ``X``.

    Raises
    ------
    AssertionError
        If ``drop_prob`` is outside [0, 1].
    """
    assert 0 <= drop_prob <= 1

    keep_prob = 1 - drop_prob

    # Everything is dropped: return zeros directly, which also avoids the
    # division by zero further down.
    if keep_prob == 0:
        return X.zeros_like()

    # Sample the mask on the same device as X so the product below never
    # mixes contexts (the input may live on CPU or on any GPU).
    mask = nd.random.uniform(0, 1, X.shape, ctx=X.context) < keep_prob

    return mask * X / keep_prob

## 测试一下

In [11]:
# Build a small 2x8 matrix holding 0..15 on the GPU to try dropout on.
X = nd.arange(16, ctx=mx.gpu())
X = X.reshape(2, 8)

In [12]:
# Show the input, then one random dropout sample drawn with drop probability 0.5.
for shown in (X, dropout(X, 0.5)):
    print(shown)


[[ 0.  1.  2.  3.  4.  5.  6.  7.]
 [ 8.  9. 10. 11. 12. 13. 14. 15.]]
<NDArray 2x8 @gpu(0)>

[[ 0.  2.  4.  0.  0. 10. 12.  0.]
 [ 0.  0.  0. 22.  0. 26.  0.  0.]]
<NDArray 2x8 @gpu(0)>


## 定义模型参数

In [31]:
# Layer widths: input size, output size and the two hidden-layer sizes.
num_inputs = 784
num_outputs = 10
num_hidden1 = 256
num_hidden2 = 256

# First hidden layer: normally distributed weights (scale 0.01), zero biases.
W1 = nd.random.normal(scale=0.01, shape=(num_inputs, num_hidden1), ctx=mx.gpu())
b1 = nd.zeros(shape=(1, num_hidden1), ctx=mx.gpu())

# Second hidden layer.
W2 = nd.random.normal(scale=0.01, shape=(num_hidden1, num_hidden2), ctx=mx.gpu())
b2 = nd.zeros(shape=(1, num_hidden2), ctx=mx.gpu())

# Output layer.
W3 = nd.random.normal(scale=0.01, shape=(num_hidden2, num_outputs), ctx=mx.gpu())
b3 = nd.zeros(shape=(1, num_outputs), ctx=mx.gpu())


In [32]:
# Collect every trainable array and allocate a gradient buffer for each one,
# so that backward() has somewhere to store param.grad.
params = []
for param in (W1, b1, W2, b2, W3, b3):
    param.attach_grad()
    params.append(param)

## 定义模型

In [47]:
# Drop probabilities applied after the first and the second hidden layer.
drop_prob1 = 0.2
drop_prob2 = 0.5


def net(X):
    """Forward pass of the two-hidden-layer MLP with dropout.

    The input is flattened to ``(-1, num_inputs)``. Dropout is applied after
    each hidden layer only while autograd is in training mode.
    """
    flat = X.reshape(-1, num_inputs)
    # Dropout must be skipped outside of training.
    training = autograd.is_training()

    # First hidden layer, followed by its dropout layer.
    pre_act1 = nd.dot(flat, W1) + b1
    hidden1 = pre_act1.relu()
    if training:
        hidden1 = dropout(hidden1, drop_prob1)

    # Second hidden layer, followed by its dropout layer.
    pre_act2 = nd.dot(hidden1, W2) + b2
    hidden2 = pre_act2.relu()
    if training:
        hidden2 = dropout(hidden2, drop_prob2)

    # Output layer: raw scores, no activation.
    return nd.dot(hidden2, W3) + b3

In [41]:
def sgd(params, batch_size, learning_rate):
    """Perform one mini-batch SGD step in place.

    Every array in ``params`` is overwritten with
    ``param - learning_rate * param.grad / batch_size``.
    """
    for weight in params:
        step = learning_rate * weight.grad / batch_size
        # Slice assignment writes into the existing array instead of rebinding the name.
        weight[:] = weight - step

## 训练模型

In [38]:
# Training hyper-parameters.
num_epochs = 5
lr = 0.5
batch_size = 256

# Loss object used for training.
loss = gloss.SoftmaxCrossEntropyLoss()

def ToTensor(X, y):
    """Dataset transform: cast both items to float32 and divide ``X`` by 255.

    Returns the tuple ``(X_as_float32 / 255, y_as_float32)``.
    """
    pixels = X.astype(np.float32) / 255
    label = y.astype(np.float32)
    return pixels, label

batch_size = 256

# Build the datasets and loaders with the GPU set as MXNet's default context.
with mx.Context(mx.gpu()):
    trainData = gdata.vision.FashionMNIST(
        root='./FashionMNIST', train=True, transform=ToTensor)
    testData = gdata.vision.FashionMNIST(
        root='./FashionMNIST', train=False, transform=ToTensor)

    # Only the training loader shuffles its samples.
    train_iter = gdata.DataLoader(trainData, batch_size=batch_size, shuffle=True)
    test_iter = gdata.DataLoader(testData, batch_size=batch_size, shuffle=False)

In [42]:
def accuracy(y_hat, y):
    """Return the fraction of rows of ``y_hat`` whose arg-max equals ``y``.

    The arg-max is taken along axis 1 and the result is returned as a
    Python scalar via ``asscalar()``.
    """
    predicted = y_hat.argmax(axis=1)
    hits = predicted == y
    return hits.mean().asscalar()

def evaluate_accuracy(net,data):
    """Return the accuracy of ``net`` over all samples yielded by ``data``.

    Parameters
    ----------
    net : callable
        Maps a batch of inputs to per-class scores.
    data : iterable
        Yields ``(X, y)`` batches; ``y.shape[0]`` is taken as the batch size.

    Returns
    -------
    float
        Number of correct predictions divided by the number of samples.

    Raises
    ------
    ZeroDivisionError
        If ``data`` yields no samples.
    """
    correct = 0.0
    total = 0
    for X, y in data:
        batch = y.shape[0]
        # accuracy() is a per-batch mean; weight it by the batch size so a
        # smaller final batch does not count as much as a full one.
        correct += accuracy(net(X), y) * batch
        total += batch
    return correct / total

In [48]:
def train_drop_out_net(net,train_iter,test_iter,num_epochs,batch_size,loss,trainer=None,params=None,lr=None):
    """Train ``net`` and print loss and accuracy after every epoch.

    Parameters
    ----------
    net : callable
        Model mapping a batch of inputs to per-class scores.
    train_iter, test_iter : iterable
        Yield ``(X, y)`` batches for training and evaluation.
    num_epochs : int
        Number of passes over ``train_iter``.
    batch_size : int
        Value passed to ``trainer.step`` or to ``sgd`` as the gradient divisor.
    loss : callable
        Called as ``loss(y_hat, y)``; the result must support ``backward()``.
    trainer : optional
        Object with a ``step(batch_size)`` method. When given, it performs
        the parameter updates.
    params, lr : optional
        Parameter list and learning rate for the hand-written ``sgd``; both
        are required when ``trainer`` is None.

    Raises
    ------
    ValueError
        If ``trainer`` is None and ``params`` or ``lr`` is missing.
    """
    # Fail early with a clear message instead of crashing inside sgd()
    # after the first backward pass.
    if trainer is None and (params is None or lr is None):
        raise ValueError('either trainer or both params and lr must be provided')

    for epoch in range(1, num_epochs + 1):
        train_loss_sum = 0.0
        train_acc_sum = 0.0
        num_samples = 0
        for X, y in train_iter:
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y)
            l.backward()

            if trainer is not None:
                trainer.step(batch_size)
            else:
                sgd(params, batch_size, lr)

            # Weight the per-batch means by the batch size so a smaller
            # final batch does not distort the epoch averages.
            n = y.shape[0]
            train_loss_sum += l.mean().asscalar() * n
            train_acc_sum += accuracy(y_hat, y) * n
            num_samples += n
        test_acc = evaluate_accuracy(net, test_iter)
        print('epoch %d train_loss %.4f train_acc %.3f test_acc %.3f'
              % (epoch, train_loss_sum / num_samples, train_acc_sum / num_samples, test_acc))
        

In [49]:
# Train the from-scratch model with the hand-written sgd (no Gluon trainer).
with mx.Context(mx.gpu()):
    train_drop_out_net(
        net, train_iter, test_iter, num_epochs, batch_size, loss,
        trainer=None, params=params, lr=lr,
    )

epoch 2 train_loss 1.1539 train_acc 0.554 test_acc 0.796
epoch 3 train_loss 0.5899 train_acc 0.781 test_acc 0.828
epoch 4 train_loss 0.5031 train_acc 0.818 test_acc 0.845
epoch 5 train_loss 0.4520 train_acc 0.836 test_acc 0.865
epoch 6 train_loss 0.4260 train_acc 0.845 test_acc 0.866


##  Gluon接口实现dropout

In [51]:
from mxnet.gluon import nn

In [67]:
# Gluon version of the MLP. The dropout rates follow the from-scratch model:
# drop_prob1 after the first hidden layer, drop_prob2 after the second.
net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'),
        nn.Dropout(drop_prob1),
        nn.Dense(256, activation='relu'),
        nn.Dropout(drop_prob2),
        nn.Dense(10)
        )

In [68]:
# Initialise the weights from a normal distribution (sigma 0.01) on the GPU.
net.initialize(init=init.Normal(sigma=0.01), ctx=mx.gpu())

In [69]:
# Let a Gluon SGD trainer perform the updates for the Gluon model.
trainer = gluon.Trainer(
    net.collect_params(),
    optimizer='sgd',
    optimizer_params={'learning_rate': lr},
)
with mx.Context(mx.gpu()):
    train_drop_out_net(
        net, train_iter, test_iter, num_epochs, batch_size, loss,
        trainer=trainer, lr=lr,
    )

epoch 2 train_loss 1.1716 train_acc 0.544 test_acc 0.775
epoch 3 train_loss 0.5913 train_acc 0.778 test_acc 0.823
epoch 4 train_loss 0.5089 train_acc 0.812 test_acc 0.854
epoch 5 train_loss 0.4647 train_acc 0.830 test_acc 0.858
epoch 6 train_loss 0.4427 train_acc 0.839 test_acc 0.854
