In [None]:
import d2lzh as d2l
from mxnet import autograd,gluon,init,nd
from mxnet.gluon import loss as gloss,nn

def dropout(X,drop_prob):
    """Apply inverted dropout to ``X``.

    Each element of ``X`` is zeroed independently with probability
    ``drop_prob``; the surviving elements are divided by ``1-drop_prob``.

    Args:
        X: NDArray to drop elements from.
        drop_prob: Probability of zeroing an element; must lie in [0, 1].

    Returns:
        An NDArray with the same shape as ``X``.

    Raises:
        AssertionError: If ``drop_prob`` is outside [0, 1].
    """
    assert 0<=drop_prob<=1
    keep_prob=1-drop_prob
    # Everything is dropped: return zeros directly and avoid dividing by zero.
    if keep_prob==0:
        return X.zeros_like()
    # Draw the mask on the same device as X so mask*X also works when X is not
    # on the default context.
    mask=nd.random.uniform(0,1,X.shape,ctx=X.context)<keep_prob
    return mask*X/keep_prob

In [6]:
# drop_prob=0 gives keep_prob=1, so the input comes back unchanged.
X = nd.arange(16).reshape((2, 8))
dropout(X, 0)


[[ 0.  1.  2.  3.  4.  5.  6.  7.]
 [ 8.  9. 10. 11. 12. 13. 14. 15.]]
<NDArray 2x8 @cpu(0)>

In [26]:
# drop_prob=1 gives keep_prob=0, so dropout returns an all-zero array.
X = nd.arange(16).reshape((2, 8))
dropout(X, 1)


[[0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]]
<NDArray 2x8 @cpu(0)>

#### 包含两个隐藏层的多层感知机 

In [27]:
# Layer widths: flattened input -> hidden layer 1 -> hidden layer 2 -> output.
num_inputs, num_outputs, num_hiddens1, num_hiddens2 = 784, 10, 256, 256

# Weights are drawn from a normal distribution with scale 0.01; biases start at zero.
# Input -> first hidden layer.
W1 = nd.random.normal(scale=0.01, shape=(num_inputs, num_hiddens1))
b1 = nd.zeros(num_hiddens1)
# First hidden layer -> second hidden layer.
W2 = nd.random.normal(scale=0.01, shape=(num_hiddens1, num_hiddens2))
b2 = nd.zeros(num_hiddens2)
# Second hidden layer -> output layer.
W3 = nd.random.normal(scale=0.01, shape=(num_hiddens2, num_outputs))
b3 = nd.zeros(num_outputs)

# Allocate gradient buffers for every trainable parameter.
params = [W1, b1, W2, b2, W3, b3]
for param in params:
    param.attach_grad()

In [30]:
# Dropout probabilities for the first and second hidden layers respectively.
drop_prob1,drop_prob2=0.2,0.5

def net(X):
    """Forward pass of the two-hidden-layer perceptron with dropout.

    ``X`` is flattened to ``(-1, num_inputs)``, passed through two
    ReLU-activated hidden layers and a final linear output layer. Dropout is
    applied after each hidden layer, and only while ``autograd.is_training()``
    is true.

    Args:
        X: Input batch; reshaped to ``(-1, num_inputs)``.

    Returns:
        The output-layer values ``nd.dot(H2, W3) + b3``.
    """
    X=X.reshape((-1,num_inputs))
    H1=(nd.dot(X,W1)+b1).relu()
    if autograd.is_training():
        H1=dropout(H1,drop_prob1)
    H2=(nd.dot(H1,W2)+b2).relu()
    if autograd.is_training():
        # The second hidden layer uses its own rate (drop_prob2), not drop_prob1.
        H2=dropout(H2,drop_prob2)
    return nd.dot(H2,W3)+b3

In [31]:
from mxnet.gluon import data as gdata

# Training hyperparameters and loss function.
num_epochs,lr,batch_size=5,0.5,256
loss=gloss.SoftmaxCrossEntropyLoss()

# Dataset directory, defined once so the train and test splits cannot drift apart.
data_root="E:/ML_Dataset/fashionMNIT/"
mnist_train = gdata.vision.FashionMNIST(root=data_root,train=True)
mnist_test = gdata.vision.FashionMNIST(root=data_root,train=False)

# Both loaders apply ToTensor to the image (first element) of each sample;
# only the training loader shuffles.
num_workers=0
transformer=gdata.vision.transforms.ToTensor()
train_iter=gdata.DataLoader(mnist_train.transform_first(transformer),
                            batch_size,shuffle=True,num_workers=num_workers)
test_iter=gdata.DataLoader(mnist_test.transform_first(transformer),
                            batch_size,shuffle=False,num_workers=num_workers)

# Train the from-scratch model, updating `params` with learning rate `lr`.
d2l.train_ch3(net,train_iter,test_iter,loss,num_epochs,batch_size,params,lr)

epoch 1, loss 1.2122, train acc 0.528, test acc 0.763
epoch 2, loss 0.5899, train acc 0.777, test acc 0.835
epoch 3, loss 0.4969, train acc 0.818, test acc 0.843
epoch 4, loss 0.4522, train acc 0.834, test acc 0.859
epoch 5, loss 0.4178, train acc 0.846, test acc 0.863


### 简洁实现 

In [32]:
# The same architecture assembled from Gluon layers: two 256-unit ReLU layers,
# each followed by a Dropout layer, then a 10-unit output layer.
net = nn.Sequential()
net.add(nn.Dense(256, activation="relu"))
net.add(nn.Dropout(drop_prob1))
net.add(nn.Dense(256, activation="relu"))
net.add(nn.Dropout(drop_prob2))
net.add(nn.Dense(10))
net.initialize(init.Normal(sigma=0.01))

In [34]:
# SGD over all parameters of the Gluon model; the trainer performs the updates,
# so no explicit params/lr are passed to train_ch3.
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              trainer=trainer)

epoch 1, loss 1.2110, train acc 0.528, test acc 0.745
epoch 2, loss 0.5976, train acc 0.776, test acc 0.829
epoch 3, loss 0.4976, train acc 0.818, test acc 0.831
epoch 4, loss 0.4506, train acc 0.836, test acc 0.861
epoch 5, loss 0.4237, train acc 0.845, test acc 0.865
