In [6]:
import sys
sys.path.insert(0, '..')

import gluonbook as gb
from mxnet import autograd, nd
from mxnet.gluon import loss as gloss

In [7]:
# Layer widths of the network: input -> hidden 1 -> hidden 2 -> output.
num_inputs, num_outputs = 784, 10
num_hiddens1, num_hiddens2 = 256, 256

# Weights are drawn from a normal distribution with scale 0.01, in the order
# W1, W2, W3; every bias vector starts at zero.
W1, W2, W3 = [
    nd.random.normal(scale=0.01, shape=weight_shape)
    for weight_shape in ((num_inputs, num_hiddens1),
                         (num_hiddens1, num_hiddens2),
                         (num_hiddens2, num_outputs))
]
b1, b2, b3 = [nd.zeros(width)
              for width in (num_hiddens1, num_hiddens2, num_outputs)]

# Attach gradient storage to every parameter so that backward() can fill it.
params = [W1, b1, W2, b2, W3, b3]
for param in params:
    param.attach_grad()

In [8]:
# Dropout probabilities (by their names, for hidden layers 1 and 2).
# NOTE(review): `net` below never references either value, so the forward
# pass as written applies no dropout -- confirm whether that is intended.
drop_prob1, drop_prob2 = 0.2, 0.5

def net(X):
    """Forward pass of a fully connected network with two ReLU hidden layers.

    Args:
        X: input batch; it is reshaped to (-1, num_inputs) before use.

    Returns:
        The output of the final affine layer (no softmax is applied here).
    """
    flat_inputs = X.reshape((-1, num_inputs))
    hidden1 = nd.dot(flat_inputs, W1) + b1
    hidden1 = hidden1.relu()
    hidden2 = nd.dot(hidden1, W2) + b2
    hidden2 = hidden2.relu()
    scores = nd.dot(hidden2, W3) + b3
    return scores

In [9]:
def accuracy(y_hat, y):
    """Return the fraction of rows of `y_hat` whose arg-max equals the label.

    Args:
        y_hat: per-class scores, one row per example (arg-max is taken
            along axis 1).
        y: class labels; cast to float32 before the comparison.

    Returns:
        The mean of the element-wise match indicator, as a Python scalar.
    """
    predicted = y_hat.argmax(axis=1)
    matches = predicted == y.astype('float32')
    return matches.mean().asscalar()

def evaluate_accuracy(data_iter, net):
    """Return the fraction of correctly classified examples in `data_iter`.

    Correct predictions and examples are counted across all batches, so a
    smaller final batch is weighted by its actual size instead of counting
    as much as a full batch. Because no `len(data_iter)` is needed, any
    iterable of batches (including a generator) is accepted.

    Args:
        data_iter: iterable yielding `(X, y)` batches, where `y` holds the
            class index of each example.
        net: callable mapping `X` to per-class scores, one row per example.

    Returns:
        Number of correct predictions divided by the number of examples.

    Raises:
        ZeroDivisionError: if `data_iter` yields no examples.
    """
    num_correct, num_examples = 0.0, 0
    for X, y in data_iter:
        # argmax yields float indices, so compare against float32 labels.
        labels = y.astype('float32')
        num_correct += (net(X).argmax(axis=1) == labels).sum().asscalar()
        num_examples += labels.size
    return num_correct / num_examples

def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, trainer=None):
    """Train `net` for `num_epochs` epochs, printing metrics after each one.

    For every batch the loss is recorded under autograd and back-propagated,
    then the parameters are updated either with `gb.sgd(params, lr,
    batch_size)` (when `trainer` is None) or with `trainer.step(batch_size)`.

    The reported training loss and accuracy are averaged over examples:
    each batch contributes in proportion to its size, so a smaller final
    batch does not skew the epoch statistics, and `train_iter` does not need
    to support `len()`.

    Args:
        net: callable mapping a batch `X` to per-class scores.
        train_iter: iterable of `(X, y)` training batches.
        test_iter: iterable of `(X, y)` batches passed to
            `evaluate_accuracy` after every epoch.
        loss: callable `loss(y_hat, y)` returning the batch loss.
        num_epochs: number of passes over `train_iter`.
        batch_size: forwarded to `gb.sgd` / `trainer.step`.
        params: parameters handed to `gb.sgd`; used only when `trainer`
            is None.
        lr: learning rate handed to `gb.sgd`; used only when `trainer`
            is None.
        trainer: optional object with a `step(batch_size)` method.

    Raises:
        ZeroDivisionError: if `train_iter` yields no examples.
    """
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y)
            l.backward()
            if trainer is None:
                gb.sgd(params, lr, batch_size)
            else:
                trainer.step(batch_size)  # Used in the next section.
            labels = y.astype('float32')
            num_examples = labels.size
            # Weight the batch-mean loss by the batch size so that the epoch
            # figure is a per-example average.
            train_l_sum += l.mean().asscalar() * num_examples
            train_acc_sum += (
                y_hat.argmax(axis=1) == labels).sum().asscalar()
            n += num_examples
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))

In [10]:
# Training configuration: epochs, learning rate and mini-batch size.
num_epochs, lr, batch_size = 5, 0.5, 256

# Loss applied to the scores returned by `net`.
loss = gloss.SoftmaxCrossEntropyLoss()

# Train/test batch iterators as returned by gb.load_data_fashion_mnist.
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)

# No trainer is passed, so train_ch3 updates `params` through gb.sgd.
train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
          params=params, lr=lr)

epoch 1, loss 1.0725, train acc 0.583, test acc 0.782
epoch 2, loss 0.5422, train acc 0.796, test acc 0.811
epoch 3, loss 0.4514, train acc 0.832, test acc 0.850
epoch 4, loss 0.4177, train acc 0.846, test acc 0.843
epoch 5, loss 0.3834, train acc 0.857, test acc 0.866


In [2]:
import numpy as np
def dropout(X, drop_prob):
    """Randomly zero elements of a NumPy array and rescale the survivors.

    Each element is kept when an independent uniform sample on [0, 1) is
    below `1 - drop_prob`; kept elements are divided by `1 - drop_prob`, so
    the expected value of every element is unchanged.

    Args:
        X: NumPy array to apply dropout to.
        drop_prob: probability of zeroing an element; must lie in [0, 1].

    Returns:
        A new floating-point array with the same shape as `X`.

    Raises:
        AssertionError: if `drop_prob` is outside [0, 1].
    """
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob
    # In this case every element is dropped.
    if keep_prob == 0:
        # NumPy arrays have no `.zeros_like()` method, so use the module-level
        # function. Dividing by a Python float applies the same dtype
        # promotion as the general branch below, so both return floats.
        return np.zeros_like(X) / 1.0
    mask = np.random.uniform(0, 1, X.shape) < keep_prob
    return mask * X / keep_prob

In [5]:
# Try dropout on a small 2 x 8 array holding the integers 0..15; with
# drop_prob = 0.5 the surviving entries come out doubled.
X = np.arange(16).reshape(2, 8)
dropout(X, 0.5)

array([[ 0.,  0.,  4.,  0.,  8.,  0., 12., 14.],
       [ 0., 18.,  0.,  0., 24.,  0., 28., 30.]])

In [6]:
# Uniform samples on [0, 1) with the shape of X -- the same kind of noise
# that dropout thresholds to build its keep mask.
np.random.uniform(low=0, high=1, size=X.shape)

array([[0.96754919, 0.15460036, 0.10414868, 0.69457543, 0.47980614,
        0.48920228, 0.23256717, 0.24876718],
       [0.63893278, 0.71515855, 0.88347252, 0.28030065, 0.33392064,
        0.93264613, 0.00385722, 0.93925438]])