In [2]:
from mxnet import gluon
from mxnet import ndarray as nd

def transform(data, label):
    return data.astype('float32')/255, label.astype('float32')
mnist_train = gluon.data.vision.FashionMNIST(train=True, transform=transform)
mnist_test = gluon.data.vision.FashionMNIST(train=False, transform=transform)

In [3]:
batch_size=256
train_data=gluon.data.DataLoader(mnist_train,batch_size=batch_size,shuffle=True)
test_data=gluon.data.DataLoader(mnist_test,batch_size=batch_size,shuffle=False)

keepdims=True 用于保证张量的维度特性，防止某一维度的长度为1时被自动降维

In [21]:
def softmax(x):
    exp=nd.exp(x)
    sum_=exp.sum(axis=1,keepdims=True)
    return exp/sum_

In [22]:
def relu(X):
    return nd.maximum(X,0)

In [31]:
num_input=28*28
num_hide=256
num_output=10
weight_scale=0.1
W1=nd.random_normal(shape=(num_input,num_hide),scale=weight_scale)
B1=nd.random_normal(shape=(num_hide))
W2=nd.random_normal(shape=(num_hide,num_output),scale=weight_scale)
B2=nd.random_normal(shape=(num_output))
params=[W1,B1,W2,B2]
for param in params:
    param.attach_grad()

In [32]:
def net(X):
    output1=nd.dot(X.reshape((-1,num_input)),W1)+B1
    hide=relu(output1)
    return softmax(nd.dot(hide,W2)+B2)

In [33]:
def cross_entropy(yhat,y):
    return - nd.pick(nd.log(yhat),y)

In [34]:
def accuracy(output, label):
    return nd.mean(output.argmax(axis=1)==label).asscalar()

In [35]:
def evaluate_accuracy(data_iterator, net):
    acc=.0
    for data,label in data_iterator:
        output=net(data)
        acc+=accuracy(output,label)
    return acc/len(data_iterator)

In [36]:
evaluate_accuracy(test_data,net)

0.1033203125

In [37]:
import sys
sys.path.append('..')
from utils import SGD
from mxnet import autograd

In [39]:
learning_rate = 0.1
for epoch in range(100):
    train_loss = 0.
    train_acc = 0.
    for data, label in train_data:
        with autograd.record():
            output = net(data)
            loss = cross_entropy(output, label)
        loss.backward()
        SGD(params, learning_rate/batch_size)

        train_loss += nd.mean(loss).asscalar()
        train_acc += accuracy(output, label)

    test_acc = evaluate_accuracy(test_data, net)
    print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % (epoch, train_loss/len(train_data), train_acc/len(train_data), test_acc))

Epoch 0. Loss: 0.393821, Train acc 0.859081, Test acc 0.866309
Epoch 1. Loss: 0.380344, Train acc 0.863725, Test acc 0.866992
Epoch 2. Loss: 0.368827, Train acc 0.868794, Test acc 0.869727
Epoch 3. Loss: 0.363345, Train acc 0.869171, Test acc 0.870605
Epoch 4. Loss: 0.350662, Train acc 0.874756, Test acc 0.874023
Epoch 5. Loss: 0.346111, Train acc 0.875975, Test acc 0.870508
Epoch 6. Loss: 0.338737, Train acc 0.878613, Test acc 0.877637
Epoch 7. Loss: 0.331218, Train acc 0.880652, Test acc 0.875195
Epoch 8. Loss: 0.325888, Train acc 0.882264, Test acc 0.871289
Epoch 9. Loss: 0.321658, Train acc 0.885073, Test acc 0.876855
Epoch 10. Loss: 0.316386, Train acc 0.886370, Test acc 0.877637
Epoch 11. Loss: 0.311219, Train acc 0.887627, Test acc 0.879297
Epoch 12. Loss: 0.306644, Train acc 0.889672, Test acc 0.877832
Epoch 13. Loss: 0.302566, Train acc 0.892110, Test acc 0.882520
Epoch 14. Loss: 0.298764, Train acc 0.892681, Test acc 0.879004
Epoch 15. Loss: 0.295256, Train acc 0.893988, Test