### build datasets from FASHION_MNIST

In [135]:
%matplotlib inline
import sys
import d2lzh as d2l
from mxnet.gluon import data as gdata
from mxnet import autograd, nd

batch_size = 256
transformer = gdata.vision.transforms.ToTensor()
mnist_train, mnist_test = gdata.vision.FashionMNIST(train=True), gdata.vision.FashionMNIST(train=False)

if sys.platform.startswith('win'):
    num_workers = 1
else:
    num_workers = 4
    
####
!echo num_worker: {num_workers} 
####

train_iter, test_iter = \
    gdata.DataLoader(mnist_train.transform_first(transformer), batch_size, shuffle=True, num_workers=num_workers), \
    gdata.DataLoader(mnist_test.transform_first(transformer), batch_size, shuffle=True, num_workers=num_workers)

num_inputs, num_outputs = 28 * 28, 10 # each image's size = 28^2 pixel
W = nd.random.normal(scale=0.01, shape=(num_inputs, num_outputs))
b = nd.zeros((1, num_outputs))

W.attach_grad()
b.attach_grad()

print(W, b)
print(W.shape, b.shape)

num_worker: 4

[[ 0.00241127  0.0033418  -0.01314738 ... -0.00470703  0.01259373
  -0.00720676]
 [ 0.00600226  0.00011061  0.0001107  ...  0.00323387 -0.00515725
   0.0056391 ]
 [-0.00306754  0.01372907 -0.00487223 ... -0.00297172  0.00460683
   0.00097664]
 ...
 [-0.01689546  0.00070881 -0.01083925 ... -0.00928763 -0.00952997
  -0.00600304]
 [-0.00338771 -0.01121285 -0.01522656 ...  0.00986621 -0.00403203
  -0.00121138]
 [-0.00856771  0.00302645 -0.00781122 ... -0.00780047  0.00519805
  -0.01402952]]
<NDArray 784x10 @cpu(0)> 
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
<NDArray 1x10 @cpu(0)>
(784, 10) (1, 10)


In [136]:
X = nd.array([[1, 2, 3],[4, 5, 6]])
print(X.sum(axis=0, keepdims=True), X.sum(axis=1, keepdims=True))



[[5. 7. 9.]]
<NDArray 1x3 @cpu(0)> 
[[ 6.]
 [15.]]
<NDArray 2x1 @cpu(0)>


In [137]:
def softmax(X):
    # X's rows = samples
    X_exp = X.exp()
    partition = X_exp.sum(axis=1, keepdims=True)
    return X_exp / partition

X = nd.random.normal(shape=(2, 5))

print(X, X.sum(axis=1))
X_prob = softmax(X)
print(X_prob, X_prob.sum(axis=1))

def net(X):
#     print(X.reshape(-1, num_inputs).shape, W.shape)
    return softmax(nd.dot(X.reshape(-1, num_inputs), W) + b)



[[-0.14835548 -0.36234885 -0.94928247 -0.15361013 -0.39580846]
 [ 0.9750422   0.32427043  0.30911517 -0.2388004   0.40020448]]
<NDArray 2x5 @cpu(0)> 
[-2.0094054  1.7698319]
<NDArray 2 @cpu(0)>

[[0.24802741 0.20024586 0.11134265 0.24672754 0.19365658]
 [0.34538856 0.18016952 0.1774596  0.10259892 0.19438337]]
<NDArray 2x5 @cpu(0)> 
[1. 1.]
<NDArray 2 @cpu(0)>


In [138]:
y_hat = nd.array([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
y = nd.array([0, 2], dtype='int32')
print(nd.pick(y_hat, y))

def cross_entropy(y_hat, y):
    return -nd.pick(y_hat, y).log()

def accuracy(y_hat, y):
    return (y_hat.argmax(axis=1) == y.astype('float32')).mean().asscalar()


[0.1 0.5]
<NDArray 2 @cpu(0)>


In [139]:
def evaluate_net(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        y = y.astype('float32')
        acc_sum = (net(X).argmax(axis=1) == y).sum().asscalar()
        n += y.size
        return acc_sum / n


In [140]:
evaluate_net(test_iter, net)

0.09765625

### train 

In [141]:
num_epochs, lr = 5, 0.1

def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params=None, lr=None, trainer=None):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = .0, .0, 0
        for X, y in train_iter:
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y).sum()
            l.backward()
            if trainer is None:
                d2l.sgd(params, lr, batch_size)
            else:
                trainer.step(batch_size)
            y = y.astype('float32')
            train_l_sum += l.asscalar()
#             print(y_hat, y)
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
            n += y.size
        test_acc = accuracy(y_hat, y)
        print(f'epoch: {epoch}, loss: {train_l_sum / n}, train_acc_sum: {train_acc_sum}, test acc: {test_acc}')

train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, batch_size, [W, b], lr)

epoch: 0, loss: 0.7875408166885376, train_acc_sum: 44690.0, test acc: 0.7916666865348816
epoch: 1, loss: 0.5733886915842692, train_acc_sum: 48663.0, test acc: 0.75
epoch: 2, loss: 0.529137768236796, train_acc_sum: 49418.0, test acc: 0.8645833134651184
epoch: 3, loss: 0.5048939123153686, train_acc_sum: 49835.0, test acc: 0.8541666865348816
epoch: 4, loss: 0.48991118818918866, train_acc_sum: 50091.0, test acc: 0.8333333134651184
