### build datasets from FASHION_MNIST

In [8]:
%matplotlib inline
import sys
import d2lzh as d2l
from mxnet.gluon import data as gdata
from mxnet import autograd, nd

batch_size = 256
transformer = gdata.vision.transforms.ToTensor()
mnist_train, mnist_test = gdata.vision.FashionMNIST(train=True), gdata.vision.FashionMNIST(train=False)

if sys.platform.startswith('win'):
    num_workers = 1
else:
    num_workers = 4
    
####
!echo num_worker: {num_workers} 
####

train_iter, test_iter = \
    gdata.DataLoader(mnist_train.transform_first(transformer), batch_size, shuffle=True, num_workers=num_workers), \
    gdata.DataLoader(mnist_test.transform_first(transformer), batch_size, shuffle=True, num_workers=num_workers)

num_inputs, num_outputs = 28 * 28, 10 # each image's size = 28^2 pixel
W = nd.random.normal(scale=0.01, shape=(num_inputs, num_outputs))
b = nd.zeros((1, num_outputs))

W.attach_grad()
b.attach_grad()

print(W, b)
print(W.shape, b.shape)

num_worker: 4

[[-0.01214079  0.02156406  0.01093822 ... -0.00806932  0.01376901
   0.00205885]
 [ 0.00994352 -0.00235806  0.00298818 ... -0.00962973  0.00508051
   0.00756173]
 [ 0.0168393   0.01257365  0.00131232 ... -0.00987804  0.00958589
  -0.01497647]
 ...
 [ 0.01550311  0.01372548  0.00444446 ...  0.00035544 -0.01057525
  -0.00585316]
 [-0.00245804 -0.0076688  -0.00301254 ...  0.01712018  0.01279332
  -0.00793114]
 [-0.02800045 -0.01546722 -0.00802924 ...  0.0114329  -0.0185089
  -0.00983216]]
<NDArray 784x10 @cpu(0)> 
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
<NDArray 1x10 @cpu(0)>
(784, 10) (1, 10)


In [9]:
X = nd.array([[1, 2, 3],[4, 5, 6]])
print(X.sum(axis=0, keepdims=True), X.sum(axis=1, keepdims=True))



[[5. 7. 9.]]
<NDArray 1x3 @cpu(0)> 
[[ 6.]
 [15.]]
<NDArray 2x1 @cpu(0)>


In [10]:
def softmax(X):
    # X's rows = samples
    X_exp = X.exp()
    partition = X_exp.sum(axis=1, keepdims=True)
    return X_exp / partition

X = nd.random.normal(shape=(2, 5))

print(X, X.sum(axis=1))
X_prob = softmax(X)
print(X_prob, X_prob.sum(axis=1))

def net(X):
#     print(X.reshape(-1, num_inputs).shape, W.shape)
    return softmax(nd.dot(X.reshape(-1, num_inputs), W) + b)



[[-0.16282491  0.6836102   1.6106696  -1.2642232   0.1389543 ]
 [ 0.6671155   0.214447    0.99384654  0.10874183  0.5353432 ]]
<NDArray 2x5 @cpu(0)> 
[1.006186 2.519494]
<NDArray 2 @cpu(0)>

[[0.09168093 0.21373768 0.5401294  0.03047529 0.12397669]
 [0.22366177 0.14223297 0.31009105 0.12796557 0.19604862]]
<NDArray 2x5 @cpu(0)> 
[1. 1.]
<NDArray 2 @cpu(0)>


In [11]:
y_hat = nd.array([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
y = nd.array([0, 2], dtype='int32')
print(nd.pick(y_hat, y))

def cross_entropy(y_hat, y):
    return -nd.pick(y_hat, y).log()

def accuracy(y_hat, y):
    return (y_hat.argmax(axis=1) == y.astype('float32')).mean().asscalar()


[0.1 0.5]
<NDArray 2 @cpu(0)>


In [12]:
def evaluate_net(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        y = y.astype('float32')
        acc_sum = (net(X).argmax(axis=1) == y).sum().asscalar()
        n += y.size
        return acc_sum / n


In [13]:
evaluate_net(test_iter, net)

0.09375

### train 

In [14]:
num_epochs, lr = 5, 0.1

def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params=None, lr=None, trainer=None):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = .0, .0, 0
        for X, y in train_iter:
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y).sum()
            l.backward()
            if trainer is None:
                d2l.sgd(params, lr, batch_size)
            else:
                trainer.step(batch_size)
            y = y.astype('float32')
            train_l_sum += l.asscalar()
#           print(y_hat, y)
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
            n += y.size
        test_acc = accuracy(y_hat, y)
        print(f'epoch: {epoch}, loss: {train_l_sum / n}, train_acc_sum: {train_acc_sum}, test acc: {test_acc}')

train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, batch_size, [W, b], lr)

epoch: 0, loss: 0.7878674438476563, train_acc_sum: 44816.0, test acc: 0.8020833134651184
epoch: 1, loss: 0.5739248268127441, train_acc_sum: 48611.0, test acc: 0.8125
epoch: 2, loss: 0.5293440882364909, train_acc_sum: 49331.0, test acc: 0.8541666865348816
epoch: 3, loss: 0.5048197692235311, train_acc_sum: 49791.0, test acc: 0.875
epoch: 4, loss: 0.4890014295578003, train_acc_sum: 50081.0, test acc: 0.8125
