In [2]:
# coding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import tensorflow as tf

from tensorflow.keras.datasets import fashion_mnist

batch_size = 128

# Data: Fashion-MNIST images, 28*28*1. Pixel values are cast to float32 and divided by 255.
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
x_train = tf.divide(tf.cast(x_train, tf.float32), 255.0)
x_test = tf.divide(tf.cast(x_test, tf.float32), 255.0)

# Batched (features, labels) pipelines; note that no shuffling is applied.
train_iter = (
    tf.data.Dataset
    .from_tensor_slices((x_train, y_train))
    .batch(batch_size)
)
test_iter = (
    tf.data.Dataset
    .from_tensor_slices((x_test, y_test))
    .batch(batch_size)
)

# Parameters of a 784 -> 256 -> 10 multilayer perceptron.
num_inputs = 784
num_outputs = 10
num_hiddens = 256

# Weights start as small Gaussian noise; b1 starts at zero, b2 as Gaussian noise (stddev 0.1).
W1 = tf.Variable(
    tf.random.normal((num_inputs, num_hiddens), mean=0, stddev=0.01, dtype=tf.float32)
)
b1 = tf.Variable(tf.zeros(num_hiddens, dtype=tf.float32))
W2 = tf.Variable(
    tf.random.normal((num_hiddens, num_outputs), mean=0, stddev=0.01, dtype=tf.float32)
)
b2 = tf.Variable(tf.random.normal([num_outputs], stddev=0.1))

print('load done：', x_train.shape, y_train.shape)


load done： (60000, 28, 28) (60000,)


In [6]:
# 激活函数
with tf.device('CPU:0'):
    def relu(x):
        """Rectified linear unit: element-wise maximum of ``x`` and 0."""
        return tf.maximum(x, 0)

    # 模型
    def net(X):
        """Forward pass of the two-layer perceptron.

        ``X`` is reshaped to ``[-1, num_inputs]``, sent through the hidden
        layer (``W1``, ``b1``, ReLU) and the output layer (``W2``, ``b2``),
        and finally through a softmax.

        Returns:
            The softmax of the output layer, i.e. one probability row per
            flattened input row.
        """
        flat = tf.reshape(X, shape=[-1, num_inputs])
        hidden = relu(tf.matmul(flat, W1) + b1)
        logits = tf.matmul(hidden, W2) + b2
        return tf.nn.softmax(logits)

    # Loss function
    # Classification losses:
    # (1) tf.keras.losses
    #     CategoricalCrossentropy || categorical_crossentropy: for one-hot encoded y_true.
    #     SparseCategoricalCrossentropy || sparse_categorical_crossentropy: for y_true given as a
    #     single class index per example; y_pred still holds one value per class.
    # (2) tf.nn
    #     sparse_softmax_cross_entropy_with_logits applies softmax internally, so pass it raw
    #     logits -- do NOT feed it the output of a softmax!

    ## e.g.:
    # tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred, from_logits=False, axis=-1)
    # tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False, reduction=losses_utils.ReductionV2.AUTO, name='sparse_categorical_crossentropy')

    def entropy_loss(y_hat, y_true):
        """Sparse categorical cross-entropy between predictions and labels.

        Args:
            y_hat: model predictions, forwarded as ``y_pred``. ``from_logits``
                is left at its default, so these are presumably probabilities
                (e.g. a softmax output) -- confirm against the caller.
            y_true: ground-truth labels, forwarded as ``y_true``.

        Returns:
            Whatever ``tf.keras.losses.sparse_categorical_crossentropy``
            returns for these inputs (no reduction is applied here).
        """
        loss_fn = tf.keras.losses.sparse_categorical_crossentropy
        return loss_fn(y_true=y_true, y_pred=y_hat)

    # 训练


    def evaluate_accuracy(test_iter, net):
        """Fraction of examples in ``test_iter`` that ``net`` classifies correctly.

        Args:
            test_iter: iterable yielding ``(features, labels)`` batches.
            net: callable mapping a feature batch to per-class scores; the
                predicted class is the argmax along axis 1.

        Returns:
            Number of correct predictions divided by the number of examples seen.
        """
        correct = 0.0
        total = 0
        for features, labels in test_iter:
            scores = net(features)
            predicted = tf.cast(tf.argmax(scores, axis=1), dtype=tf.int64)
            labels = tf.cast(labels, dtype=tf.int64)
            # Element-wise comparison; np.sum counts the matching positions.
            correct += np.sum(labels == predicted)
            total += features.shape[0]
        return correct / total



    def train_net(net, train_iter, test_iter, loss_function, num_epochs, batch_size, params=None, lr=0.01, optimizer=None):
        """Train ``net`` with mini-batch gradient descent and report per-epoch metrics.

        Args:
            net: callable mapping a feature batch to per-class predictions.
            train_iter: iterable of ``(X, y)`` training batches.
            test_iter: iterable of ``(X, y)`` batches, passed to ``evaluate_accuracy``.
            loss_function: called as ``loss_function(y_pred, y)`` (predictions
                first); its result is summed over the batch.
            num_epochs: number of passes over ``train_iter``.
            batch_size: divisor applied to the summed-loss gradients.
            params: list of ``tf.Variable`` objects to update. Required.
            lr: step size for the manual update; only used when ``optimizer`` is None.
            optimizer: optional object with ``apply_gradients``; when given it
                performs the update instead of the manual ``assign_sub`` step.

        Raises:
            ValueError: if ``params`` is None.

        Side effects:
            Updates ``params`` in place and prints one summary line per epoch.
        """
        if params is None:
            raise ValueError('params must be the list of variables to train')

        for epoch in range(num_epochs):
            train_loss_sum, train_acc_sum, n = 0.0, 0.0, 0
            for X, y in train_iter:
                with tf.GradientTape() as tape:
                    y_pred = net(X)
                    # Summed (not averaged) loss; the gradients are divided by batch_size below.
                    loss_value = tf.reduce_sum(loss_function(y_pred, y))
                grads = tape.gradient(loss_value, params)

                if optimizer is None:
                    # Plain SGD step applied directly to each variable.
                    for param, grad in zip(params, grads):
                        param.assign_sub(lr * grad / batch_size)
                else:
                    optimizer.apply_gradients(zip([grad / batch_size for grad in grads], params))

                # Running totals for the epoch summary.
                labels = tf.cast(y, dtype=tf.int64)
                hits = tf.cast(tf.argmax(y_pred, axis=1) == labels, dtype=tf.int64)
                train_loss_sum += loss_value.numpy()
                train_acc_sum += tf.reduce_sum(hits).numpy()
                n += y.shape[0]

            test_acc = evaluate_accuracy(test_iter, net)
            print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'%(epoch+1, train_loss_sum / n, train_acc_sum/n, test_acc))




    # Run one epoch; the same learning rate is handed to train_net and to the SGD optimizer.
    num_epochs = 1
    lr = 0.1
    train_net(
        net,
        train_iter,
        test_iter,
        entropy_loss,
        num_epochs,
        batch_size,
        params=[W1, b1, W2, b2],
        lr=lr,
        optimizer=tf.keras.optimizers.SGD(lr),
    )

    # evaluate_accuracy(test_iter, net)




4030604e-03
 5.55768097e-03 5.75951457e-01 2.99118254e-02 7.09272802e-01
 3.24828625e-01 3.50896060e-03 6.48542523e-01 7.30391731e-03], shape=(128,), dtype=float32)
net(x) outputs y: (128, 10)    tf.Tensor(
[[2.8851991e-07 5.6832961e-09 2.7566530e-08 ... 7.8063029e-05
  3.3706394e-06 9.7136569e-01]
 [4.1024942e-02 1.2121729e-03 2.9723952e-03 ... 4.9993596e-06
  1.3042666e-04 4.3354407e-06]
 [5.9939486e-01 1.5169961e-02 5.2735154e-02 ... 3.4142277e-04
  2.2538032e-03 2.8358115e-04]
 ...
 [4.8361280e-06 9.9982351e-01 7.4601430e-06 ... 9.2888406e-08
  2.0116634e-08 3.7191967e-08]
 [3.8672462e-02 4.2982292e-01 3.2482073e-02 ... 2.9291632e-03
  5.1206970e-03 2.9679891e-03]
 [8.1647646e-05 5.8280671e-06 3.3969132e-05 ... 3.8116113e-03
  1.5117563e-03 5.5219233e-02]], shape=(128, 10), dtype=float32)
y_true: (128,)    tf.Tensor(
[9 3 0 1 5 4 9 5 9 7 6 0 1 7 8 7 0 1 9 9 7 2 8 8 5 6 0 0 6 4 5 8 7 6 8 9 7
 2 7 1 7 3 5 1 2 1 7 5 4 8 3 0 9 9 8 1 1 5 7 8 7 7 8 2 0 8 3 9 1 1 0 1 7 1
 7 7 7 1 1 2 7 7 

In [7]:
with tf.device('CPU:0'):
    # Keras version of a 784 -> 256 (ReLU) -> 10 (softmax) perceptron on 28x28 inputs.
    model = tf.keras.models.Sequential(
        [
            tf.keras.layers.Flatten(input_shape=(28, 28)),
            tf.keras.layers.Dense(256, activation='relu'),
            tf.keras.layers.Dense(10, activation='softmax'),
        ]
    )

    # `learning_rate` is the supported argument name; `lr` is only a deprecated alias.
    model.compile(
        optimizer=tf.keras.optimizers.SGD(learning_rate=0.1),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

    model.summary()

    # Train on the training split (previously misspelled as `x_trian`, a NameError on a
    # fresh kernel) and validate on the test split after every epoch.
    model.fit(x_train, y_train, epochs=5, batch_size=8, validation_data=(x_test, y_test), validation_freq=1)









Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 256)               200960    
_________________________________________________________________
dense_3 (Dense)              (None, 10)                2570      
Total params: 203,530
Trainable params: 203,530
Non-trainable params: 0
_________________________________________________________________
Train on 60000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Dropout

def dropout(x, drop_prob):
    """Apply inverted dropout to ``x``.

    Each element is zeroed independently with probability ``drop_prob``; the
    surviving elements are divided by ``1 - drop_prob``.

    Args:
        x: input tensor (or tensor-like). Assumed to be floating point --
            TODO confirm against callers.
        drop_prob: probability of dropping an element, in ``[0, 1]``.

    Returns:
        A tensor shaped like ``x``. All zeros when ``drop_prob == 1``.

    Raises:
        AssertionError: if ``drop_prob`` is outside ``[0, 1]``.
    """
    assert 0 <= drop_prob <= 1

    keep_prob = 1 - drop_prob

    # Everything is dropped; return early so we never divide by zero below.
    if keep_prob == 0:
        return tf.zeros_like(x)

    x = tf.convert_to_tensor(x)
    # Uniform samples in [0, 1) fall below keep_prob with probability keep_prob.
    mask = tf.random.uniform(shape=tf.shape(x), minval=0, maxval=1) < keep_prob
    # Zero the dropped elements and rescale the kept ones by 1 / keep_prob.
    return tf.cast(mask, dtype=x.dtype) * x / keep_prob



















