In [2]:
import tensorflow as tf
import numpy as np
import sys
sys.path.append("..") # 为了导入上层目录的d2lzh_tensorflow
#import d2lzh_tensorflow2 as d2l
print(tf.__version__)

2.1.0


### 3.9.1 获取和读取数据

In [3]:
from tensorflow.keras.datasets import fashion_mnist

# Load the Fashion-MNIST train/test splits as (features, labels) pairs.
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

# Number of examples per mini-batch for both iterators below.
batch_size = 256

# Cast the images to float32 and divide by 255.0 in a single expression.
x_train = tf.cast(x_train, tf.float32) / 255.0
x_test = tf.cast(x_test, tf.float32) / 255.0

# Slice each (features, labels) pair into examples and group them into batches.
train_iter = (
    tf.data.Dataset
    .from_tensor_slices((x_train, y_train))
    .batch(batch_size)
)
test_iter = (
    tf.data.Dataset
    .from_tensor_slices((x_test, y_test))
    .batch(batch_size)
)

### 3.9.2 定义模型参数

In [4]:
# Layer sizes: input features, output classes, hidden units.
num_inputs = 784
num_outputs = 10
num_hiddens = 256

# Hidden layer: weights drawn from N(0, 0.01^2), bias initialised to zero.
W1 = tf.Variable(
    tf.random.normal(
        shape=(num_inputs, num_hiddens),
        mean=0,
        stddev=0.01,
        dtype=tf.float32,
    )
)
b1 = tf.Variable(tf.zeros(num_hiddens, dtype=tf.float32))

# Output layer: weights drawn from N(0, 0.01^2); the bias is drawn with stddev 0.1.
W2 = tf.Variable(
    tf.random.normal(
        shape=(num_hiddens, num_outputs),
        mean=0,
        stddev=0.01,
        dtype=tf.float32,
    )
)
b2 = tf.Variable(tf.random.normal([num_outputs], stddev=0.1))

### 3.9.3 定义激活函数

这里我们使用基础的max函数来实现ReLU，而非直接调用relu函数。

In [5]:
def relu(x):
    """Apply ReLU element-wise, implemented as max(x, 0) rather than a built-in relu."""
    floor = 0
    return tf.math.maximum(x, floor)

### 3.9.4 定义模型

In [6]:
def net(X):
    """Run the one-hidden-layer MLP forward pass and return softmax outputs.

    X is reshaped to rows of `num_inputs` features, passed through the
    ReLU hidden layer (W1, b1), then through the output layer (W2, b2),
    and finally normalised with softmax.
    """
    flat = tf.reshape(X, shape=[-1, num_inputs])
    hidden = relu(tf.matmul(flat, W1) + b1)
    scores = tf.matmul(hidden, W2) + b2
    return tf.math.softmax(scores)

### 3.9.5 定义损失函数
为了得到更好的数值稳定性，我们直接使用Tensorflow提供的包括softmax运算和交叉熵损失计算的函数。

In [7]:
def loss(y_hat,y_true):
    """Sparse categorical cross-entropy between predictions and labels.

    Note the argument order: predictions come first here, whereas the
    TensorFlow function takes the labels first. `from_logits` is not passed,
    so the library default (False: `y_hat` is treated as probabilities) applies.
    """
    return tf.losses.sparse_categorical_crossentropy(y_true=y_true, y_pred=y_hat)

### 3.9.6 训练模型

In [9]:
def evaluate_accuracy(data_iter,net):
    """Return the fraction of examples in `data_iter` that `net` classifies correctly.

    `data_iter` yields (features, labels) batches; the predicted class of each
    example is the argmax over axis 1 of `net`'s output.
    """
    correct, total = 0.0, 0
    for features, labels in data_iter:
        labels = tf.cast(labels, dtype=tf.int64)
        predicted = tf.cast(tf.argmax(net(features), axis=1), dtype=tf.int64)
        # Element-wise comparison, then count the matches in this batch.
        correct += np.sum(predicted == labels)
        total += labels.shape[0]
    return correct / total

In [10]:
# Training hyperparameters: number of passes over the data and SGD step size.
num_epochs = 5
lr = 0.5
# Variables updated during training.
params = [W1, b1, W2, b2]
def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params=None, lr=None, trainer=None):
    """Train `net` for `num_epochs` passes over `train_iter`, printing metrics per epoch.

    Args:
        net: callable mapping a feature batch to class scores (argmax is taken on axis 1).
        train_iter: iterable yielding (X, y) training batches.
        test_iter: iterable of (X, y) batches forwarded to `evaluate_accuracy`.
        loss: callable invoked as `loss(y_hat, y)`; its result is summed with `tf.reduce_sum`.
        num_epochs: number of passes over `train_iter`.
        batch_size: divisor applied to the gradients of the summed loss.
        params: variables to differentiate with respect to and to update.
        lr: step size of the built-in mini-batch SGD; only used when `trainer` is None.
        trainer: optional optimizer exposing `apply_gradients(grads_and_vars)`.

    Prints one line per epoch with the mean training loss, training accuracy
    and test accuracy. Returns None.
    """
    for epoch in range(num_epochs):
        train_loss_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            with tf.GradientTape() as tape:
                y_hat = net(X)
                l = tf.reduce_sum(loss(y_hat, y))
            grads = tape.gradient(l, params)
            # The loss is a sum over the batch, so gradients are scaled by
            # 1/batch_size (the nominal size, even if a batch holds fewer examples).
            if trainer is None:
                # No optimizer supplied: fall back to the hand-written mini-batch SGD.
                for param, grad in zip(params, grads):
                    param.assign_sub(lr * grad / batch_size)
            else:
                # Fixed: the optimizer method is `apply_gradients`, not `apply_gradient`.
                trainer.apply_gradients(zip([grad / batch_size for grad in grads], params))
            # Cast labels straight to int64 to compare with argmax output
            # (the former float32 round-trip was redundant).
            labels = tf.cast(y, dtype=tf.int64)
            train_loss_sum += l.numpy()
            hits = tf.cast(tf.argmax(y_hat, axis=1) == labels, dtype=tf.int64)
            train_acc_sum += tf.reduce_sum(hits).numpy()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch={},loss={:.4},train_acc={:.3},test_acc={:.3}'.format(epoch+1, train_loss_sum/n, train_acc_sum/n, test_acc))
# Run training with the hand-written SGD update (no `trainer` is passed).
train_ch3(
    net,
    train_iter,
    test_iter,
    loss,
    num_epochs,
    batch_size,
    params=params,
    lr=lr,
)

epoch=1,loss=0.484,train_acc=0.82,test_acc=0.837
epoch=2,loss=0.4182,train_acc=0.844,test_acc=0.85
epoch=3,loss=0.3859,train_acc=0.857,test_acc=0.857
epoch=4,loss=0.364,train_acc=0.866,test_acc=0.862
epoch=5,loss=0.3472,train_acc=0.871,test_acc=0.866
