In [120]:
import tensorflow as tf 


In [121]:
from tensorflow.keras import datasets,optimizers
from tensorflow import keras 

In [122]:
(train_x,train_label),(test_x,test_label) = datasets.mnist.load_data()
train_label.shape, train_x.shape,test_x.shape,test_label.shape

((60000,), (60000, 28, 28), (10000, 28, 28), (10000,))

In [123]:
# 转换为tensor
x = tf.convert_to_tensor(train_x,dtype=tf.float32)
y = tf.convert_to_tensor(train_label,dtype = tf.int32)
x_test = tf.convert_to_tensor(test_x,dtype=tf.float32)
y_test = tf.convert_to_tensor(test_label,dtype = tf.int32)
x.shape, y.shape, x.dtype, y.dtype

(TensorShape([60000, 28, 28]), TensorShape([60000]), tf.float32, tf.int32)

In [124]:
# 查看最小值和最大值
print(tf.reduce_min(x))
print(tf.reduce_max(x))

print(tf.reduce_max(y))
print(tf.reduce_min(y))

print(tf.reduce_min(x_test))
print(tf.reduce_max(x_test))

print(tf.reduce_max(y_test))
print(tf.reduce_min(y_test))

tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(255.0, shape=(), dtype=float32)
tf.Tensor(9, shape=(), dtype=int32)
tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(255.0, shape=(), dtype=float32)
tf.Tensor(9, shape=(), dtype=int32)
tf.Tensor(0, shape=(), dtype=int32)


In [125]:
# 创建一个数据集对象(实质是一个迭代器/生成器), 可以按照batch来取数据
train_db = tf.data.Dataset.from_tensor_slices((x,y)).batch(128)
test_db = tf.data.Dataset.from_tensor_slices((x_test,y_test)).batch(128)
# 转换为迭代器之后,获取下一个batch
train_iter = iter(train_db)
sample_train = train_iter.next()
# 获得的sample是一个元组(x,y)的格式
sample[0].shape, sample[1].shape
test_iter = iter(test_db)
sample_test = test_iter.next()
print(sample_test,train_iter)

(<tf.Tensor: id=9814989, shape=(128, 28, 28), dtype=float32, numpy=
array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       ...,

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
   

In [126]:
# 输入是[b,784] => [b,512] = > [b,256] => [b,10]
# 转化为tf.Vatiable的目的是为了让tf.GradientTaps()能够跟踪
# tf.GradientTaps()只能跟踪tfVariable的对象的梯度
# 方差设定为0.1, 有助于网络收敛
w1 = tf.Variable(tf.random.truncated_normal([784,512],stddev = 0.01))
b1 = tf.Variable(tf.ones([512]))

w2 = tf.Variable(tf.random.truncated_normal([512,256],stddev = 0.01))
b2 = tf.Variable(tf.ones([256]))

w3 = tf.Variable(tf.random.truncated_normal([256,10],stddev = 0.01))
b3 = tf.Variable(tf.ones([10]))

In [127]:
# 科学技术法表示的学习率
lr = 1e-5
# 可以直接使用for(x,y) in tf.data.Dataset.from_tensor_slices((x,y)).batch(128)
# 不需要转化为迭代器
for epoch in range(30):
    for step,(x,y) in enumerate(train_db):
        # 先将图片打平(维度转变)
        x = tf.reshape(tf.cast(x,tf.float32),[-1,28*28])/255.
    #     print(tf.matmul(x,w1).shape)
        # 标志位转化为10位的one-hot形式的tensor
        y = tf.one_hot(y,depth=10)
        with tf.GradientTape() as tape:
            # 将所有的梯度计算加到这个context中

            # 第一层前向传播
            # 这里b1虽然是tf.ones([512])大小的, 但是系统会自动完成broadingcast之后再进行相加
            h1 = tf.matmul(x,w1) + b1

            # 进行relu转换
            h1 = tf.nn.relu(h1)

            # 第二层前向传播\
            h2 = tf.matmul(h1,w2) + b2
            h2 = tf.nn.relu(h2)

            # 第三层前向传播
            output = tf.matmul(h2,w3) + b3
    #         print(output.shape)
    #         print(y.shape)

    #         print(output.shape)

            #计算误差
            mse_loss = tf.reduce_mean(tf.square(output - y))
            if step%100 == 0:
                print('Epoch:',epoch,' Step:',step,' ,Loss:', mse_loss)
        
        grads = tape.gradient(mse_loss,[w1,b1,w2,b2,w3,b3])
        # 获取对应的每个参量的梯度信息
    #     print(grads)
        # 进行梯度优化

        # 这里如果直接写w1 = w1 - lr*grads[0], 剪发操作之后的w1被赋值成了一个tfTensor的对象
        # 这样的Tensor对象下一次在求带图中又将会返回一个None的对象, 所以会报错
        # 针对这种情况我们使用w1.assign_sub()方法,这样使用原地更新的方式减去要更新的值
        # w1的属性不会变成Tensor,还会保持是一个tf.Variable对象
        w1.assign_sub(lr*grads[0])
        b1.assign_sub(lr*grads[1])
        w2.assign_sub(lr*grads[2])
        b2.assign_sub(lr*grads[3])
        w3.assign_sub(lr*grads[4])
        b3.assign_sub(lr*grads[5])
        # 这里的0对应tape.gradient(mse_loss,[w1,b1,w2,b2,w3,b3])中的第0个位置上的参数
    
    correct_sum = 0
    total = 0
    for step,(x,y) in enumerate(test_db):
        y = tf.cast(y,dtype=tf.int32)
        x = tf.cast(tf.reshape(x,[-1,28*28]),dtype=tf.float32)/255.
        h1 = tf.nn.relu(x@w1+b1)
        h2 = tf.nn.relu(h1@w2+b2)
        out = h2@w3+b3 # out是一个b,10大小的矩阵
        prod = tf.nn.softmax(out)# 进行softmax概率操作
        pred = tf.cast(tf.argmax(prod,axis=1),dtype=tf.int32)# 获取最终最大概率的预测结果, 注意这里axis要是1
#         print(pred)
#         print(tf.equal(pred,y))
        # 真实值是y, 需要与pred进行比较
        correct = tf.reduce_sum(tf.cast(tf.equal(pred,y),dtype=tf.float16))
        correct_sum += int(correct)
#         print(int(correct),correct_sum)
        total += x.shape[0]
    print('Epoch:',epoch ,',Accuracy:', correct_sum/total)

Epoch: 0  Step: 0  ,Loss: tf.Tensor(0.92763245, shape=(), dtype=float32)
Epoch: 0  Step: 100  ,Loss: tf.Tensor(0.8377632, shape=(), dtype=float32)
Epoch: 0  Step: 200  ,Loss: tf.Tensor(0.75881994, shape=(), dtype=float32)
Epoch: 0  Step: 300  ,Loss: tf.Tensor(0.690096, shape=(), dtype=float32)
Epoch: 0  Step: 400  ,Loss: tf.Tensor(0.6208563, shape=(), dtype=float32)
Epoch: 0 ,Accuracy: 0.0974
Epoch: 1  Step: 0  ,Loss: tf.Tensor(0.5860266, shape=(), dtype=float32)
Epoch: 1  Step: 100  ,Loss: tf.Tensor(0.5323427, shape=(), dtype=float32)
Epoch: 1  Step: 200  ,Loss: tf.Tensor(0.48528847, shape=(), dtype=float32)
Epoch: 1  Step: 300  ,Loss: tf.Tensor(0.44467035, shape=(), dtype=float32)
Epoch: 1  Step: 400  ,Loss: tf.Tensor(0.4024671, shape=(), dtype=float32)
Epoch: 1 ,Accuracy: 0.0974
Epoch: 2  Step: 0  ,Loss: tf.Tensor(0.38253266, shape=(), dtype=float32)
Epoch: 2  Step: 100  ,Loss: tf.Tensor(0.35040325, shape=(), dtype=float32)
Epoch: 2  Step: 200  ,Loss: tf.Tensor(0.32235777, shape=(),

Epoch: 20  Step: 200  ,Loss: tf.Tensor(0.08998867, shape=(), dtype=float32)
Epoch: 20  Step: 300  ,Loss: tf.Tensor(0.08970077, shape=(), dtype=float32)
Epoch: 20  Step: 400  ,Loss: tf.Tensor(0.089948356, shape=(), dtype=float32)
Epoch: 20 ,Accuracy: 0.1136
Epoch: 21  Step: 0  ,Loss: tf.Tensor(0.08969339, shape=(), dtype=float32)
Epoch: 21  Step: 100  ,Loss: tf.Tensor(0.08995919, shape=(), dtype=float32)
Epoch: 21  Step: 200  ,Loss: tf.Tensor(0.08998306, shape=(), dtype=float32)
Epoch: 21  Step: 300  ,Loss: tf.Tensor(0.08969303, shape=(), dtype=float32)
Epoch: 21  Step: 400  ,Loss: tf.Tensor(0.08994738, shape=(), dtype=float32)
Epoch: 21 ,Accuracy: 0.1136
Epoch: 22  Step: 0  ,Loss: tf.Tensor(0.08968712, shape=(), dtype=float32)
Epoch: 22  Step: 100  ,Loss: tf.Tensor(0.08995483, shape=(), dtype=float32)
Epoch: 22  Step: 200  ,Loss: tf.Tensor(0.089979455, shape=(), dtype=float32)
Epoch: 22  Step: 300  ,Loss: tf.Tensor(0.089687705, shape=(), dtype=float32)
Epoch: 22  Step: 400  ,Loss: tf.T