In [3]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets

In [4]:
import os

# Set the TensorFlow log level; "2" means only important messages are printed
# (TensorFlow documents level 2 as filtering out INFO and WARNING).
# NOTE(review): tensorflow is already imported in the cell above; this variable
# generally has to be set before `import tensorflow` to take effect -- confirm.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

In [2]:
%pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.4.1-cp37-cp37m-manylinux2010_x86_64.whl (394.3 MB)
[K     |████████████████████████████████| 394.3 MB 29 kB/s s eta 0:00:01     |████████▏                       | 100.3 MB 39.3 MB/s eta 0:00:08     |██████████▋                     | 131.1 MB 26.8 MB/s eta 0:00:10     |██████████████████▎             | 224.8 MB 35.2 MB/s eta 0:00:05     |███████████████████             | 232.9 MB 35.2 MB/s eta 0:00:05     |████████████████████            | 246.1 MB 34.4 MB/s eta 0:00:05     |█████████████████████▏          | 261.3 MB 34.4 MB/s eta 0:00:04     |█████████████████████▍          | 263.3 MB 34.4 MB/s eta 0:00:04
[?25hCollecting wheel~=0.35
  Downloading wheel-0.36.2-py2.py3-none-any.whl (35 kB)
Collecting typing-extensions~=3.7.4
  Downloading typing_extensions-3.7.4.3-py3-none-any.whl (22 kB)
Collecting protobuf>=3.9.2
  Downloading protobuf-3.14.0-cp37-cp37m-manylinux1_x86_64.whl (1.0 MB)
[K     |████████████████████████████████| 1.0 MB 1

### 准备数据

In [5]:
# Load the MNIST dataset; if there is no local cached copy, the data is
# downloaded from the network.
# x: [60k,28,28]
# y: [60k]
# Only the first (x, y) pair returned by load_data() is kept; the second
# element is bound to `_` and not used here.
(x,y),_ = datasets.mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [6]:
# Convert the dataset to tensors.
# NOTE: x is overwritten from itself, so running this cell a second time
# divides the already-scaled values by 255 again.
x = tf.convert_to_tensor(x, dtype=tf.float32)
x = x / 255.      # scale the values into the 0-1 range
y = tf.convert_to_tensor(y, dtype=tf.int32)

In [7]:
# Display the shapes and dtypes of the converted x and y tensors.
x.shape, y.shape, x.dtype, y.dtype

(TensorShape([60000, 28, 28]), TensorShape([60000]), tf.float32, tf.int32)

In [8]:
# Check the largest and smallest values in x (after the /255 scaling).
tf.reduce_max(x),tf.reduce_min(x)

(<tf.Tensor: shape=(), dtype=float32, numpy=1.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.0>)

In [9]:
# Check the largest and smallest values in y.
tf.reduce_max(y),tf.reduce_min(y)

(<tf.Tensor: shape=(), dtype=int32, numpy=9>,
 <tf.Tensor: shape=(), dtype=int32, numpy=0>)

### 创建数据集

In [10]:
# Slice (x, y) into per-sample pairs, then split the dataset into batches of 128.
train_db = tf.data.Dataset.from_tensor_slices((x, y))
train_db = train_db.batch(128)

In [11]:
train_iter = iter(train_db)     # iterator over the batched dataset
sample = next(train_iter)       # first (x, y) batch
# The shapes show the effect of batch(): x and y are split into chunks of 128.
tuple(part.shape for part in sample)

(TensorShape([128, 28, 28]), TensorShape([128]))

### 权值设定


In [12]:
# Layer sizes: [b,784] => [b,256] => [b,128] => [b,10]
# The shapes of each w and b therefore have to satisfy the rules of matrix
# multiplication.
# Values wrapped in tf.Variable support automatic differentiation.

# Fix the global random seed so that re-running this cell produces the same
# initial weights (the value 42 itself is arbitrary).
tf.random.set_seed(42)

# Important: truncated_normal defaults to stddev=1.0; a smaller value (0.1) is
# passed here, which the original author found to train better.
w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
b1 = tf.Variable(tf.zeros([256]))
w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
b2 = tf.Variable(tf.zeros([128]))
w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
b3 = tf.Variable(tf.zeros([10]))

In [13]:
lr = 1e-3

# Trainable variables, in the same order as the gradients requested below.
params = [w1, b1, w2, b2, w3, b3]

# 10 epochs; change this value to compare results. Each epoch is one complete
# pass over the dataset.
for epoc in range(10):

    # Iterate over the batched dataset. `step` is the batch index within the
    # epoch. The batch is bound to x_batch / y_batch (not x / y) so that the
    # notebook-level tensors x and y still hold the full dataset after training.
    for step, (x_batch, y_batch) in enumerate(train_db):
        # x_batch: [b,28,28]
        # y_batch: [b]
        x_flat = tf.reshape(x_batch, [-1, 28*28])   # [b,28,28] => [b,784]

        # One-hot encode the labels: [b] => [b,10]
        y_onehot = tf.one_hot(y_batch, depth=10)

        with tf.GradientTape() as tape:

            # [b,784]@[784,256] + [256] = [b,256]
            h1 = x_flat@w1 + b1
            h1 = tf.nn.relu(h1)   # non-linearity: negative values become 0

            # [b,256]@[256,128] + [128] = [b,128]
            h2 = h1@w2 + b2
            h2 = tf.nn.relu(h2)

            # [b,128]@[128,10] + [10] = [b,10]
            out = h2@w3 + b3

            # Mean squared error: mse = mean((y_onehot - out)^2), a scalar
            loss = tf.reduce_mean(tf.square(y_onehot - out))

        # Gradients of the loss with respect to [w1,b1,w2,b2,w3,b3]
        grads = tape.gradient(loss, params)

        # Gradient-descent step: param = param - lr * grad.
        # Writing `w1 = w1 - lr * grads[0]` would rebind the name to a plain
        # tensor instead of a tf.Variable (the original author notes that this
        # then fails later on), so the variables are updated in place instead.
        for param, grad in zip(params, grads):
            param.assign_sub(lr * grad)

        # Print the epoch, the step and the loss every 100 steps.
        if step % 100 == 0:
            print(epoc, step, 'loss:', float(loss))

0 0 loss: 0.3397562503814697
0 100 loss: 0.19157014787197113
0 200 loss: 0.16938775777816772
0 300 loss: 0.16239294409751892
0 400 loss: 0.1708572804927826
1 0 loss: 0.15272292494773865
1 100 loss: 0.14506745338439941
1 200 loss: 0.13942284882068634
1 300 loss: 0.13654910027980804
1 400 loss: 0.14457686245441437
2 0 loss: 0.1313253790140152
2 100 loss: 0.12739227712154388
2 200 loss: 0.1224694699048996
2 300 loss: 0.12120924890041351
2 400 loss: 0.12830333411693573
3 0 loss: 0.11726796627044678
3 100 loss: 0.11568419635295868
3 200 loss: 0.11096692085266113
3 300 loss: 0.11068177223205566
3 400 loss: 0.11736283451318741
4 0 loss: 0.10735545307397842
4 100 loss: 0.10733451694250107
4 200 loss: 0.10264060646295547
4 300 loss: 0.10304568707942963
4 400 loss: 0.10956726223230362
5 0 loss: 0.10005934536457062
5 100 loss: 0.10104074329137802
5 200 loss: 0.0962456613779068
5 300 loss: 0.09723677486181259
5 400 loss: 0.10365629196166992
6 0 loss: 0.09443774074316025
6 100 loss: 0.0960503220558