In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets

In [2]:
import os

# Set the log level of TensorFlow's native logging; '2' means only important
# messages are printed.
# NOTE(review): tensorflow is already imported in the first cell; this variable
# is usually set before that import -- confirm it still takes effect here.
os.environ.update({'TF_CPP_MIN_LOG_LEVEL': '2'})

In [2]:
%pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.4.1-cp37-cp37m-manylinux2010_x86_64.whl (394.3 MB)
[K     |████████████████████████████████| 394.3 MB 9.1 kB/s  eta 0:00:01    |██                              | 24.1 MB 10.0 MB/s eta 0:00:38     |████████████████▎               | 200.8 MB 18.7 MB/s eta 0:00:11     |████████████████▍               | 202.2 MB 18.7 MB/s eta 0:00:11     |███████████████████▊            | 243.7 MB 37.3 MB/s eta 0:00:05     |█████████████████████████▌      | 314.8 MB 28.4 MB/s eta 0:00:03     |███████████████████████████▌    | 338.9 MB 36.3 MB/s eta 0:00:02     |██████████████████████████████▉ | 380.4 MB 43.1 MB/s eta 0:00:01     |███████████████████████████████▊| 390.3 MB 43.1 MB/s eta 0:00:01
[?25hCollecting gast==0.3.3
  Downloading gast-0.3.3-py2.py3-none-any.whl (9.7 kB)
Collecting grpcio~=1.32.0
  Downloading grpcio-1.32.0-cp37-cp37m-manylinux2014_x86_64.whl (3.8 MB)
[K     |████████████████████████████████| 3.8 MB 39.6 MB/s eta 0:00:01
[?25hCollect

### 准备数据

In [16]:
# Load the MNIST dataset; if there is no local cache the data is downloaded
# from the network. Only the training split is kept, the second split is discarded.
# x: [60k, 28, 28]
# y: [60k]
mnist_train, _ = datasets.mnist.load_data()
x, y = mnist_train

In [17]:
# Convert the loaded data into tensors.
# Dividing by 255. rescales the values of x into the range 0-1.
# NOTE: x is read and overwritten here, so re-running this cell on its own
# divides by 255 a second time -- re-run the loading cell first.
x = tf.convert_to_tensor(x, dtype=tf.float32)
x = x / 255.
y = tf.convert_to_tensor(y, dtype=tf.int32)

In [18]:
# Sanity check: display the shape and dtype of both tensors after conversion.
x.shape, y.shape, x.dtype, y.dtype

(TensorShape([60000, 28, 28]), TensorShape([60000]), tf.float32, tf.int32)

In [19]:
# Inspect the maximum and minimum values in x.
tf.reduce_max(x),tf.reduce_min(x)

(<tf.Tensor: shape=(), dtype=float32, numpy=1.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.0>)

In [20]:
# Inspect the maximum and minimum values in y (the labels).
tf.reduce_max(y),tf.reduce_min(y)

(<tf.Tensor: shape=(), dtype=int32, numpy=9>,
 <tf.Tensor: shape=(), dtype=int32, numpy=0>)

### 创建数据集

In [21]:
# Build a dataset of per-sample (x, y) pairs, then split it into batches of
# 128 samples each.
train_db = tf.data.Dataset.from_tensor_slices((x, y))
train_db = train_db.batch(128)

In [22]:
# Wrap the dataset in an iterator and pull the first batch from it.
train_iter = iter(train_db)
sample = next(train_iter)
# Display the shape of each element of the batch: batch() has grouped the
# x and y data into chunks of 128 samples.
tuple(part.shape for part in sample)

(TensorShape([128, 28, 28]), TensorShape([128]))

### 权值设定


In [31]:
# Network layout: [b, 784] => [b, 256] => [b, 128] => [b, 10]
# The shapes of every w and b therefore have to follow the rules of matrix
# multiplication. tf.Variable is used because Variables support automatic
# differentiation.
def _make_layer(n_in, n_out):
    """Create the (weight, bias) Variables of one fully connected layer.

    The weight has shape [n_in, n_out] and is drawn from a truncated normal
    distribution with stddev=0.1. Important: the default spread is 1; a
    smaller value gives better results here. The bias has shape [n_out] and
    starts at zero.
    """
    weight = tf.Variable(tf.random.truncated_normal([n_in, n_out], stddev=0.1))
    bias = tf.Variable(tf.zeros([n_out]))
    return weight, bias


w1, b1 = _make_layer(784, 256)
w2, b2 = _make_layer(256, 128)
w3, b3 = _make_layer(128, 10)

In [33]:
lr = 1e-3  # learning rate of the plain gradient-descent update

# Trainable parameters, in the same order as the gradients returned by tape.gradient.
params = [w1, b1, w2, b2, w3, b3]

for epoch in range(10):    # 10 complete passes over the dataset; change this value to compare results

    # Iterate over the batches. The loop variables are deliberately NOT named
    # x / y: reusing those names would overwrite the full dataset tensors
    # defined earlier in the notebook and leave them holding only the last
    # flattened batch once training has finished.
    for step, (x_batch, y_batch) in enumerate(train_db):
        # x_batch: [b, 28, 28] -> [b, 784]
        # y_batch: [b]
        x_flat = tf.reshape(x_batch, [-1, 28*28])

        # One-hot encode the labels: [b] -> [b, 10]. This does not depend on
        # the parameters, so it does not need to be recorded by the tape.
        y_onehot = tf.one_hot(y_batch, depth=10)

        with tf.GradientTape() as tape:
            # [b, 784] @ [784, 256] + [256] = [b, 256]; relu removes the negative values
            h1 = tf.nn.relu(x_flat@w1 + b1)

            # [b, 256] @ [256, 128] + [128] = [b, 128]
            h2 = tf.nn.relu(h1@w2 + b2)

            # [b, 128] @ [128, 10] + [10] = [b, 10]
            out = h2@w3 + b3

            # mse = mean((y - out)^2), reduced to a scalar
            loss = tf.reduce_mean(tf.square(y_onehot - out))

        # Gradients of the loss with respect to [w1, b1, w2, b2, w3, b3]
        grads = tape.gradient(loss, params)

        # Gradient descent: p = p - lr * p_grad.
        # Writing `w1 = w1 - lr * grads[0]` would rebind the name to a plain
        # tensor instead of a tf.Variable and break later iterations, so the
        # Variables are updated in place with assign_sub.
        for param, grad in zip(params, grads):
            param.assign_sub(lr * grad)

        if step%100 == 0:
            print(epoch, step, 'loss:', float(loss))         # report the loss every 100 steps

0 loss: 0.14193741977214813
100 loss: 0.1337687224149704
200 loss: 0.1409338265657425
300 loss: 0.12876813113689423
400 loss: 0.14227911829948425
0 loss: 0.12532471120357513
100 loss: 0.11962936073541641
200 loss: 0.12632541358470917
300 loss: 0.11668512970209122
400 loss: 0.1281130313873291
0 loss: 0.11361910402774811
100 loss: 0.10970083624124527
200 loss: 0.11579221487045288
300 loss: 0.10784342139959335
400 loss: 0.11777029186487198
0 loss: 0.10497782379388809
100 loss: 0.10239370167255402
200 loss: 0.10784158855676651
300 loss: 0.10108939558267593
400 loss: 0.10994835942983627
0 loss: 0.09825114905834198
100 loss: 0.09680360555648804
200 loss: 0.10157676041126251
300 loss: 0.09570697695016861
400 loss: 0.10373721271753311
0 loss: 0.09273962676525116
100 loss: 0.09238149970769882
200 loss: 0.0964723452925682
300 loss: 0.09128493070602417
400 loss: 0.09863805770874023
0 loss: 0.08822038024663925
100 loss: 0.0887659564614296
200 loss: 0.09217873960733414
300 loss: 0.08761563897132874