In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets

In [3]:
import os

#设置log信息等级，2代表只打印重要信息
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'     

### 准备数据

In [5]:
#加载mnist数据集，如果本地没有数据缓存，则会从网络中下载数据
# x: [60k,28,28]
# y: [60k]
(x,y),_ = datasets.mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [6]:
#将数据集转化为tensor格式
x = tf.convert_to_tensor(x,dtype=tf.float32)/255.      #/255.是将数据转化为0-1.之间
y = tf.convert_to_tensor(y,dtype=tf.int32)

In [8]:
x.shape, y.shape, x.dtype, y.dtype

(TensorShape([60000, 28, 28]), TensorShape([60000]), tf.float32, tf.int32)

In [9]:
#查看x数据集中的最大值与最小值
tf.reduce_max(x),tf.reduce_min(x)

(<tf.Tensor: shape=(), dtype=float32, numpy=1.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.0>)

In [10]:
tf.reduce_max(y),tf.reduce_min(y)

(<tf.Tensor: shape=(), dtype=int32, numpy=9>,
 <tf.Tensor: shape=(), dtype=int32, numpy=0>)

### 创建数据集

In [11]:
train_db = tf.data.Dataset.from_tensor_slices((x,y)).batch(128)       #将数据集按128个单位长度进行分割

In [12]:
train_iter = iter(train_db)     #转化为生成器
sample = next(train_iter)
sample[0].shape,sample[1].shape     #这里可以看到batch的作用是吧x,y数据集按128一个单位进行分割

(TensorShape([128, 28, 28]), TensorShape([128]))

In [15]:
#[b,784]=> [b,256] => [b,128] =>[b,10]
#那么，w和b的矩阵大小需要满足矩阵相乘的规律
w1 = tf.Variable(tf.random.truncated_normal([784,256]))
b1 = tf.Variable(tf.zeros([256]))
w2 = tf.Variable(tf.random.truncated_normal([256,128]))
b2 = tf.Variable(tf.zeros([128]))
w3 = tf.Variable(tf.random.truncated_normal([128,10]))
b3 = tf.Variable(tf.zeros([10]))

In [16]:
#进行迭代
for (x,y) in train_db:
    #x：[128,28,28]
    #y:[128]
    x = tf.reshape(x,[-1,28*28])
    
    with tf.GradientTape() as tape:

        #x:[b,28*28]
        #h1 = x@w1 + b1
        #[b,784]@[784,256] + [256] = [b,256]
        h1 = x@w1 + b1
        h1 = tf.nn.relu(h1)   #非线性函数，去掉负数

        #[b,256]@[256,128]+[128] = [b,128]
        h2 = h1@w2 + b2
        h2 = tf.nn.relu(h2)

        #[b,128]@[128,10] = [b,10]
        out = h2@w3 + b3


        #compute loss
        y_onehot = tf.one_hot(y,depth=10)     #编码

        #mse = mean((y-out)^2)    计算方差
        loss = tf.square(y_onehot - out)

        #mean:scalar
        loss = tf.reduce_mean(loss)
    grads = tape.gradient(loss,[w1,b1,w2,b2,w3,b3])
    #w1 = w1 - lr * w1_grad
    w1 = w1-lr
    

NameError: name 'lr' is not defined