In [1]:
#創一個簡單的密集層類別
import tensorflow as tf

class NaiveDense:
    """Minimal fully connected layer computing ``activation(inputs @ W + b)``.

    Attributes:
        activation: Callable applied to the affine output.
        W: ``tf.Variable`` of shape ``(input_size, output_size)``.
        b: ``tf.Variable`` of shape ``(output_size,)``.
    """

    def __init__(self,input_size,output_size,activation):
        """Create the layer's kernel and bias variables.

        Args:
            input_size: Number of input features (rows of ``W``).
            output_size: Number of output units (columns of ``W``, length of ``b``).
            activation: Callable applied to ``inputs @ W + b`` in ``__call__``.
        """
        # Kernel starts as uniform random values between 0 and 0.1.
        kernel_init=tf.random.uniform((input_size,output_size),minval=0,maxval=1e-1)
        # Bias starts as all zeros, one entry per output unit.
        bias_init=tf.zeros((output_size,))

        self.activation=activation
        self.W=tf.Variable(kernel_init)
        self.b=tf.Variable(bias_init)

    def __call__(self,inputs):
        """Forward pass; lets the layer instance be called like a function."""
        pre_activation=tf.matmul(inputs,self.W)+self.b
        return self.activation(pre_activation)

    @property
    def weights(self):
        """Return the layer's variables as the list ``[W, b]`` (no setter is defined)."""
        return [self.W,self.b]


In [2]:
# A simple sequential container that chains layers together.
class NaiveSequential:
    """Apply a list of layers one after another.

    Each element of ``layers`` must be callable on the previous layer's
    output and expose a ``weights`` iterable.
    """

    def __init__(self,layers):
        """Store the ordered collection of layers."""
        self.layers=layers

    def __call__(self,inputs):
        """Feed ``inputs`` through every layer in order and return the final output."""
        outputs=inputs
        for current_layer in self.layers:
            outputs=current_layer(outputs)
        return outputs

    @property
    def weights(self):
        """Return a new flat list with every layer's weights, in layer order."""
        return [weight for layer in self.layers for weight in layer.weights]

In [3]:
# Two-layer classifier: 28*28 inputs -> 512 ReLU units -> 10 softmax outputs.
model=NaiveSequential(
    layers=[
        NaiveDense(
            input_size=28*28,
            output_size=512,
            activation=tf.nn.relu,
        ),
        NaiveDense(
            input_size=512,
            output_size=10,
            activation=tf.nn.softmax,
        ),
    ]
)

In [4]:
import math

class BatchGenerator:
    """Serve consecutive mini-batches of ``(images, labels)`` by slicing.

    Attributes:
        index: Start offset of the next batch.
        images: The full image collection (must support ``len`` and slicing).
        labels: The full label collection, same length as ``images``.
        batch_size: Number of samples per batch.
        num_batches: Number of batches needed to cover all samples once.
    """

    def __init__(self,images,labels,batch_size=128):
        """Set up the generator at the start of the data.

        Args:
            images: Sliceable collection of input samples.
            labels: Sliceable collection of targets, one per sample.
            batch_size: Samples per batch (default 128).

        Raises:
            AssertionError: If ``images`` and ``labels`` differ in length.
        """
        # Compare the two lengths. The previous form,
        # ``len(images == len(labels))``, took the length of a comparison
        # result, so mismatched NumPy arrays were never rejected.
        assert len(images) == len(labels), "images and labels must have the same length"
        self.index=0
        self.images=images
        self.labels=labels
        self.batch_size=batch_size
        # Round up so a final, smaller batch is still counted.
        self.num_batches=math.ceil(len(images)/batch_size)

    def next(self):
        """Return the next ``(images, labels)`` batch and advance the cursor.

        The last batch may be shorter than ``batch_size``. The cursor is not
        reset or bounds-checked here; callers are expected to call this at
        most ``num_batches`` times.
        """
        images=self.images[self.index : self.index + self.batch_size]
        labels=self.labels[self.index : self.index + self.batch_size]
        self.index += self.batch_size
        return images,labels

In [5]:
from keras import optimizers

# Plain SGD optimizer with a learning rate of 1e-3.
# The instance is bound to `optimizer` instead of rebinding `optimizers`, so the
# imported `optimizers` module is not shadowed and this cell can be re-run.
optimizer=optimizers.SGD(learning_rate=1e-3)

def update_weights(gradients,weights):
    """Apply one optimizer step to ``weights`` using ``gradients``.

    Args:
        gradients: Iterable of gradients, aligned one-to-one with ``weights``.
        weights: Iterable of variables to update in place.
    """
    optimizer.apply_gradients(zip(gradients,weights))

In [6]:
def one_training_step(model,images_batch,labels_batch):
    """Run one forward/backward pass on a batch and update the model's weights.

    Args:
        model: Callable model exposing a ``weights`` list of variables.
        images_batch: Batch of inputs passed to ``model``.
        labels_batch: Integer class labels for the batch.

    Returns:
        The mean sparse categorical cross-entropy loss over the batch.
    """
    # The forward pass and loss run inside the tape so gradients can be derived.
    with tf.GradientTape() as tape:
        batch_predictions=model(images_batch)
        # One loss value per sample.
        sample_losses=tf.keras.losses.sparse_categorical_crossentropy(
            y_true=labels_batch,
            y_pred=batch_predictions,
        )
        mean_loss=tf.reduce_mean(sample_losses)

    # Gradient of the mean loss with respect to every weight.
    weight_gradients=tape.gradient(target=mean_loss,sources=model.weights)
    # Apply the gradients through the update_weights helper.
    update_weights(weight_gradients,model.weights)
    return mean_loss

In [7]:
def fit(model,images,labels,epochs,batch_size=128):
    """Train ``model`` for ``epochs`` passes over the data in mini-batches.

    Args:
        model: Model accepted by ``one_training_step``.
        images: Training inputs.
        labels: Training targets, one per input.
        epochs: Number of full passes over the data.
        batch_size: Samples per mini-batch (default 128).
    """
    for epoch_counter in range(epochs):
        print(f"Epoch {epoch_counter}")
        # A fresh generator per epoch restarts from the first sample.
        # batch_size is forwarded here; it was previously dropped, so the
        # generator always used its own default regardless of the argument.
        batch_generator=BatchGenerator(images,labels,batch_size=batch_size)
        for batch_counter in range(batch_generator.num_batches):
            images_batch,labels_batch=batch_generator.next()
            loss=one_training_step(model,images_batch,labels_batch)
            # Report the loss every 100 batches.
            if batch_counter % 100 ==0:
                print(f"loss at batch {batch_counter}: {loss:.2f}")

In [8]:
from keras.datasets import mnist
# Load the MNIST train/test splits.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Flatten every image to a 28*28 vector, cast to float32 and divide by 255.
train_images = train_images.reshape((60000, 28 * 28)).astype("float32") / 255
test_images = test_images.reshape((10000, 28 * 28)).astype("float32") / 255

# Train for 10 epochs with mini-batches of 128 samples.
fit(
    model,
    train_images,
    train_labels,
    epochs=10,
    batch_size=128,
)

Epoch 0
loss at batch 0: 7.04
loss at batch 100: 2.25
loss at batch 200: 2.22
loss at batch 300: 2.12
loss at batch 400: 2.25
Epoch 1
loss at batch 0: 1.93
loss at batch 100: 1.89
loss at batch 200: 1.83
loss at batch 300: 1.74
loss at batch 400: 1.86
Epoch 2
loss at batch 0: 1.60
loss at batch 100: 1.58
loss at batch 200: 1.50
loss at batch 300: 1.44
loss at batch 400: 1.53
Epoch 3
loss at batch 0: 1.33
loss at batch 100: 1.34
loss at batch 200: 1.24
loss at batch 300: 1.22
loss at batch 400: 1.29
Epoch 4
loss at batch 0: 1.13
loss at batch 100: 1.16
loss at batch 200: 1.04
loss at batch 300: 1.06
loss at batch 400: 1.12
Epoch 5
loss at batch 0: 0.99
loss at batch 100: 1.02
loss at batch 200: 0.90
loss at batch 300: 0.94
loss at batch 400: 1.00
Epoch 6
loss at batch 0: 0.88
loss at batch 100: 0.91
loss at batch 200: 0.80
loss at batch 300: 0.84
loss at batch 400: 0.91
Epoch 7
loss at batch 0: 0.79
loss at batch 100: 0.83
loss at batch 200: 0.72
loss at batch 300: 0.77
loss at batch 40

In [9]:
import numpy as np

# Run the model on the test images and convert the result to a NumPy array.
predictions=model(test_images).numpy()

# The predicted class is the index of the largest score in each row.
predicted_labels=predictions.argmax(axis=1)
# Boolean array: True where the prediction equals the test label.
matches=predicted_labels == test_labels
print(f"accuracy: {np.mean(matches):.2f}")

accuracy: 0.82
