# 平均損失函數
`tf.reduce_mean(loss)` 會將一個 batch 中每筆樣本的 loss 取平均，得到單一純量；因此每個 batch 訓練後都可以輸出當前 batch 的平均 loss。

In [2]:
import numpy as np
import tensorflow as tf
# Clear the TF1-compat default graph before defining anything in this cell
# (presumably so that re-running the cell starts from a clean state --
# TODO confirm this is still needed under eager execution).
tf.compat.v1.reset_default_graph()
class MNISTLoader():
    """Load MNIST through Keras and hand out random training mini-batches.

    Images are cast to float32, divided by 255.0 and given a trailing
    channel axis; labels are cast to int32.
    """

    def __init__(self):
        """Download/load MNIST and store the preprocessed train/test arrays."""
        (raw_train_x, raw_train_y), (raw_test_x, raw_test_y) = tf.keras.datasets.mnist.load_data()

        # Cast, rescale, then append the channel axis.
        self.train_data = (raw_train_x.astype(np.float32) / 255.0)[..., np.newaxis]    # [60000, 28, 28, 1]
        self.test_data = (raw_test_x.astype(np.float32) / 255.0)[..., np.newaxis]      # [10000, 28, 28, 1]
        self.train_label = raw_train_y.astype(np.int32)    # [60000]
        self.test_label = raw_test_y.astype(np.int32)      # [10000]

        self.num_train_data = self.train_data.shape[0]
        self.num_test_data = self.test_data.shape[0]

    def get_batch(self, batch_size):
        """Return ``batch_size`` random training examples as ``(images, labels)``.

        Indices are drawn independently with ``np.random.randint``, so the
        same example may appear more than once in a batch.
        """
        picks = np.random.randint(low=0, high=self.num_train_data, size=batch_size)
        images = self.train_data[picks, :]
        labels = self.train_label[picks]
        return images, labels
    
class MLP(tf.keras.Model):
    """Small multilayer perceptron: Flatten -> Dense(100, ReLU) -> Dense(10) -> softmax."""

    def __init__(self):
        """Create the three layers; they are applied in order by ``call``."""
        super().__init__()
        self.flatten = tf.keras.layers.Flatten()
        self.dense1 = tf.keras.layers.Dense(units=100, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(units=10)

    def call(self, inputs):
        """Run the forward pass and return softmax probabilities.

        Shapes (as used in this file): inputs [batch_size, 28, 28, 1],
        output [batch_size, 10].
        """
        hidden = self.dense1(self.flatten(inputs))    # [batch_size, 100]
        logits = self.dense2(hidden)                  # [batch_size, 10]
        return tf.nn.softmax(logits)

# Hyperparameters for this demo cell.
num_epochs, batch_size, learning_rate = 5, 5, 0.001

model = MLP()
data_loader = MNISTLoader()

# Total number of mini-batch steps across all epochs.
num_batches = int((data_loader.num_train_data // batch_size) * num_epochs)

# Draw one random mini-batch and evaluate the model on it.
X, y = data_loader.get_batch(batch_size)
with tf.GradientTape() as tape:
    y_pred = model(X)
    # Average the per-example cross-entropy into one scalar: the loss of
    # the current batch.
    loss = tf.reduce_mean(
        tf.keras.losses.sparse_categorical_crossentropy(y_true=y, y_pred=y_pred)
    )
print(loss)

tf.Tensor(2.621033, shape=(), dtype=float32)


# 優化神經網路(TensorFlow版本)

In [11]:
import tensorflow as tf
import numpy as np
class MNISTLoader():
    """MNIST data holder that serves randomly sampled training batches.

    After construction the instance exposes float32 image arrays with a
    trailing channel axis (values divided by 255.0), int32 label arrays,
    and the number of train/test examples.
    """

    def __init__(self):
        """Load MNIST via Keras and preprocess images and labels."""

        def as_scaled_images(pixels):
            # float32 cast, divide by 255.0, then add the channel axis.
            return (pixels.astype(np.float32) / 255.0)[..., np.newaxis]

        train_split, test_split = tf.keras.datasets.mnist.load_data()
        train_images, train_labels = train_split
        test_images, test_labels = test_split

        self.train_data = as_scaled_images(train_images)     # [60000, 28, 28, 1]
        self.test_data = as_scaled_images(test_images)       # [10000, 28, 28, 1]
        self.train_label = train_labels.astype(np.int32)     # [60000]
        self.test_label = test_labels.astype(np.int32)       # [10000]
        self.num_train_data = self.train_data.shape[0]
        self.num_test_data = self.test_data.shape[0]

    def get_batch(self, batch_size):
        """Sample ``batch_size`` training examples at random (repeats possible).

        Returns a tuple ``(images, labels)`` taken at the same random indices.
        """
        sampled = np.random.randint(0, self.num_train_data, size=batch_size)
        return self.train_data[sampled, :], self.train_label[sampled]
    
class MLP(tf.keras.Model):
    """Keras model stacking Flatten, a 100-unit ReLU layer and a 10-unit layer, ending in softmax."""

    def __init__(self):
        """Instantiate the layers used by ``call``."""
        super().__init__()
        self.flatten = tf.keras.layers.Flatten()
        self.dense1 = tf.keras.layers.Dense(units=100, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(units=10)

    def call(self, inputs):
        """Apply the layers in sequence and return the softmax of the last one.

        Shapes (as used in this file):
        [batch_size, 28, 28, 1] -> [batch_size, 784] -> [batch_size, 100]
        -> [batch_size, 10].
        """
        activations = inputs
        for layer in (self.flatten, self.dense1, self.dense2):
            activations = layer(activations)
        return tf.nn.softmax(activations)

# Training hyperparameters.
num_epochs = 5
batch_size = 50
learning_rate = 0.001

model = MLP()
data_loader = MNISTLoader()
# RMSprop is the optimizer in use; Adam and SGD are drop-in alternatives:
# optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
# optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)

# Steps per epoch, rounded up, times the number of epochs. True division is
# required here: floor-dividing before np.ceil would make the ceil a no-op
# and silently drop a trailing partial batch when the dataset size is not a
# multiple of batch_size.
num_batches = int(np.ceil(data_loader.num_train_data / batch_size)) * num_epochs
print(model.variables)  # [] -- the model has not been called yet at this point
for batch_index in range(num_batches):
    X, y = data_loader.get_batch(batch_size)
    # Only the forward pass and the loss need to be recorded on the tape.
    with tf.GradientTape() as tape:
        y_pred = model(X)
        loss = tf.keras.losses.sparse_categorical_crossentropy(y_true=y, y_pred=y_pred)
        # Reduce the per-example losses to one scalar for this batch.
        loss = tf.reduce_mean(loss)
    print("batch %d: loss %f" % (batch_index, loss.numpy()))
    # Differentiate with respect to the trainable weights only, so that any
    # non-trainable variable is never handed to the optimizer.
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(grads_and_vars=zip(grads, model.trainable_variables))


[]
batch 0: loss 2.320936
batch 1: loss 2.040167
batch 2: loss 2.087241
batch 3: loss 1.757956
batch 4: loss 1.886571
batch 5: loss 1.735484
batch 6: loss 1.527186
batch 7: loss 1.568506
batch 8: loss 1.441880
batch 9: loss 1.411643
batch 10: loss 1.360865
batch 11: loss 1.219160
batch 12: loss 1.093266
batch 13: loss 1.200287
batch 14: loss 1.055199
batch 15: loss 0.987168
batch 16: loss 1.034901
batch 17: loss 0.962319
batch 18: loss 0.913380
batch 19: loss 0.955973
batch 20: loss 0.895367
batch 21: loss 0.938127
batch 22: loss 0.855748
batch 23: loss 0.908024
batch 24: loss 0.766844
batch 25: loss 0.959602
batch 26: loss 0.920797
batch 27: loss 0.659319
batch 28: loss 0.790139
batch 29: loss 0.776857
batch 30: loss 0.819875
batch 31: loss 0.811049
batch 32: loss 0.860959
batch 33: loss 0.794132
batch 34: loss 0.797881
batch 35: loss 0.787934
batch 36: loss 0.627918
batch 37: loss 0.497544
batch 38: loss 0.550038
batch 39: loss 0.708925
batch 40: loss 0.681207
batch 41: loss 0.671071