# 平均損失函數
reduce_mean(loss) 可以讓每一個batch訓練後output出當前batch的loss 

In [2]:
import numpy as np
import tensorflow as tf
tf.compat.v1.reset_default_graph()
class MNISTLoader():
    def __init__(self):
        mnist = tf.keras.datasets.mnist
        (self.train_data, self.train_label), (self.test_data, self.test_label) = mnist.load_data()

        self.train_data = np.expand_dims(self.train_data.astype(np.float32) / 255.0, axis=-1)      # [60000, 28, 28, 1]
        self.test_data = np.expand_dims(self.test_data.astype(np.float32) / 255.0, axis=-1)        # [10000, 28, 28, 1]
        self.train_label = self.train_label.astype(np.int32)    # [60000]
        self.test_label = self.test_label.astype(np.int32)      # [10000]
        self.num_train_data, self.num_test_data = self.train_data.shape[0], self.test_data.shape[0]

    def get_batch(self, batch_size):

        index = np.random.randint(0, self.num_train_data, batch_size)
        return self.train_data[index, :], self.train_label[index]
    
class MLP(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.flatten = tf.keras.layers.Flatten()    
        self.dense1 = tf.keras.layers.Dense(units=100, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(units=10)

    def call(self, inputs):         # [batch_size, 28, 28, 1]
        x = self.flatten(inputs)    # [batch_size, 784]
        x = self.dense1(x)          # [batch_size, 100]
        x = self.dense2(x)          # [batch_size, 10]
        output = tf.nn.softmax(x)
        return output

num_epochs = 5
batch_size = 5
learning_rate = 0.001

model = MLP()
data_loader = MNISTLoader()

num_batches = int(data_loader.num_train_data // batch_size * num_epochs)

X, y = data_loader.get_batch(batch_size)
with tf.GradientTape() as tape:
    y_pred = model(X)
    loss = tf.keras.losses.sparse_categorical_crossentropy(y_true=y, y_pred=y_pred)
    # 計算當前 batch loss
    loss = tf.reduce_mean(loss)
    print(loss)

tf.Tensor(2.621033, shape=(), dtype=float32)


# 優化神經網路(keras版本)

In [6]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
tf.compat.v1.reset_default_graph()

(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train = x_train.reshape([x_train.shape[0], -1])
x_test = x_test.reshape([x_test.shape[0], -1])
print(x_train.shape, ' ', y_train.shape)
print(x_test.shape, ' ', y_test.shape)

(60000, 784)   (60000,)
(10000, 784)   (10000,)


In [8]:
model = keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=(784,)), # 784 * 64 + 64
    layers.Dense(64, activation='relu'), # 64*64 + 64
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax') #64*10 + 10
])


#keras.optimizers.Adagrad(learning_rate=0.01)
#keras.optimizers.Adam(learning_rate=0.01)
#keras.optimizers.RMSprop(learning_rate=0.01)

# provide labels as one_hot representation => tf.keras.losses.CategoricalCrossentropy
# provide labels as integers => tf.keras.losses.SparseCategoricalCrossentropy 
model.compile(optimizer=keras.optimizers.Adam(),
             loss=keras.losses.SparseCategoricalCrossentropy(),
             metrics=['accuracy'])
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 64)                50240     
_________________________________________________________________
dense_5 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_6 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_7 (Dense)              (None, 10)                650       
Total params: 59,210
Trainable params: 59,210
Non-trainable params: 0
_________________________________________________________________


In [9]:
#history = model.fit(x_train, y_train, batch_size=256, epochs=100, validation_split=0.3, verbose=2)
# verbose 代表呈現資訊詳細度 2最詳細 0最簡略 只可選擇 0/1/2來操作
history = model.fit(x_train, y_train, batch_size=256, epochs=100, validation_data=(x_test, y_test), verbose=2)


Epoch 1/100
235/235 - 0s - loss: 2.2645 - accuracy: 0.7448 - val_loss: 0.5907 - val_accuracy: 0.8683
Epoch 2/100
235/235 - 0s - loss: 0.4284 - accuracy: 0.8902 - val_loss: 0.3686 - val_accuracy: 0.9031
Epoch 3/100
235/235 - 0s - loss: 0.2889 - accuracy: 0.9209 - val_loss: 0.2874 - val_accuracy: 0.9224
Epoch 4/100
235/235 - 0s - loss: 0.2202 - accuracy: 0.9378 - val_loss: 0.2524 - val_accuracy: 0.9305
Epoch 5/100
235/235 - 0s - loss: 0.1808 - accuracy: 0.9466 - val_loss: 0.2347 - val_accuracy: 0.9372
Epoch 6/100
235/235 - 0s - loss: 0.1509 - accuracy: 0.9555 - val_loss: 0.2102 - val_accuracy: 0.9448
Epoch 7/100
235/235 - 0s - loss: 0.1315 - accuracy: 0.9610 - val_loss: 0.2035 - val_accuracy: 0.9449
Epoch 8/100
235/235 - 0s - loss: 0.1191 - accuracy: 0.9637 - val_loss: 0.2043 - val_accuracy: 0.9438
Epoch 9/100
235/235 - 0s - loss: 0.1064 - accuracy: 0.9675 - val_loss: 0.1997 - val_accuracy: 0.9498
Epoch 10/100
235/235 - 0s - loss: 0.1004 - accuracy: 0.9699 - val_loss: 0.1969 - val_accura