In [None]:
#!pip install tensorflow

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Activation, Dense
from tensorflow.keras.optimizers import Adam


In [2]:
# 載入 MNIST 手寫辨識資料
(X_train, y_train), (X_test, y_test_org) = mnist.load_data()

X_train = X_train.reshape(-1, 28, 28) / 255. #----->轉成3維並縮放      
X_test = X_test.reshape(-1, 28, 28) / 255.


# (Simple RNN建模)
#### 單一輸入，1層

In [3]:
model = Sequential() #----->必須是 3 dimension
model.add(SimpleRNN(units=256, input_shape=(28, 28))) #----->加 RNN 隱藏層(hidden layer)
model.add(Dense(units=10, activation='softmax')) #----->加 output 層


In [4]:
# 編譯: 選擇損失函數、優化方法及成效衡量方式
LR = 0.001 #----->Learning Rate
adam = Adam(LR)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) 

print(model.summary())


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   


 simple_rnn (SimpleRNN)      (None, 256)               72960     
                                                                 
 dense (Dense)               (None, 10)                2570      
                                                                 
Total params: 75530 (295.04 KB)
Trainable params: 75530 (295.04 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


In [5]:
# 將 label 進行 one-hot encoding，例如數字 7 經過 One-hot encoding 轉換後是 0000001000，即第7個值為 1
y_TrainOneHot = to_categorical(y_train) 
y_TestOneHot = to_categorical(y_test_org) 

# 將 training 的 input 資料轉為2維
X_train_2D = X_train.reshape(60000, 28,28)
X_test_2D = X_test.reshape(10000, 28,28)  

x_Train_norm = X_train_2D/255
x_Test_norm = X_test_2D/255


In [6]:

# 進行訓練, 訓練過程會存在 train_history 變數中
train_history = model.fit(x=x_Train_norm, y=y_TrainOneHot, validation_split=0.2, 
                          batch_size=800, epochs=10, verbose=2)  


Epoch 1/10
60/60 - 7s - loss: 2.0521 - accuracy: 0.2671 - val_loss: 2.6472 - val_accuracy: 0.2048 - 7s/epoch - 119ms/step
Epoch 2/10
60/60 - 5s - loss: 1.9918 - accuracy: 0.3098 - val_loss: 1.4486 - val_accuracy: 0.5170 - 5s/epoch - 76ms/step
Epoch 3/10
60/60 - 5s - loss: 1.4221 - accuracy: 0.5298 - val_loss: 1.3019 - val_accuracy: 0.5690 - 5s/epoch - 77ms/step
Epoch 4/10
60/60 - 5s - loss: 1.2503 - accuracy: 0.5962 - val_loss: 1.0475 - val_accuracy: 0.6757 - 5s/epoch - 77ms/step
Epoch 5/10
60/60 - 5s - loss: 1.1135 - accuracy: 0.6434 - val_loss: 0.9677 - val_accuracy: 0.6931 - 5s/epoch - 75ms/step
Epoch 6/10
60/60 - 5s - loss: 1.0006 - accuracy: 0.6825 - val_loss: 0.8895 - val_accuracy: 0.7186 - 5s/epoch - 78ms/step
Epoch 7/10
60/60 - 5s - loss: 1.0994 - accuracy: 0.6486 - val_loss: 0.9101 - val_accuracy: 0.7192 - 5s/epoch - 79ms/step
Epoch 8/10
60/60 - 5s - loss: 0.8405 - accuracy: 0.7344 - val_loss: 0.9673 - val_accuracy: 0.6794 - 5s/epoch - 83ms/step
Epoch 9/10
60/60 - 5s - loss: 0

In [7]:

# 顯示訓練成果(分數)
loss, accuracy = model.evaluate(x_Test_norm, y_TestOneHot)  
print(f"test loss: {loss}  test accuracy: {accuracy}")
        
# 預測(prediction)
predictions = np.argmax(model.predict(x_Test_norm[0:20]), axis=-1)
# get prediction result
print('actual :', y_test_org[0:20])
print('predict:', predictions)


test loss: 0.6284801959991455  test accuracy: 0.7949000000953674
actual : [7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]
predict: [7 2 1 0 4 1 4 9 5 9 5 6 9 0 1 5 9 7 8 4]
