In [1]:
import tensorflow as tf
import numpy as np

* 學習於[https://tf.wiki/zh_hans/basic/models.html](https://tf.wiki/zh_hans/basic/models.html)
## Multilayer Perceptron(MLP)
* 又可稱多層全連接神經網路
* 步驟
    1. 使用 **tf.keras.datasets** 獲得dataset並預處理
    2. 使用 **tf.keras.Model** 和 **tf.keras.layers** 建構模型
    3. 使用 **tf.keras.losses** 計算loss function
    4. 使用 **tf.keras.optimizers** 優化模型
    5. 使用 **tf.keras.metrics** 計算評估指標

### dataset與pre-process
* 做一個簡單的 MNISTLoader class 來讀取 MNIST dataset
<img src="https://tf.wiki/_images/mnist_0-9.png">
* 分別載入60000和10000張大小為28x28的手寫數字圖片

In [2]:
class MNISTLoader:
    """Load MNIST through ``tf.keras.datasets`` and serve random training batches.

    Attributes set by the constructor:
        train_data, test_data: float32 image arrays, divided by 255.0 and
            given a trailing channel axis of size 1 (grayscale).
        train_label, test_label: int32 label arrays.
        num_train_data, num_test_data: number of samples in each split.
    """

    def __init__(self):
        (train_images, train_labels), (test_images, test_labels) = (
            tf.keras.datasets.mnist.load_data()
        )
        # The raw training images arrive as (60000, 28, 28). Cast to float32,
        # scale by 1/255 and append a channel axis so each image is 28x28x1.
        self.train_data = (train_images.astype(np.float32) / 255.0)[..., np.newaxis]  # [60000, 28, 28, 1]
        self.test_data = (test_images.astype(np.float32) / 255.0)[..., np.newaxis]    # [10000, 28, 28, 1]
        self.train_label = train_labels.astype(np.int32)  # [60000]
        self.test_label = test_labels.astype(np.int32)    # [10000]
        self.num_train_data = self.train_data.shape[0]
        self.num_test_data = self.test_data.shape[0]

    def get_batch(self, batch_size):
        """Return ``batch_size`` randomly chosen training samples.

        Indices are drawn uniformly from ``[0, num_train_data)`` with
        ``np.random.randint``, so the same sample may appear more than once
        in a batch.

        Args:
            batch_size: number of samples to draw.

        Returns:
            A ``(images, labels)`` tuple indexed by the same 1-D index array.
        """
        chosen = np.random.randint(low=0, high=self.num_train_data, size=batch_size)
        return self.train_data[chosen], self.train_label[chosen]

### 模型構建
* 使用 **tf.keras.Model** 和 **tf.keras.layers**
* 該model輸入為一個向量(拉直的1x784)手寫數字圖片
* 輸出10維的向量，分別代表0到9的機率

#### Softmax
* 我們希望輸出為每個 output label 的機率 => 一個10維離散機率
    * 該向量中的每個元素均在 **[0,1]** 之間
    * 該向量的所有元素之和為 **1**
    <img src="img/softmax.png">

In [3]:
class MLP(tf.keras.Model):
    """Two-layer fully connected classifier that outputs 10 class probabilities.

    Layers: Flatten -> Dense(100, ReLU) -> Dense(10), followed by a softmax
    applied in ``call``.
    """

    def __init__(self):
        super().__init__()
        layers = tf.keras.layers
        # Flatten collapses every axis except the first (batch) axis.
        self.flatten = layers.Flatten()
        self.dense1 = layers.Dense(units=100, activation=tf.nn.relu)
        # No activation here; softmax is applied explicitly in call().
        self.dense2 = layers.Dense(units=10)

    def call(self, inputs):
        """Map a batch of images to per-class probabilities.

        Args:
            inputs: image batch, [batch_size, 28, 28, 1] in this notebook.

        Returns:
            Tensor of shape [batch_size, 10]; each row is a softmax output.
        """
        flat = self.flatten(inputs)   # [batch_size, 784]
        hidden = self.dense1(flat)    # [batch_size, 100]
        logits = self.dense2(hidden)  # [batch_size, 10]
        return tf.nn.softmax(logits)

<img src="https://tf.wiki/_images/mlp.png">

### 模型的訓練 

In [4]:
# Training hyperparameters.
learning_rate = 0.001  # step size passed to the optimizer
batch_size = 50        # number of samples per batch
num_epochs = 5         # multiplier applied to the per-epoch batch count

### 優化器選擇
參考[網站](https://medium.com/雞雞與兔兔的工程世界/機器學習ml-note-sgd-momentum-adagrad-adam-optimizer-f20568c968db)
* SGD-隨機梯度下降法 (stochastic gradient descent)
    * <img src="https://miro.medium.com/v2/resize:fit:1100/format:webp/1*C8PAvTAXukHZ2mZPItwIrg.png">
* Momentum
    * 此優化器為模擬物理動量的概念，在同方向的維度上學習速度會變快，方向改變的時候學習速度會變慢
    * <img src="https://miro.medium.com/v2/resize:fit:1100/format:webp/1*tbRXwsHwo9WPx5xTcZDX_A.png">
    * **Vt** 可以想像成 **方向速度**，會與上一次的更新有關，如果上一次的梯度與這次同方向的話，|Vt|(速度)會越來越大(表示梯度增強)，w參數的更新梯度便會越來越快
    * **$\beta$** 可以想像為空氣阻力或是地面摩擦力，通常設為0.9
* AdaGrad(Adaptive)
    * learning rate η 對優化器非常重要，太小會花費太多時間學習，太大可能會overshooting（越過最佳解而震盪甚至發散），無法正確學習
    * AdaGrad會依照梯度去調整 η
    * <img src="https://miro.medium.com/v2/resize:fit:1100/format:webp/1*203QdRb0FaCFNfIZayfUsw.png">
        * 其中 η 乘上 1/√(n+ϵ) 再做參數更新
        * n為前面所有梯度值的平方和
        * ϵ 為平滑值，加上 ϵ 的原因是為了不讓分母為0，ϵ 一般值為1e-8
    * 特性
        1. 訓練前期累積的梯度平方和 n 較小，能夠放大學習率
        2. 訓練後期累積的梯度平方和 n 較大，能夠約束學習率
* Adam
    * Momentum 跟 AdaGrad這二種Optimizer做結合，為目前較常使用的Optimizer
    * <img src="https://miro.medium.com/v2/resize:fit:1100/format:webp/1*HIrsnzAhkYsm1wCI6gUaug.png">
        * 像Momentum一樣保持了過去梯度的指數衰減平均值，像AdaGrad一樣存了過去梯度的平方衰減平均值
    * <img src="https://miro.medium.com/v2/resize:fit:1100/format:webp/1*4b7GUhHlzoqum1U4PCTFWw.png">
        * 對mt跟vt做偏離校正
    * <img src="https://miro.medium.com/v2/resize:fit:1100/format:webp/1*uhyuvY_h-NFYU_hmKwfaFQ.png">
        * Adam 保留了 Momentum 對過去梯度的方向做梯度速度調整與AdaGrad對過去梯度的平方值做learning rate的調整，再加上Adam有做參數的”偏離校正”，使得每一次的學習率都會有個確定的範圍，會讓參數的更新較為平穩 
        
### 總整理[參考網站](https://medium.com/雞雞與兔兔的工程世界/機器學習ml-note-sgd-momentum-adagrad-adam-optimizer-f20568c968db)
<img src="https://miro.medium.com/v2/resize:fit:1400/format:webp/1*U0IT4yJReyPibVI43j38cQ.png">
<img src="https://miro.medium.com/v2/resize:fit:1240/1*SjtKOauOXFVjWRR7iCtHiA.gif">

In [5]:
# Create the data source and the network, then the optimizer that will
# update the network's weights. Adam is used with the configured step size.
data_loader = MNISTLoader()
model = MLP()
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

Metal device set to: Apple M1


2023-03-26 15:21:29.498420: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-03-26 15:21:29.498783: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [6]:
# Training loop: one optimizer step per randomly drawn mini-batch.
#
# num_batches is the total number of steps: (batches per epoch) * num_epochs.
# An epoch normally means one full pass over the training data; here
# get_batch() draws indices at random, so this is the equivalent step count
# rather than exact passes over every sample.
num_batches = int(data_loader.num_train_data // batch_size * num_epochs)
for batch_index in range(num_batches):
    X, y = data_loader.get_batch(batch_size)
    # Only the forward pass and the loss need to be recorded by the tape.
    with tf.GradientTape() as tape:
        y_pred = model(X)
        # Labels are integer class ids and y_pred holds probabilities (the
        # model applies softmax), so the sparse cross-entropy is used with
        # its default from_logits=False.
        loss = tf.keras.losses.sparse_categorical_crossentropy(y_true=y, y_pred=y_pred)
        loss = tf.reduce_mean(loss)
    # Logging happens outside the tape context; it is not part of the graph
    # being differentiated.
    print(f"batch {batch_index}: loss {loss.numpy()}")
    # Differentiate with respect to trainable variables only: any
    # non-trainable variable would produce a None gradient.
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(grads_and_vars=zip(grads, model.trainable_variables))

batch 0: loss 2.3370795249938965
batch 1: loss 2.287160873413086
batch 2: loss 2.181264638900757
batch 3: loss 2.0944173336029053
batch 4: loss 1.989709734916687
batch 5: loss 1.9161925315856934
batch 6: loss 1.966715931892395
batch 7: loss 1.8699395656585693
batch 8: loss 1.7662453651428223
batch 9: loss 1.6430126428604126
batch 10: loss 1.5790722370147705
batch 11: loss 1.697829246520996
batch 12: loss 1.4866368770599365
batch 13: loss 1.37369966506958
batch 14: loss 1.3470656871795654
batch 15: loss 1.2022373676300049
batch 16: loss 1.3854176998138428
batch 17: loss 1.2590304613113403
batch 18: loss 1.443745493888855
batch 19: loss 1.2901467084884644
batch 20: loss 1.1727484464645386
batch 21: loss 1.3447316884994507
batch 22: loss 1.1882752180099487
batch 23: loss 1.1394003629684448
batch 24: loss 0.9043923020362854
batch 25: loss 1.1118465662002563
batch 26: loss 1.2136934995651245
batch 27: loss 0.8705396056175232
batch 28: loss 0.8899882435798645
batch 29: loss 0.970076441764831

batch 252: loss 0.21521815657615662
batch 253: loss 0.3988133668899536
batch 254: loss 0.21556247770786285
batch 255: loss 0.2677079141139984
batch 256: loss 0.34553390741348267
batch 257: loss 0.16768860816955566
batch 258: loss 0.5390146374702454
batch 259: loss 0.26769259572029114
batch 260: loss 0.3008610010147095
batch 261: loss 0.21756117045879364
batch 262: loss 0.452384889125824
batch 263: loss 0.3166040778160095
batch 264: loss 0.14949947595596313
batch 265: loss 0.3611223101615906
batch 266: loss 0.29933077096939087
batch 267: loss 0.19630639255046844
batch 268: loss 0.5219059586524963
batch 269: loss 0.49085530638694763
batch 270: loss 0.21292737126350403
batch 271: loss 0.30116796493530273
batch 272: loss 0.35086944699287415
batch 273: loss 0.2902015149593353
batch 274: loss 0.2660571038722992
batch 275: loss 0.23285678029060364
batch 276: loss 0.4210340678691864
batch 277: loss 0.28089237213134766
batch 278: loss 0.2126895636320114
batch 279: loss 0.2748216688632965
batch 

batch 499: loss 0.20532234013080597
batch 500: loss 0.7155076265335083
batch 501: loss 0.16050133109092712
batch 502: loss 0.10372867435216904
batch 503: loss 0.4076671600341797
batch 504: loss 0.09948620945215225
batch 505: loss 0.4464510679244995
batch 506: loss 0.17921182513237
batch 507: loss 0.14739641547203064
batch 508: loss 0.29852885007858276
batch 509: loss 0.4577837884426117
batch 510: loss 0.21631889045238495
batch 511: loss 0.3443073332309723
batch 512: loss 0.2657000422477722
batch 513: loss 0.17890074849128723
batch 514: loss 0.2537586987018585
batch 515: loss 0.1849393993616104
batch 516: loss 0.3671322464942932
batch 517: loss 0.3183537721633911
batch 518: loss 0.19231656193733215
batch 519: loss 0.26052072644233704
batch 520: loss 0.2900692820549011
batch 521: loss 0.2156733125448227
batch 522: loss 0.18374092876911163
batch 523: loss 0.31591278314590454
batch 524: loss 0.28520914912223816
batch 525: loss 0.16423122584819794
batch 526: loss 0.38088446855545044
batch 5

batch 758: loss 0.2842966616153717
batch 759: loss 0.20970191061496735
batch 760: loss 0.32217949628829956
batch 761: loss 0.15411075949668884
batch 762: loss 0.102567158639431
batch 763: loss 0.23558032512664795
batch 764: loss 0.10985280573368073
batch 765: loss 0.21874557435512543
batch 766: loss 0.11989489942789078
batch 767: loss 0.4594474732875824
batch 768: loss 0.22298561036586761
batch 769: loss 0.16461536288261414
batch 770: loss 0.2969748377799988
batch 771: loss 0.22994393110275269
batch 772: loss 0.0861022099852562
batch 773: loss 0.19118236005306244
batch 774: loss 0.11846795678138733
batch 775: loss 0.3656146824359894
batch 776: loss 0.1644221395254135
batch 777: loss 0.2079099863767624
batch 778: loss 0.07584474980831146
batch 779: loss 0.263822466135025
batch 780: loss 0.16324229538440704
batch 781: loss 0.03529270738363266
batch 782: loss 0.242379829287529
batch 783: loss 0.2420186996459961
batch 784: loss 0.11078204959630966
batch 785: loss 0.13609179854393005
batch 

batch 1013: loss 0.09135940670967102
batch 1014: loss 0.3697722554206848
batch 1015: loss 0.2732320725917816
batch 1016: loss 0.12711597979068756
batch 1017: loss 0.1084197610616684
batch 1018: loss 0.09602268040180206
batch 1019: loss 0.17100539803504944
batch 1020: loss 0.35996997356414795
batch 1021: loss 0.17367111146450043
batch 1022: loss 0.18135829269886017
batch 1023: loss 0.20467212796211243
batch 1024: loss 0.33018797636032104
batch 1025: loss 0.24753817915916443
batch 1026: loss 0.14725607633590698
batch 1027: loss 0.15417304635047913
batch 1028: loss 0.15562553703784943
batch 1029: loss 0.09998609125614166
batch 1030: loss 0.2256435751914978
batch 1031: loss 0.14071819186210632
batch 1032: loss 0.1354774683713913
batch 1033: loss 0.06417471915483475
batch 1034: loss 0.12619896233081818
batch 1035: loss 0.0912032276391983
batch 1036: loss 0.14519207179546356
batch 1037: loss 0.1051141545176506
batch 1038: loss 0.14408215880393982
batch 1039: loss 0.20723624527454376
batch 10

batch 1237: loss 0.1845332682132721
batch 1238: loss 0.0708836242556572
batch 1239: loss 0.17174415290355682
batch 1240: loss 0.10859961062669754
batch 1241: loss 0.15791679918766022
batch 1242: loss 0.14291143417358398
batch 1243: loss 0.1588432341814041
batch 1244: loss 0.0917372852563858
batch 1245: loss 0.14101609587669373
batch 1246: loss 0.17643806338310242
batch 1247: loss 0.07502073049545288
batch 1248: loss 0.12737612426280975
batch 1249: loss 0.1411573588848114
batch 1250: loss 0.1416904181241989
batch 1251: loss 0.20402154326438904
batch 1252: loss 0.15770013630390167
batch 1253: loss 0.06572642177343369
batch 1254: loss 0.17692221701145172
batch 1255: loss 0.10943473875522614
batch 1256: loss 0.28926903009414673
batch 1257: loss 0.23518437147140503
batch 1258: loss 0.25475364923477173
batch 1259: loss 0.21900241076946259
batch 1260: loss 0.13188542425632477
batch 1261: loss 0.07896038144826889
batch 1262: loss 0.07511511445045471
batch 1263: loss 0.11658434569835663
batch 1

batch 1460: loss 0.11250843107700348
batch 1461: loss 0.1359679400920868
batch 1462: loss 0.04147541522979736
batch 1463: loss 0.12516947090625763
batch 1464: loss 0.09203146398067474
batch 1465: loss 0.272529274225235
batch 1466: loss 0.22713160514831543
batch 1467: loss 0.2667236626148224
batch 1468: loss 0.09318993985652924
batch 1469: loss 0.14418335258960724
batch 1470: loss 0.2796647548675537
batch 1471: loss 0.06107597053050995
batch 1472: loss 0.11652668565511703
batch 1473: loss 0.023209985345602036
batch 1474: loss 0.11904828995466232
batch 1475: loss 0.08330963551998138
batch 1476: loss 0.18624398112297058
batch 1477: loss 0.3559463322162628
batch 1478: loss 0.10126855969429016
batch 1479: loss 0.17819301784038544
batch 1480: loss 0.11537052690982819
batch 1481: loss 0.10705722868442535
batch 1482: loss 0.26025301218032837
batch 1483: loss 0.1603609174489975
batch 1484: loss 0.12107696384191513
batch 1485: loss 0.14630916714668274
batch 1486: loss 0.13985595107078552
batch 1

batch 1689: loss 0.24453970789909363
batch 1690: loss 0.061763275414705276
batch 1691: loss 0.1402856558561325
batch 1692: loss 0.12473845481872559
batch 1693: loss 0.1123683974146843
batch 1694: loss 0.06556466221809387
batch 1695: loss 0.03398787975311279
batch 1696: loss 0.07374604791402817
batch 1697: loss 0.19704818725585938
batch 1698: loss 0.06719808280467987
batch 1699: loss 0.23389673233032227
batch 1700: loss 0.1316869556903839
batch 1701: loss 0.13301661610603333
batch 1702: loss 0.10859702527523041
batch 1703: loss 0.12210693210363388
batch 1704: loss 0.09010181576013565
batch 1705: loss 0.18248602747917175
batch 1706: loss 0.10198447108268738
batch 1707: loss 0.15067318081855774
batch 1708: loss 0.19065529108047485
batch 1709: loss 0.14493770897388458
batch 1710: loss 0.06850707530975342
batch 1711: loss 0.14341071248054504
batch 1712: loss 0.11175914853811264
batch 1713: loss 0.2506709098815918
batch 1714: loss 0.285860538482666
batch 1715: loss 0.16459758579730988
batch 

batch 1920: loss 0.17183120548725128
batch 1921: loss 0.1681501418352127
batch 1922: loss 0.05663676559925079
batch 1923: loss 0.16035594046115875
batch 1924: loss 0.4885237514972687
batch 1925: loss 0.12990112602710724
batch 1926: loss 0.13258342444896698
batch 1927: loss 0.04881872236728668
batch 1928: loss 0.0792296752333641
batch 1929: loss 0.07158064842224121
batch 1930: loss 0.1058226004242897
batch 1931: loss 0.07096090912818909
batch 1932: loss 0.09959586709737778
batch 1933: loss 0.08520162105560303
batch 1934: loss 0.1902616024017334
batch 1935: loss 0.16445808112621307
batch 1936: loss 0.08739226311445236
batch 1937: loss 0.09865913540124893
batch 1938: loss 0.0482109934091568
batch 1939: loss 0.06018314138054848
batch 1940: loss 0.17738638818264008
batch 1941: loss 0.09836657345294952
batch 1942: loss 0.09307960420846939
batch 1943: loss 0.29306092858314514
batch 1944: loss 0.11887255311012268
batch 1945: loss 0.13067787885665894
batch 1946: loss 0.03833404555916786
batch 1

batch 2150: loss 0.08695849031209946
batch 2151: loss 0.048726387321949005
batch 2152: loss 0.04051174223423004
batch 2153: loss 0.22520889341831207
batch 2154: loss 0.16388240456581116
batch 2155: loss 0.209634929895401
batch 2156: loss 0.16753898561000824
batch 2157: loss 0.1888800412416458
batch 2158: loss 0.01915547251701355
batch 2159: loss 0.04040980339050293
batch 2160: loss 0.1477951556444168
batch 2161: loss 0.12125752866268158
batch 2162: loss 0.16309751570224762
batch 2163: loss 0.2761151194572449
batch 2164: loss 0.2464291751384735
batch 2165: loss 0.05743597820401192
batch 2166: loss 0.04847937449812889
batch 2167: loss 0.10369004309177399
batch 2168: loss 0.0452052503824234
batch 2169: loss 0.06158633530139923
batch 2170: loss 0.06989964097738266
batch 2171: loss 0.09505008161067963
batch 2172: loss 0.10088613629341125
batch 2173: loss 0.1709192991256714
batch 2174: loss 0.2089129239320755
batch 2175: loss 0.08643023669719696
batch 2176: loss 0.06460429728031158
batch 217

batch 2382: loss 0.10628524422645569
batch 2383: loss 0.04801355302333832
batch 2384: loss 0.1668861210346222
batch 2385: loss 0.08185670524835587
batch 2386: loss 0.0424811877310276
batch 2387: loss 0.25581756234169006
batch 2388: loss 0.1434057652950287
batch 2389: loss 0.045227374881505966
batch 2390: loss 0.10177922993898392
batch 2391: loss 0.05081059783697128
batch 2392: loss 0.13902625441551208
batch 2393: loss 0.10596532374620438
batch 2394: loss 0.08395815640687943
batch 2395: loss 0.16645972430706024
batch 2396: loss 0.1644289791584015
batch 2397: loss 0.035744957625865936
batch 2398: loss 0.12477460503578186
batch 2399: loss 0.024753794074058533
batch 2400: loss 0.04829861968755722
batch 2401: loss 0.0795716717839241
batch 2402: loss 0.17564429342746735
batch 2403: loss 0.04044977203011513
batch 2404: loss 0.11865818500518799
batch 2405: loss 0.07653027027845383
batch 2406: loss 0.027599701657891273
batch 2407: loss 0.06412193924188614
batch 2408: loss 0.23012280464172363
ba

batch 2611: loss 0.252103328704834
batch 2612: loss 0.13280850648880005
batch 2613: loss 0.280124694108963
batch 2614: loss 0.34361356496810913
batch 2615: loss 0.05957774072885513
batch 2616: loss 0.0685478001832962
batch 2617: loss 0.09308689832687378
batch 2618: loss 0.3005453646183014
batch 2619: loss 0.09761513769626617
batch 2620: loss 0.028384186327457428
batch 2621: loss 0.15378285944461823
batch 2622: loss 0.07252050191164017
batch 2623: loss 0.2097136676311493
batch 2624: loss 0.1667896956205368
batch 2625: loss 0.07373461127281189
batch 2626: loss 0.1428060382604599
batch 2627: loss 0.1720465123653412
batch 2628: loss 0.028928270563483238
batch 2629: loss 0.35508471727371216
batch 2630: loss 0.20603495836257935
batch 2631: loss 0.033050570636987686
batch 2632: loss 0.07622829079627991
batch 2633: loss 0.06852160394191742
batch 2634: loss 0.0768405869603157
batch 2635: loss 0.14471812546253204
batch 2636: loss 0.13276250660419464
batch 2637: loss 0.081377774477005
batch 2638:

batch 2838: loss 0.08162789046764374
batch 2839: loss 0.07371589541435242
batch 2840: loss 0.0483550950884819
batch 2841: loss 0.05127594992518425
batch 2842: loss 0.05724584311246872
batch 2843: loss 0.05640621855854988
batch 2844: loss 0.023130564019083977
batch 2845: loss 0.048347778618335724
batch 2846: loss 0.09105836600065231
batch 2847: loss 0.05887335538864136
batch 2848: loss 0.051437053829431534
batch 2849: loss 0.07800912857055664
batch 2850: loss 0.1308281123638153
batch 2851: loss 0.03677067160606384
batch 2852: loss 0.06303553283214569
batch 2853: loss 0.1576356440782547
batch 2854: loss 0.05273560434579849
batch 2855: loss 0.11029969900846481
batch 2856: loss 0.12557971477508545
batch 2857: loss 0.1040748879313469
batch 2858: loss 0.09188485145568848
batch 2859: loss 0.046092696487903595
batch 2860: loss 0.051684558391571045
batch 2861: loss 0.12958365678787231
batch 2862: loss 0.14255504310131073
batch 2863: loss 0.12029280513525009
batch 2864: loss 0.040938131511211395

batch 3071: loss 0.028692658990621567
batch 3072: loss 0.029867030680179596
batch 3073: loss 0.06233566254377365
batch 3074: loss 0.06010990962386131
batch 3075: loss 0.07843604683876038
batch 3076: loss 0.23430082201957703
batch 3077: loss 0.14510735869407654
batch 3078: loss 0.01995847001671791
batch 3079: loss 0.04244048893451691
batch 3080: loss 0.07809233665466309
batch 3081: loss 0.09145224094390869
batch 3082: loss 0.06367561966180801
batch 3083: loss 0.12491054087877274
batch 3084: loss 0.11588021367788315
batch 3085: loss 0.1427973508834839
batch 3086: loss 0.05326111242175102
batch 3087: loss 0.11503597348928452
batch 3088: loss 0.03900478035211563
batch 3089: loss 0.077642522752285
batch 3090: loss 0.1189141497015953
batch 3091: loss 0.012078914791345596
batch 3092: loss 0.07074509561061859
batch 3093: loss 0.019341982901096344
batch 3094: loss 0.023076917976140976
batch 3095: loss 0.034651439636945724
batch 3096: loss 0.1412518322467804
batch 3097: loss 0.027991807088255882

batch 3303: loss 0.1721855252981186
batch 3304: loss 0.27314823865890503
batch 3305: loss 0.1020222157239914
batch 3306: loss 0.013486032374203205
batch 3307: loss 0.2241886705160141
batch 3308: loss 0.04375320300459862
batch 3309: loss 0.04613880068063736
batch 3310: loss 0.08755498379468918
batch 3311: loss 0.05024833604693413
batch 3312: loss 0.1210804432630539
batch 3313: loss 0.0444004200398922
batch 3314: loss 0.047660358250141144
batch 3315: loss 0.0679621547460556
batch 3316: loss 0.03569868206977844
batch 3317: loss 0.03385927155613899
batch 3318: loss 0.1328859180212021
batch 3319: loss 0.08034559339284897
batch 3320: loss 0.09201285988092422
batch 3321: loss 0.13011273741722107
batch 3322: loss 0.055712562054395676
batch 3323: loss 0.04406911879777908
batch 3324: loss 0.11436714231967926
batch 3325: loss 0.04697142541408539
batch 3326: loss 0.1537514477968216
batch 3327: loss 0.056993741542100906
batch 3328: loss 0.18270649015903473
batch 3329: loss 0.11109789460897446
batch

batch 3536: loss 0.09453058242797852
batch 3537: loss 0.0641007125377655
batch 3538: loss 0.027202052995562553
batch 3539: loss 0.07875315845012665
batch 3540: loss 0.12689760327339172
batch 3541: loss 0.1135612428188324
batch 3542: loss 0.01910015381872654
batch 3543: loss 0.07078294456005096
batch 3544: loss 0.033393606543540955
batch 3545: loss 0.143934428691864
batch 3546: loss 0.030366849154233932
batch 3547: loss 0.06180335953831673
batch 3548: loss 0.14781661331653595
batch 3549: loss 0.03899306058883667
batch 3550: loss 0.07014326006174088
batch 3551: loss 0.0766938254237175
batch 3552: loss 0.11689906567335129
batch 3553: loss 0.12726755440235138
batch 3554: loss 0.04892101138830185
batch 3555: loss 0.031354859471321106
batch 3556: loss 0.1495324820280075
batch 3557: loss 0.14865702390670776
batch 3558: loss 0.05133343115448952
batch 3559: loss 0.20719100534915924
batch 3560: loss 0.06184220314025879
batch 3561: loss 0.07600609958171844
batch 3562: loss 0.05306467041373253
bat

batch 3768: loss 0.2540540397167206
batch 3769: loss 0.15654203295707703
batch 3770: loss 0.05851902812719345
batch 3771: loss 0.13186438381671906
batch 3772: loss 0.14092646539211273
batch 3773: loss 0.06293380260467529
batch 3774: loss 0.022629400715231895
batch 3775: loss 0.07162503898143768
batch 3776: loss 0.06273949146270752
batch 3777: loss 0.0838392898440361
batch 3778: loss 0.04195934534072876
batch 3779: loss 0.007529278751462698
batch 3780: loss 0.08189790695905685
batch 3781: loss 0.025563962757587433
batch 3782: loss 0.13351964950561523
batch 3783: loss 0.057307250797748566
batch 3784: loss 0.12182866036891937
batch 3785: loss 0.09130673855543137
batch 3786: loss 0.02286677621304989
batch 3787: loss 0.04691353812813759
batch 3788: loss 0.18000978231430054
batch 3789: loss 0.03255210444331169
batch 3790: loss 0.05179348960518837
batch 3791: loss 0.030574802309274673
batch 3792: loss 0.08946559578180313
batch 3793: loss 0.13809223473072052
batch 3794: loss 0.1577856242656707

batch 3999: loss 0.009392740204930305
batch 4000: loss 0.044879309833049774
batch 4001: loss 0.10019142180681229
batch 4002: loss 0.07227958738803864
batch 4003: loss 0.06500329822301865
batch 4004: loss 0.10353606194257736
batch 4005: loss 0.11051268130540848
batch 4006: loss 0.12731248140335083
batch 4007: loss 0.06379308551549911
batch 4008: loss 0.019855299964547157
batch 4009: loss 0.1292591542005539
batch 4010: loss 0.0261918343603611
batch 4011: loss 0.027373850345611572
batch 4012: loss 0.03949876129627228
batch 4013: loss 0.22327342629432678
batch 4014: loss 0.04657222330570221
batch 4015: loss 0.05603257194161415
batch 4016: loss 0.040496814996004105
batch 4017: loss 0.03922063112258911
batch 4018: loss 0.016838369891047478
batch 4019: loss 0.10327206552028656
batch 4020: loss 0.04307350143790245
batch 4021: loss 0.03405178338289261
batch 4022: loss 0.07805048674345016
batch 4023: loss 0.20349754393100739
batch 4024: loss 0.02473738230764866
batch 4025: loss 0.095702804625034

batch 4230: loss 0.08226146548986435
batch 4231: loss 0.05692882463335991
batch 4232: loss 0.09493596106767654
batch 4233: loss 0.018580717965960503
batch 4234: loss 0.02123429998755455
batch 4235: loss 0.05818045139312744
batch 4236: loss 0.015826862305402756
batch 4237: loss 0.16572332382202148
batch 4238: loss 0.05125819146633148
batch 4239: loss 0.08622781932353973
batch 4240: loss 0.0357387475669384
batch 4241: loss 0.07343786209821701
batch 4242: loss 0.027255196124315262
batch 4243: loss 0.030307907611131668
batch 4244: loss 0.03991209343075752
batch 4245: loss 0.025164131075143814
batch 4246: loss 0.03787998482584953
batch 4247: loss 0.013577418401837349
batch 4248: loss 0.035869065672159195
batch 4249: loss 0.0551343634724617
batch 4250: loss 0.023580776527523994
batch 4251: loss 0.033320482820272446
batch 4252: loss 0.0322171151638031
batch 4253: loss 0.04151683300733566
batch 4254: loss 0.045548632740974426
batch 4255: loss 0.043570756912231445
batch 4256: loss 0.04425105825

batch 4455: loss 0.124223493039608
batch 4456: loss 0.0486871562898159
batch 4457: loss 0.1285775899887085
batch 4458: loss 0.015531294979155064
batch 4459: loss 0.10573092848062515
batch 4460: loss 0.02721850387752056
batch 4461: loss 0.0589543916285038
batch 4462: loss 0.04090629518032074
batch 4463: loss 0.023546865209937096
batch 4464: loss 0.03551863506436348
batch 4465: loss 0.04710167273879051
batch 4466: loss 0.03348633646965027
batch 4467: loss 0.07892189174890518
batch 4468: loss 0.03873438388109207
batch 4469: loss 0.02049337327480316
batch 4470: loss 0.12387368083000183
batch 4471: loss 0.02577192708849907
batch 4472: loss 0.05636851117014885
batch 4473: loss 0.021134668961167336
batch 4474: loss 0.009531998075544834
batch 4475: loss 0.09622514247894287
batch 4476: loss 0.13036225736141205
batch 4477: loss 0.12516525387763977
batch 4478: loss 0.07142367213964462
batch 4479: loss 0.03531532734632492
batch 4480: loss 0.008582143113017082
batch 4481: loss 0.061696041375398636


batch 4689: loss 0.22388692200183868
batch 4690: loss 0.09300503879785538
batch 4691: loss 0.14734739065170288
batch 4692: loss 0.0454648956656456
batch 4693: loss 0.143210768699646
batch 4694: loss 0.20717324316501617
batch 4695: loss 0.020057279616594315
batch 4696: loss 0.05945516377687454
batch 4697: loss 0.018213097006082535
batch 4698: loss 0.023616841062903404
batch 4699: loss 0.04981011152267456
batch 4700: loss 0.03202986344695091
batch 4701: loss 0.025759385898709297
batch 4702: loss 0.032208990305662155
batch 4703: loss 0.01735798828303814
batch 4704: loss 0.022728776559233665
batch 4705: loss 0.014387856237590313
batch 4706: loss 0.06026336923241615
batch 4707: loss 0.16928665339946747
batch 4708: loss 0.04953768104314804
batch 4709: loss 0.029455993324518204
batch 4710: loss 0.029889749363064766
batch 4711: loss 0.02845695987343788
batch 4712: loss 0.037795472890138626
batch 4713: loss 0.07532337307929993
batch 4714: loss 0.010022041387856007
batch 4715: loss 0.14652803540

batch 4919: loss 0.052892979234457016
batch 4920: loss 0.0937715619802475
batch 4921: loss 0.06274821609258652
batch 4922: loss 0.044390905648469925
batch 4923: loss 0.12508781254291534
batch 4924: loss 0.09197679162025452
batch 4925: loss 0.01023838296532631
batch 4926: loss 0.04252065718173981
batch 4927: loss 0.009032294154167175
batch 4928: loss 0.06254056841135025
batch 4929: loss 0.13622261583805084
batch 4930: loss 0.012982625514268875
batch 4931: loss 0.01126958429813385
batch 4932: loss 0.2361791580915451
batch 4933: loss 0.04636678472161293
batch 4934: loss 0.061300162225961685
batch 4935: loss 0.04163951426744461
batch 4936: loss 0.08000452071428299
batch 4937: loss 0.10513977706432343
batch 4938: loss 0.07086271792650223
batch 4939: loss 0.10618475824594498
batch 4940: loss 0.10804872959852219
batch 4941: loss 0.022961871698498726
batch 4942: loss 0.06637685745954514
batch 4943: loss 0.042823467403650284
batch 4944: loss 0.04062414914369583
batch 4945: loss 0.04805713519454

batch 5152: loss 0.023102696985006332
batch 5153: loss 0.04150742292404175
batch 5154: loss 0.12350206822156906
batch 5155: loss 0.038473162800073624
batch 5156: loss 0.2400617152452469
batch 5157: loss 0.05360627546906471
batch 5158: loss 0.012905625626444817
batch 5159: loss 0.03011118434369564
batch 5160: loss 0.12793321907520294
batch 5161: loss 0.06560973823070526
batch 5162: loss 0.07661076635122299
batch 5163: loss 0.03989051282405853
batch 5164: loss 0.009472118690609932
batch 5165: loss 0.07948201149702072
batch 5166: loss 0.014187612570822239
batch 5167: loss 0.008983428589999676
batch 5168: loss 0.0655910074710846
batch 5169: loss 0.06710302084684372
batch 5170: loss 0.20707343518733978
batch 5171: loss 0.011290322057902813
batch 5172: loss 0.10453291982412338
batch 5173: loss 0.08933732658624649
batch 5174: loss 0.1524049937725067
batch 5175: loss 0.01137480791658163
batch 5176: loss 0.026317190378904343
batch 5177: loss 0.007213954348117113
batch 5178: loss 0.0627487227320

batch 5388: loss 0.01735047809779644
batch 5389: loss 0.07532758265733719
batch 5390: loss 0.04700238257646561
batch 5391: loss 0.013811538927257061
batch 5392: loss 0.014669595286250114
batch 5393: loss 0.07054777443408966
batch 5394: loss 0.020277447998523712
batch 5395: loss 0.07851995527744293
batch 5396: loss 0.06875786930322647
batch 5397: loss 0.055692870169878006
batch 5398: loss 0.035018082708120346
batch 5399: loss 0.05971524119377136
batch 5400: loss 0.015367365442216396
batch 5401: loss 0.02884652465581894
batch 5402: loss 0.021510111168026924
batch 5403: loss 0.12548957765102386
batch 5404: loss 0.008313020691275597
batch 5405: loss 0.006594356149435043
batch 5406: loss 0.07213843613862991
batch 5407: loss 0.02876076102256775
batch 5408: loss 0.023489542305469513
batch 5409: loss 0.05345015972852707
batch 5410: loss 0.03022150881588459
batch 5411: loss 0.015230881981551647
batch 5412: loss 0.05406174436211586
batch 5413: loss 0.02229466661810875
batch 5414: loss 0.03478442

batch 5623: loss 0.01691661961376667
batch 5624: loss 0.015019950456917286
batch 5625: loss 0.02949177846312523
batch 5626: loss 0.09998094290494919
batch 5627: loss 0.03722020983695984
batch 5628: loss 0.03370256349444389
batch 5629: loss 0.09690870344638824
batch 5630: loss 0.040230028331279755
batch 5631: loss 0.006415148731321096
batch 5632: loss 0.055146872997283936
batch 5633: loss 0.05692228302359581
batch 5634: loss 0.01843654178082943
batch 5635: loss 0.11376971751451492
batch 5636: loss 0.015736715868115425
batch 5637: loss 0.012142731808125973
batch 5638: loss 0.013480628840625286
batch 5639: loss 0.058514565229415894
batch 5640: loss 0.06929200887680054
batch 5641: loss 0.14117345213890076
batch 5642: loss 0.020022470504045486
batch 5643: loss 0.022107014432549477
batch 5644: loss 0.026071475818753242
batch 5645: loss 0.08602296561002731
batch 5646: loss 0.028026530519127846
batch 5647: loss 0.17650628089904785
batch 5648: loss 0.09164627641439438
batch 5649: loss 0.0086803

batch 5859: loss 0.06635650247335434
batch 5860: loss 0.044807318598032
batch 5861: loss 0.031619034707546234
batch 5862: loss 0.04561344161629677
batch 5863: loss 0.025919174775481224
batch 5864: loss 0.0485365055501461
batch 5865: loss 0.05704955756664276
batch 5866: loss 0.04164555296301842
batch 5867: loss 0.015034801326692104
batch 5868: loss 0.0717722550034523
batch 5869: loss 0.1420058161020279
batch 5870: loss 0.008207676000893116
batch 5871: loss 0.0892280861735344
batch 5872: loss 0.03698015585541725
batch 5873: loss 0.04123660549521446
batch 5874: loss 0.02604474499821663
batch 5875: loss 0.01767832785844803
batch 5876: loss 0.0747755616903305
batch 5877: loss 0.06325137615203857
batch 5878: loss 0.016910558566451073
batch 5879: loss 0.009406445547938347
batch 5880: loss 0.04169231653213501
batch 5881: loss 0.09183676540851593
batch 5882: loss 0.0049897292628884315
batch 5883: loss 0.20171630382537842
batch 5884: loss 0.04530685022473335
batch 5885: loss 0.010482053272426128

## 模型的評估 
* 使用 **tf.keras.metrics** 中的 **SparseCategoricalAccuracy** 評估器來評估模型在測試集上的性能，該評估器能夠對模型預測的結果與真實結果進行比較，並輸出預測正確的樣本數佔總樣本數的比例。

In [9]:
# Evaluate on the test split: accumulate accuracy batch by batch.
# The variable name keeps its original spelling ("catgorical") in case other
# cells refer to it.
sparse_catgorical_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
# Ceiling division so a trailing partial batch is still evaluated when
# batch_size does not divide the test-set size evenly.
num_batches = int(-(-data_loader.num_test_data // batch_size))
for batch_index in range(num_batches):
    # Slicing past the end of the array is safe: the last slice is just shorter.
    start_index, end_index = batch_index * batch_size, (batch_index + 1) * batch_size
    # Call the model directly instead of model.predict(): predict() carries
    # per-call overhead and is meant for large inputs, not small batches in a
    # loop. .numpy() keeps y_pred a NumPy array, as predict() returned.
    y_pred = model(data_loader.test_data[start_index: end_index]).numpy()
    sparse_catgorical_accuracy.update_state(y_true=data_loader.test_label[start_index: end_index], y_pred=y_pred)
print(f"test accuracy: {sparse_catgorical_accuracy.result()}")



test accuracy: 0.9725000262260437
