In [24]:
import numpy as np
import mnist
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

train_images = mnist.train_images()
train_labels = mnist.train_labels()
test_images = mnist.test_images()
test_labels = mnist.test_labels()

print(train_images.shape) # (60000, 28, 28)
print(train_labels.shape) # (60000,)

(60000, 28, 28)
(60000,)


In [25]:
###
seed = 1
x_train, x_val, y_train, y_val = train_test_split(train_images, train_labels, test_size=0.1, random_state=seed)
print(x_train.shape)
print(x_val.shape) 
print(y_train.shape)
print(y_val.shape) 

(54000, 28, 28)
(6000, 28, 28)
(54000,)
(6000,)


In [26]:
###
# Normalize the images.
x_train = (x_train / 255) - 0.5
x_val = (x_val / 255) - 0.5
test_images = (test_images / 255) - 0.5

# Flatten the images.
x_train = x_train.reshape((-1, 784))
x_val = x_val.reshape((-1, 784))
test_images = test_images.reshape((-1, 784))

print(x_train.shape) # (60000, 784)
print(x_val.shape)  # (6000, 784)
print(test_images.shape)  # (10000, 784)

(54000, 784)
(6000, 784)
(10000, 784)


In [3]:
# Normalize the images.
train_images = (train_images / 255) - 0.5
test_images = (test_images / 255) - 0.5

# Flatten the images.
train_images = train_images.reshape((-1, 784))
test_images = test_images.reshape((-1, 784))

print(train_images.shape) # (60000, 784)
print(test_images.shape)  # (10000, 784)

(60000, 784)
(10000, 784)


In [52]:
# 0.97 -> 0.94 / 2. layer 64 -> 10
model = Sequential([
  Dense(128, activation='relu', input_shape=(784,)),
  Dense(64, activation='relu'),
  Dense(32, activation='relu'),
  Dense(10, activation='softmax'),
])

In [53]:
model.compile(
  optimizer=Adam(learning_rate=0.001),
  loss='categorical_crossentropy',
  metrics=['accuracy'],
)

In [5]:
### Early Stopping
# monitor=accuracy, patience=8, min_delta=0.001 => l:0.2085 a:0.9609
# monitor=accuracy, patience=10, min_delta=0.001 => l:0.2298 a:0.9524 / l:0.2944 a:0.9583
# monitor=accuracy, patience=8 => l:0.3120 a:9551
# monitor=accuracy, patience=10 => l:0.2984 a:9592
# monitor=loss, patience=10 => l:0.3277 a:0.9567

In [54]:
early_stopping = EarlyStopping(
    monitor='val_loss', 
    patience=15, 
    mode='min'
)

In [63]:
# batch_size 32->64 / l:0.3277 a:0.9567 -> l:0.3574 a:0.9617
model.fit(
  x_train, # training data
  to_categorical(y_train), # training targets
  epochs=1000,
  batch_size=128,
  validation_data=(x_val, to_categorical(y_val)),
  callbacks=[early_stopping],
)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000


<keras.callbacks.History at 0x1c42fd03be0>

In [64]:
model.evaluate(
  test_images,
  to_categorical(test_labels)
)



[0.15139010548591614, 0.9769999980926514]

In [65]:
Y_pred = model.predict(test_images)
y_pred = np.argmax(Y_pred, axis=1)
print(accuracy_score(test_labels, y_pred))
print(Y_pred)
print(y_pred)

0.977
[[2.2499689e-16 1.5142906e-12 7.2200067e-12 ... 1.0000000e+00
  1.8186934e-14 1.4401128e-11]
 [1.1997017e-20 2.2926657e-16 1.0000000e+00 ... 1.4659589e-33
  9.8586136e-22 5.0594743e-28]
 [8.1572520e-11 9.9999964e-01 1.3747392e-07 ... 1.3222671e-08
  1.4930771e-10 1.3590295e-13]
 ...
 [2.3783608e-33 1.0795384e-24 8.5370152e-33 ... 2.4908306e-16
  3.5525488e-16 1.0097606e-13]
 [5.6103559e-13 9.9541837e-22 4.5625413e-22 ... 2.0535917e-13
  1.9938261e-12 2.7336990e-18]
 [9.5530055e-21 1.5260779e-26 6.5586242e-27 ... 2.7017833e-37
  2.5608492e-21 3.5877723e-21]]
[7 2 1 ... 4 5 6]


In [66]:
print(classification_report(test_labels, y_pred))

              precision    recall  f1-score   support

           0       0.98      0.98      0.98       980
           1       0.99      0.99      0.99      1135
           2       0.95      0.98      0.97      1032
           3       0.97      0.98      0.98      1010
           4       0.98      0.97      0.98       982
           5       0.97      0.97      0.97       892
           6       0.97      0.98      0.98       958
           7       0.98      0.97      0.98      1028
           8       0.98      0.97      0.97       974
           9       0.98      0.97      0.97      1009

    accuracy                           0.98     10000
   macro avg       0.98      0.98      0.98     10000
weighted avg       0.98      0.98      0.98     10000



In [10]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                50240     
                                                                 
 dense_1 (Dense)             (None, 64)                4160      
                                                                 
 dense_2 (Dense)             (None, 10)                650       
                                                                 
Total params: 55,050
Trainable params: 55,050
Non-trainable params: 0
_________________________________________________________________
