Ref.: https://www.kaggle.com/prashant111/mnist-deep-neural-network-with-keras#5.-Designing-model-architecture-using-Keras-

# 1. Load and Preprocess Data

In [None]:
from keras.datasets import mnist
import numpy as np

In [None]:
# Load the MNIST images and integer labels, already split into train and test.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Print how many samples of each label value appear in each split.
for heading, labels in (("Train labels: ", y_train), ("\nTest labels: ", y_test)):
    unique, counts = np.unique(labels, return_counts=True)
    print(heading, dict(zip(unique, counts)))

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Train labels:  {0: 5923, 1: 6742, 2: 5958, 3: 6131, 4: 5842, 5: 5421, 6: 5918, 7: 6265, 8: 5851, 9: 5949}

Test labels:  {0: 980, 1: 1135, 2: 1032, 3: 1010, 4: 982, 5: 892, 6: 958, 7: 1028, 8: 974, 9: 1009}


In [None]:
# Show the dimensions of the training image array as loaded.
print(x_train.shape)

(60000, 28, 28)


In [None]:
# Number of values per sample: the product of the two trailing image dimensions.
n_rows, n_cols = x_train.shape[1:]
input_size = n_rows * n_cols
print(input_size)

784


In [None]:
# Flatten each sample into a single row of input_size values; -1 lets NumPy
# infer the number of samples.
x_train = x_train.reshape(-1, input_size)
x_test = x_test.reshape(-1, input_size)

In [None]:
# Show the training array dimensions after the reshape.
print(x_train.shape)

(60000, 784)


In [None]:
# Convert both splits to float32 and divide by 255.
# NOTE(review): presumably the raw pixels are 0-255, which would map them to [0, 1] — confirm.
x_train, x_test = (
    split.astype('float32') / 255
    for split in (x_train, x_test)
)

In [None]:
# Number of distinct label values present in the training labels.
n_labels = np.unique(y_train).size

In [None]:
from tensorflow.keras.utils import to_categorical

In [None]:
# Show the first training label.
print(y_train[0])

5


In [None]:
# One-hot encode the integer labels of both splits.
# num_classes is passed explicitly so train and test targets always have the
# same width (n_labels), matching the output size the model is built with.
# Left unset, the width is inferred separately from the largest label in each
# array, which could differ between splits.
y_train = to_categorical(y_train, num_classes=n_labels)
y_test = to_categorical(y_test, num_classes=n_labels)

In [None]:
# Show the first training label again, now in its encoded form.
print(y_train[0])

[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]


# 2. Define Hyperparameter Set

In [None]:
# Candidate values for each searched hyperparameter; the search tries every
# combination of the three lists.
batch_size_set = [32, 64, 128]
hidden_units_set = [64, 128, 256]
optimizer_set = ['Adam', 'rmsprop']

## Fixed Hyperparameters

In [None]:
# Values held constant for every configuration in the search.
dropout, act_fn = 0.45, 'relu'

# 3. Searching

In [None]:
from mlp import *

In [None]:
import pandas as pd

In [None]:
# Empty results table: one column per searched hyperparameter plus the score.
result_columns = ('batch_size', 'hidden_units', 'optimizer', 'test_acc')
df = pd.DataFrame({column: [] for column in result_columns})

In [None]:
# Exhaustive grid search: for every (batch_size, hidden_units, optimizer)
# combination, build a fresh model, train it for 10 epochs and record its
# accuracy (as a percentage) on the test split.
# NOTE(review): the score is measured on the test split, so the configuration
# chosen from this table is tuned to that split — consider selecting on a
# held-out validation split and reporting test accuracy only once.
results = []  # one dict per configuration, turned into a DataFrame at the end
for batch_size in batch_size_set:
  for hidden_units in hidden_units_set:
    for optimizer in optimizer_set:
      model = MLP(input_size, n_labels, hidden_units, act_fn, dropout)
      model.compile(loss='categorical_crossentropy',
              optimizer = optimizer,
              metrics = ['accuracy'])
      model.fit(x_train, y_train, epochs = 10, batch_size=batch_size, verbose = 0)
      loss, acc = model.evaluate(x_test, y_test, batch_size=batch_size)

      rlt = {'batch_size':batch_size,
             'hidden_units':hidden_units,
             'optimizer':optimizer,
             'test_acc':acc*100}
      print(rlt)
      results.append(rlt)

# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies it on every iteration. Building it once from the collected rows also
# keeps batch_size and hidden_units as integers instead of floats.
df = pd.DataFrame(results, columns=['batch_size', 'hidden_units', 'optimizer', 'test_acc'])

{'batch_size': 32, 'hidden_units': 64, 'optimizer': 'Adam', 'test_acc': 96.46999835968018}
{'batch_size': 32, 'hidden_units': 64, 'optimizer': 'rmsprop', 'test_acc': 95.69000005722046}
{'batch_size': 32, 'hidden_units': 128, 'optimizer': 'Adam', 'test_acc': 97.64000177383423}
{'batch_size': 32, 'hidden_units': 128, 'optimizer': 'rmsprop', 'test_acc': 96.79999947547913}
{'batch_size': 32, 'hidden_units': 256, 'optimizer': 'Adam', 'test_acc': 97.93000221252441}
{'batch_size': 32, 'hidden_units': 256, 'optimizer': 'rmsprop', 'test_acc': 97.61999845504761}
{'batch_size': 64, 'hidden_units': 64, 'optimizer': 'Adam', 'test_acc': 96.5499997138977}
{'batch_size': 64, 'hidden_units': 64, 'optimizer': 'rmsprop', 'test_acc': 95.95999717712402}
{'batch_size': 64, 'hidden_units': 128, 'optimizer': 'Adam', 'test_acc': 97.46999740600586}
{'batch_size': 64, 'hidden_units': 128, 'optimizer': 'rmsprop', 'test_acc': 96.92999720573425}
{'batch_size': 64, 'hidden_units': 256, 'optimizer': 'Adam', 'test_acc

# 4. Results

In [None]:
# Print the full results table, one row per evaluated configuration.
print(df)

    batch_size  hidden_units optimizer   test_acc
0         32.0          64.0      Adam  96.469998
1         32.0          64.0   rmsprop  95.690000
2         32.0         128.0      Adam  97.640002
3         32.0         128.0   rmsprop  96.799999
4         32.0         256.0      Adam  97.930002
5         32.0         256.0   rmsprop  97.619998
6         64.0          64.0      Adam  96.550000
7         64.0          64.0   rmsprop  95.959997
8         64.0         128.0      Adam  97.469997
9         64.0         128.0   rmsprop  96.929997
10        64.0         256.0      Adam  98.009998
11        64.0         256.0   rmsprop  97.680002
12       128.0          64.0      Adam  96.179998
13       128.0          64.0   rmsprop  96.100003
14       128.0         128.0      Adam  97.549999
15       128.0         128.0   rmsprop  97.500002
16       128.0         256.0      Adam  98.000002
17       128.0         256.0   rmsprop  97.759998


In [None]:
# Summary statistics for the numeric columns of the results table.
df.describe()

Unnamed: 0,batch_size,hidden_units,test_acc
count,18.0,18.0,18.0
mean,74.666667,149.333333,97.102222
std,41.068093,82.136185,0.771276
min,32.0,64.0,95.69
25%,32.0,64.0,96.489999
50%,64.0,128.0,97.485
75%,128.0,256.0,97.670002
max,128.0,256.0,98.009998


In [None]:
# Position of the row with the highest recorded accuracy.
best_idx = df['test_acc'].argmax()

In [None]:
# Select the best-scoring row by position and print its hyperparameters.
best_hp = df.iloc[best_idx]
print(best_hp)

batch_size         64
hidden_units      256
optimizer        Adam
test_acc        98.01
Name: 10, dtype: object
