In [None]:
import numpy as np
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Activation, Dense, Dropout, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

In [None]:
### load mnist dataset
# load_data() yields a (train, test) pair; each element is an (images, labels) pair
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [None]:
### compute the number of labels
# count the distinct values that occur in the training labels
num_labels = np.unique(y_train).size

In [None]:
### convert to one-hot vector
# Pass the class count explicitly so the train and test encodings always have
# the same width as the Dense(num_labels) output layer. Without num_classes,
# to_categorical infers the width from the largest label in each array
# separately, so a split that lacks the highest class would come out narrower.
# Assumes the labels are the integers 0..num_labels-1 (true for MNIST).
# NOTE: not idempotent -- reload the dataset before re-running this cell.
y_train = to_categorical(y_train, num_classes=num_labels)
y_test = to_categorical(y_test, num_classes=num_labels)

In [None]:
# dimensions of the one-hot encoded training labels
np.shape(y_train)

In [None]:
# peek at the first ten encoded label rows
y_train[:10]

In [None]:
# dimensions of the training image array
np.shape(x_train)

In [None]:
# extent of axis 1 of the training images (used as the image side length)
np.shape(x_train)[1]

In [None]:
### image dimensions (assumed square)
# side length is read from axis 1 of x_train; a flattened image then holds
# side * side values
image_size = np.shape(x_train)[1]
input_size = image_size ** 2

In [None]:
# display the flattened per-image input length (image_size * image_size)
input_size

In [None]:
### resize and normalize
# Flatten every image into a row of length input_size, cast to float32 and
# divide by 255.
# NOTE: re-running this cell divides the already-scaled data by 255 again.
x_train = x_train.reshape(-1, input_size).astype('float32') / 255
x_test = x_test.reshape(-1, input_size).astype('float32') / 255

In [None]:
### network parameters
# in order: mini-batch size, units per hidden Dense layer, dropout rate
batch_size, hidden_units, dropout = 128, 256, 0.45

In [None]:
### model is a 3-layer MLP: two hidden Dense layers, each followed by ReLU
### and dropout, then a Dense output layer with one unit per label

# A Dense layer performs a linear operation, so a stack of Dense layers alone
# can only approximate linear functions.
# The catch is that MNIST digit classification is inherently non-linear.
# Inserting a relu activation between the Dense layers lets the MLP model
# non-linear mappings.

model = Sequential()
# Declare the input shape with an explicit Input object; passing input_dim to
# the first Dense layer is deprecated in Keras 3 and emits a UserWarning.
model.add(Input(shape=(input_size,)))
model.add(Dense(hidden_units))
model.add(Activation('relu'))
model.add(Dropout(dropout))
model.add(Dense(hidden_units))
model.add(Activation('relu'))
model.add(Dropout(dropout))
# Output layer: one unit per label, no activation applied by this layer.
model.add(Dense(num_labels))

In [None]:
### this is the output for one-hot vector
# NOTE: model.add appends a layer, so re-running this cell stacks another
# softmax on top of the model.
softmax_output = Activation('softmax')
model.add(softmax_output)

In [None]:
# print the layer-by-layer overview of the assembled model
model.summary()

In [None]:
### compile

# categorical cross-entropy is the loss used with the one-hot encoded targets;
# accuracy is tracked as an additional metric
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
### train the network
# 20 passes over the training set in mini-batches of batch_size samples
model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=20,
)

In [None]:
### validate the model on test dataset to determine generalization
# evaluate() returns the loss followed by the compiled metrics (accuracy here).
# Bind the loss to a real name instead of `_`: in IPython, assigning to `_`
# stops the automatic "last output" variable from updating.
test_loss, acc = model.evaluate(x_test,
                                y_test,
                                batch_size=batch_size,
                                verbose=0)

In [None]:
# express the accuracy fraction as a percentage with one decimal place
accuracy_pct = 100.0 * acc
print("\nTest accuracy: %.1f%%" % accuracy_pct)