In [1]:
import tensorflow as tf
from tensorflow import keras as tf_keras

In [2]:
from re import X
# 데이터 준비 : keras 내장 데이터셋
(X_train, y_train), (X_test, y_test) = tf_keras.datasets.mnist.load_data()

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
(60000, 28, 28) (10000, 28, 28) (60000,) (10000,)


In [3]:
# 입력 데이터 구조 변경
X_train_flatten = X_train.reshape(-1, 28*28)
X_test_flatten = X_test.reshape(-1, 28*28)
print(X_train_flatten.shape, X_test_flatten.shape)
print(X_train_flatten.min(), X_train_flatten.max())

X_train_flatten = X_train_flatten / 255.0
X_test_flatten = X_test_flatten / 255.0
print(X_train_flatten.min(), X_train_flatten.max())

(60000, 784) (10000, 784)
0 255
0.0 1.0


In [4]:
# 모델 학습 2-1

model = tf_keras.Sequential([
    tf_keras.layers.Input(shape=(784,)),            # 입력층
    tf_keras.layers.Dense(512, activation="relu"),
    tf_keras.layers.Dense(256, activation="relu"),
    tf_keras.layers.Dense(10, activation="softmax") # 출력층
])

In [5]:
# 모델 학습 2-2

model.compile(loss="sparse_categorical_crossentropy",
              optimizer="adam",
              metrics=["accuracy"])

model.fit(X_train_flatten, y_train, batch_size=256, epochs=10, validation_split=0.2)

Epoch 1/10
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.8417 - loss: 0.5606 - val_accuracy: 0.9534 - val_loss: 0.1556
Epoch 2/10
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 26ms/step - accuracy: 0.9664 - loss: 0.1166 - val_accuracy: 0.9678 - val_loss: 0.1096
Epoch 3/10
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 31ms/step - accuracy: 0.9801 - loss: 0.0689 - val_accuracy: 0.9724 - val_loss: 0.0963
Epoch 4/10
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 20ms/step - accuracy: 0.9855 - loss: 0.0472 - val_accuracy: 0.9784 - val_loss: 0.0798
Epoch 5/10
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 28ms/step - accuracy: 0.9901 - loss: 0.0339 - val_accuracy: 0.9768 - val_loss: 0.0813
Epoch 6/10
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 21ms/step - accuracy: 0.9944 - loss: 0.0192 - val_accuracy: 0.9770 - val_loss: 0.0817
Epoch 7/10
[1m188/188

<keras.src.callbacks.history.History at 0x783c40098bb0>

In [6]:
# 모델 검증(테스트)

model.evaluate(X_train_flatten, y_train), model.evaluate(X_test_flatten, y_test)

[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9982 - loss: 0.0069
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9766 - loss: 0.0864


([0.02104133926331997, 0.9949166774749756],
 [0.07396619021892548, 0.9797999858856201])

# example08 후 모델 개선 시도 (과대적합 방지)

In [7]:
# 모델 개선 시도 - 1. 가중치 초기화
model_wud = tf_keras.models.Sequential()
model_wud.add(tf_keras.layers.Input(shape=(784,)))
model_wud.add(tf_keras.layers.Dense(512, activation='relu', kernel_initializer="he_uniform"))
model_wud.add(tf_keras.layers.Dense(256, activation='relu', kernel_initializer="he_uniform"))
model_wud.add(tf_keras.layers.Dense(128, activation='relu', kernel_initializer="he_uniform"))
model_wud.add(tf_keras.layers.Dense(10, activation='softmax'))

# 모델 개선 시도 - 2. 배치 정규화
model_bn = tf_keras.models.Sequential()
model_bn.add(tf_keras.layers.Input(shape=(784,)))
model_bn.add(tf_keras.layers.Dense(512))
model_bn.add(tf_keras.layers.BatchNormalization())
model_bn.add(tf_keras.layers.Activation('relu'))
model_bn.add(tf_keras.layers.Dense(256))
model_bn.add(tf_keras.layers.BatchNormalization())
model_bn.add(tf_keras.layers.Activation('relu'))
model_bn.add(tf_keras.layers.Dense(128))
model_bn.add(tf_keras.layers.BatchNormalization())
model_bn.add(tf_keras.layers.Activation('relu'))
model_bn.add(tf_keras.layers.Dense(10, activation='softmax'))

# 모델 개선 시도 3 : 가중치 규제 (L1, L2)
model_l = tf_keras.models.Sequential()
model_l.add(tf_keras.layers.Input(shape=(784,)))
model_l.add(tf_keras.layers.Dense(512, activation='relu', kernel_regularizer=tf_keras.regularizers.L2(0.1)))
model_l.add(tf_keras.layers.Dense(256, activation='relu', kernel_regularizer=tf_keras.regularizers.L2(0.1)))
model_l.add(tf_keras.layers.Dense(128, activation='relu', kernel_regularizer=tf_keras.regularizers.L2(0.1)))
model_l.add(tf_keras.layers.Dense(10, activation='softmax'))

# 모델 개선 시도 4 : 드롭아웃 적용
model_do = tf_keras.models.Sequential()
model_do.add(tf_keras.layers.Input(shape=(784,)))
model_do.add(tf_keras.layers.Dense(512, activation='relu'))
model_do.add(tf_keras.layers.Dropout(0.5, seed=42))
model_do.add(tf_keras.layers.Dense(256, activation='relu'))
model_do.add(tf_keras.layers.Dropout(0.5, seed=42))
model_do.add(tf_keras.layers.Dense(128, activation='relu'))
model_do.add(tf_keras.layers.Dropout(0.5, seed=42))
model_do.add(tf_keras.layers.Dense(10, activation='softmax'))

In [8]:
model.compile(loss='sparse_categorical_crossentropy',
               optimizer = 'adam',
               metrics=['accuracy'])

model_wud.compile( loss='sparse_categorical_crossentropy',
                    optimizer='adam',
                    metrics=['accuracy'])

model_bn.compile( loss='sparse_categorical_crossentropy',
                    optimizer='adam',
                    metrics=['accuracy'])

model_l.compile( loss='sparse_categorical_crossentropy',
                    optimizer='adam',
                    metrics=['accuracy'])

model_do.compile( loss='sparse_categorical_crossentropy',
                    optimizer='adam',
                    metrics=['accuracy'])

In [9]:
# 모델 훈련
fit_history_base = model.fit(X_train_flatten, y_train, batch_size=256, epochs=10, validation_split=0.2)
fit_history_wud = model_wud.fit(X_train_flatten, y_train, batch_size=256, epochs=10, validation_split=0.2)
fit_history_bn = model_bn.fit(X_train_flatten, y_train, batch_size=256, epochs=10, validation_split=0.2)
fit_history_l = model_l.fit(X_train_flatten, y_train, batch_size=256, epochs=10, validation_split=0.2)
fit_history_do = model_do.fit(X_train_flatten, y_train, batch_size=256, epochs=10, validation_split=0.2)

Epoch 1/10
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 21ms/step - accuracy: 0.9959 - loss: 0.0130 - val_accuracy: 0.9785 - val_loss: 0.0888
Epoch 2/10
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 20ms/step - accuracy: 0.9968 - loss: 0.0101 - val_accuracy: 0.9789 - val_loss: 0.0989
Epoch 3/10
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 25ms/step - accuracy: 0.9970 - loss: 0.0092 - val_accuracy: 0.9793 - val_loss: 0.0933
Epoch 4/10
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 21ms/step - accuracy: 0.9984 - loss: 0.0051 - val_accuracy: 0.9786 - val_loss: 0.1014
Epoch 5/10
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 22ms/step - accuracy: 0.9977 - loss: 0.0074 - val_accuracy: 0.9728 - val_loss: 0.1358
Epoch 6/10
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 27ms/step - accuracy: 0.9968 - loss: 0.0088 - val_accuracy: 0.9783 - val_loss: 0.1136
Epoch 7/10
[1m188/188

In [10]:
# 모델 평가
print( model.evaluate(X_train_flatten, y_train), model.evaluate(X_test_flatten, y_test) )         # 0.9947500228881836, 0.9800000190734863
print( model_wud.evaluate(X_train_flatten, y_train), model_wud.evaluate(X_test_flatten, y_test) ) # 0.9925166964530945, 0.9782999753952026
print( model_bn.evaluate(X_train_flatten, y_train), model_bn.evaluate(X_test_flatten, y_test) )   # 0.995199978351593,  0.9789999723434448
print( model_l.evaluate(X_train_flatten, y_train), model_l.evaluate(X_test_flatten, y_test) )     # 0.819350004196167,  0.8256999850273132
print( model_do.evaluate(X_train_flatten, y_train), model_do.evaluate(X_test_flatten, y_test) )   # 0.9885500073432922, 0.9797000288963318

[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9984 - loss: 0.0063
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9763 - loss: 0.0994
[0.026212677359580994, 0.9947500228881836] [0.08613505959510803, 0.9800000190734863]
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.9961 - loss: 0.0137
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9748 - loss: 0.1038
[0.029490193352103233, 0.9925166964530945] [0.08980993926525116, 0.9782999753952026]
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9985 - loss: 0.0065
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9751 - loss: 0.0909
[0.01977347955107689, 0.995199978351593] [0.07461534440517426, 0.9789999723434448]
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy