<a href="https://colab.research.google.com/github/Kimhantag/Oracle-SQL-Developer-practice/blob/main/5.Regularization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow.keras import Model, layers, regularizers, initializers
import numpy as np
from matplotlib import pyplot as plt
import random
from sklearn.model_selection import train_test_split
# Seed the Python, NumPy and TensorFlow random number generators with the
# same fixed value so that repeated runs start from the same random state.
random.seed(5)
np.random.seed(5)
tf.random.set_seed(5)

In [None]:
# Load MNIST: a train+validation pool and a separate test split.
(x_trainval, y_trainval), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
print(type(x_trainval), x_trainval.shape)
print(type(y_trainval), y_trainval.shape)

# Hold out 1/6 of the pool for validation, keeping the class proportions of
# the labels (stratified) and using a fixed split seed.
x_train, x_valid, y_train, y_valid = train_test_split(
    x_trainval,
    y_trainval,
    test_size=1/6,
    random_state=34,
    shuffle=True,
    stratify=y_trainval,
)
print(type(x_train), x_train.shape)
print(type(y_train), y_train.shape)

# Number of inputs per sample once an image is flattened (height * width).
num_features = int(np.prod(x_train.shape[1:]))


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
<class 'numpy.ndarray'> (60000, 28, 28)
<class 'numpy.ndarray'> (60000,)
<class 'numpy.ndarray'> (50000, 28, 28)
<class 'numpy.ndarray'> (50000,)


In [None]:
# Scale to [0, 1]: cast each split to float32 and divide by 255.
x_train = x_train.astype('float32') / 255
x_valid = x_valid.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# The fully connected network takes flat vectors, so collapse every image
# into a single row of num_features values (no channel axis is added).
x_train = x_train.reshape(len(x_train), num_features)
x_valid = x_valid.reshape(len(x_valid), num_features)
x_test = x_test.reshape(len(x_test), num_features)


print(x_train.shape, x_valid.shape, x_test.shape)
print(y_train.shape, y_valid.shape, y_test.shape)

(50000, 784) (10000, 784) (10000, 784)
(50000,) (10000,) (10000,)


In [None]:
@tf.keras.saving.register_keras_serializable()
class NeuralNet(Model):
    """Baseline fully connected network with no added regularization technique.

    Layout: Dense(128) -> BatchNorm -> ReLU -> Dense(256) -> BatchNorm -> ReLU
    -> Dense(10). Every Dense kernel uses Glorot-normal initialization.
    """

    def __init__(self):
        super(NeuralNet, self).__init__()
        self.fc1 = layers.Dense(128, kernel_initializer = initializers.GlorotNormal())
        self.bn1 = layers.BatchNormalization()
        self.ac1 = layers.Activation(tf.nn.relu)
        self.fc2 = layers.Dense(256, kernel_initializer = initializers.GlorotNormal())
        self.bn2 = layers.BatchNormalization()
        self.ac2 = layers.Activation(tf.nn.relu)
        self.out = layers.Dense(10, kernel_initializer = initializers.GlorotNormal())

    def call(self, x, is_training=False, training=None):
        """Run the forward pass.

        Args:
            x: Input batch fed to the first Dense layer.
            is_training: Legacy flag, kept so existing callers keep working.
                When truthy, the layers run in training mode and the raw
                logits are returned (no softmax).
            training: Standard Keras mode flag. Keras forwards the
                training/inference mode to `call` only under this name; with
                only `is_training` in the signature the flag stayed False
                during `fit()`, so BatchNormalization never ran in training
                mode.

        Returns:
            Softmax class probabilities, or logits when `is_training` is truthy.
        """
        # An explicit legacy flag wins; otherwise follow the mode Keras supplies.
        layer_training = True if is_training else training

        x = self.fc1(x)
        x = self.bn1(x, training=layer_training)
        x = self.ac1(x)
        x = self.fc2(x)
        x = self.bn2(x, training=layer_training)
        x = self.ac2(x)
        x = self.out(x)
        # Softmax is still skipped only on an explicit is_training=True, so the
        # probability-based loss used with fit() keeps receiving probabilities.
        if not is_training:
            x = tf.nn.softmax(x)
        return x

In [None]:
# Instantiate the baseline network (no regularization technique applied).
nn = NeuralNet()

In [None]:
# Plain SGD (learning rate 0.1), sparse categorical cross-entropy loss,
# and accuracy reported as the metric.
nn.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.1),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Early stopping: end training once val_loss has failed to improve for
# 4 consecutive epochs.
es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', patience=4, verbose=1)
# Model checkpoint:
# tf.keras.callbacks.ModelCheckpoint(filepath, monitor, mode, save_best_only)
# "Best" is judged on validation accuracy rather than training accuracy, so
# the saved model is chosen on held-out data, in line with the early-stopping
# criterion above (training accuracy keeps rising even while overfitting).
mc = tf.keras.callbacks.ModelCheckpoint('./mlp', monitor='val_accuracy', mode='max', save_best_only=True, verbose=1)

In [None]:
# Train the baseline for at most 20 epochs with mini-batches of 128,
# validating on the held-out split and running both callbacks.
nn.fit(
    x_train,
    y_train,
    validation_data=(x_valid, y_valid),
    batch_size=128,
    epochs=20,
    shuffle=True,
    callbacks=[es, mc],
)

In [None]:
nn_result = nn.evaluate(x_test, y_test)  # Baseline performance; the regularized models below are compared against this result



In [None]:
# nn_result holds the test loss at index 0 and the accuracy metric at index 1.
print('test loss: ', nn_result[0])
print('test acc: ', nn_result[1])

test loss:  0.07804710417985916
test acc:  0.9776999950408936


## Regularization

---


### weight decay

In [None]:
class WD_NeuralNet(Model):
    """Baseline network plus weight decay (L2 penalty on every Dense kernel).

    Layout: Dense(128) -> BatchNorm -> ReLU -> Dense(256) -> BatchNorm -> ReLU
    -> Dense(10). Every Dense kernel uses Glorot-normal initialization and an
    L2 kernel regularizer.

    Args:
        l2_factor: Coefficient of the L2 kernel penalty shared by the three
            Dense layers. Defaults to 0.01, the value previously hard-coded.
    """

    def __init__(self, l2_factor=0.01):
        super(WD_NeuralNet, self).__init__()
        self.fc1 = layers.Dense(128,  kernel_initializer = initializers.GlorotNormal(), kernel_regularizer=regularizers.l2(l2_factor))
        self.bn1 = layers.BatchNormalization()
        self.ac1 = layers.Activation(tf.nn.relu)
        self.fc2 = layers.Dense(256,  kernel_initializer = initializers.GlorotNormal(), kernel_regularizer=regularizers.l2(l2_factor))
        self.bn2 = layers.BatchNormalization()
        self.ac2 = layers.Activation(tf.nn.relu)
        self.out = layers.Dense(10,  kernel_initializer = initializers.GlorotNormal(), kernel_regularizer=regularizers.l2(l2_factor))

    def call(self, x, is_training=False, training=None):
        """Run the forward pass.

        Args:
            x: Input batch fed to the first Dense layer.
            is_training: Legacy flag, kept so existing callers keep working.
                When truthy, the layers run in training mode and the raw
                logits are returned (no softmax).
            training: Standard Keras mode flag. Keras forwards the
                training/inference mode to `call` only under this name; with
                only `is_training` in the signature the flag stayed False
                during `fit()`, so BatchNormalization never ran in training
                mode.

        Returns:
            Softmax class probabilities, or logits when `is_training` is truthy.
        """
        # An explicit legacy flag wins; otherwise follow the mode Keras supplies.
        layer_training = True if is_training else training

        x = self.fc1(x)
        x = self.bn1(x, training=layer_training)
        x = self.ac1(x)
        x = self.fc2(x)
        x = self.bn2(x, training=layer_training)
        x = self.ac2(x)
        x = self.out(x)
        # Softmax is still skipped only on an explicit is_training=True, so the
        # probability-based loss used with fit() keeps receiving probabilities.
        if not is_training:
            x = tf.nn.softmax(x)
        return x

In [None]:
nn_wd = WD_NeuralNet()
nn_wd.compile(loss = 'sparse_categorical_crossentropy', optimizer = tf.keras.optimizers.SGD(learning_rate=0.1), metrics = ['accuracy'])

# Use callbacks dedicated to this model. A ModelCheckpoint keeps its
# best-so-far value between fit() calls, so reusing the baseline's instance
# compares this model against the baseline's best score (every epoch reports
# "did not improve") and no checkpoint is ever written for it. A separate
# path also keeps the baseline checkpoint from being overwritten.
es_wd = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', patience=4, verbose=1)
mc_wd = tf.keras.callbacks.ModelCheckpoint('./mlp_wd', monitor='val_accuracy', mode='max', save_best_only=True, verbose=1)

nn_wd.fit(x_train, y_train,
                epochs=20,
                batch_size=128,
                shuffle=True,
                validation_data=(x_valid, y_valid),
                callbacks = [es_wd, mc_wd]
                )

Epoch 1/20
Epoch 1: accuracy did not improve from 0.99578
Epoch 2/20
Epoch 2: accuracy did not improve from 0.99578
Epoch 3/20
Epoch 3: accuracy did not improve from 0.99578
Epoch 4/20
Epoch 4: accuracy did not improve from 0.99578
Epoch 5/20
Epoch 5: accuracy did not improve from 0.99578
Epoch 6/20
Epoch 6: accuracy did not improve from 0.99578
Epoch 7/20
Epoch 7: accuracy did not improve from 0.99578
Epoch 8/20
Epoch 8: accuracy did not improve from 0.99578
Epoch 9/20
Epoch 9: accuracy did not improve from 0.99578
Epoch 10/20
Epoch 10: accuracy did not improve from 0.99578
Epoch 11/20
Epoch 11: accuracy did not improve from 0.99578
Epoch 12/20
Epoch 12: accuracy did not improve from 0.99578
Epoch 13/20
Epoch 13: accuracy did not improve from 0.99578
Epoch 14/20
Epoch 14: accuracy did not improve from 0.99578
Epoch 15/20
Epoch 15: accuracy did not improve from 0.99578
Epoch 16/20
Epoch 16: accuracy did not improve from 0.99578
Epoch 17/20
Epoch 17: accuracy did not improve from 0.9957

<keras.src.callbacks.History at 0x7e60d40bd4b0>

In [None]:
# Evaluate the weight-decay model on the test split; the result holds the
# loss at index 0 and the accuracy metric at index 1.
nn_wd_result = nn_wd.evaluate(x_test, y_test)
print('test loss: ', nn_wd_result[0])
print('test acc: ', nn_wd_result[1])


test loss:  0.4048057794570923
test acc:  0.9394000172615051


In [None]:
class DO_NeuralNet(Model):
    """Baseline network plus dropout (rate 0.5) after each hidden activation.

    Layout: Dense(128) -> BatchNorm -> ReLU -> Dropout -> Dense(256) ->
    BatchNorm -> ReLU -> Dropout -> Dense(num_classes). Every Dense kernel
    uses Glorot-normal initialization.

    Args:
        num_classes: Width of the output layer. Defaults to 10, matching the
            output size of the other networks in this notebook; previously
            this was read from a global name that the notebook never defines.
    """

    def __init__(self, num_classes=10):
        super(DO_NeuralNet, self).__init__()
        self.fc1 = layers.Dense(128, kernel_initializer = initializers.GlorotNormal())
        self.bn1 = layers.BatchNormalization()
        self.ac1 = layers.Activation(tf.nn.relu)
        self.do1 = layers.Dropout(rate=0.5)
        self.fc2 = layers.Dense(256, kernel_initializer = initializers.GlorotNormal())
        self.bn2 = layers.BatchNormalization()
        self.ac2 = layers.Activation(tf.nn.relu)
        self.do2 = layers.Dropout(rate=0.5)
        self.out = layers.Dense(num_classes, kernel_initializer = initializers.GlorotNormal())

    def call(self, x, is_training=False, training=None):
        """Run the forward pass.

        Args:
            x: Input batch fed to the first Dense layer.
            is_training: Legacy flag, kept so existing callers keep working.
                When truthy, the layers run in training mode and the raw
                logits are returned (no softmax).
            training: Standard Keras mode flag. Keras forwards the
                training/inference mode to `call` only under this name; with
                only `is_training` in the signature the flag stayed False
                during `fit()`, so Dropout was never applied and
                BatchNormalization never ran in training mode.

        Returns:
            Softmax class probabilities, or logits when `is_training` is truthy.
        """
        # An explicit legacy flag wins; otherwise follow the mode Keras supplies.
        layer_training = True if is_training else training

        x = self.fc1(x)
        x = self.bn1(x, training=layer_training)
        x = self.ac1(x)
        x = self.do1(x, training=layer_training)
        x = self.fc2(x)
        x = self.bn2(x, training=layer_training)
        x = self.ac2(x)
        x = self.do2(x, training=layer_training)
        x = self.out(x)
        # Softmax is still skipped only on an explicit is_training=True, so the
        # probability-based loss used with fit() keeps receiving probabilities.
        if not is_training:
            x = tf.nn.softmax(x)
        return x

In [None]:
nn_do = DO_NeuralNet()
nn_do.compile(loss = 'sparse_categorical_crossentropy', optimizer = tf.keras.optimizers.SGD(learning_rate=0.1), metrics = ['accuracy'])

# Use callbacks dedicated to this model. A ModelCheckpoint keeps its
# best-so-far value between fit() calls, so reusing the baseline's instance
# compares this model against the baseline's best score (every epoch reports
# "did not improve") and no checkpoint is ever written for it. A separate
# path also keeps the baseline checkpoint from being overwritten.
es_do = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', patience=4, verbose=1)
mc_do = tf.keras.callbacks.ModelCheckpoint('./mlp_do', monitor='val_accuracy', mode='max', save_best_only=True, verbose=1)

nn_do.fit(x_train, y_train,
                epochs=20,
                batch_size=128,
                shuffle=True,
                validation_data=(x_valid, y_valid),
                callbacks = [es_do, mc_do]
                )

Epoch 1/20
Epoch 1: accuracy did not improve from 0.99578
Epoch 2/20
Epoch 2: accuracy did not improve from 0.99578
Epoch 3/20
Epoch 3: accuracy did not improve from 0.99578
Epoch 4/20
Epoch 4: accuracy did not improve from 0.99578
Epoch 5/20
Epoch 5: accuracy did not improve from 0.99578
Epoch 6/20
Epoch 6: accuracy did not improve from 0.99578
Epoch 7/20
Epoch 7: accuracy did not improve from 0.99578
Epoch 8/20
Epoch 8: accuracy did not improve from 0.99578
Epoch 9/20
Epoch 9: accuracy did not improve from 0.99578
Epoch 10/20
Epoch 10: accuracy did not improve from 0.99578
Epoch 11/20
Epoch 11: accuracy did not improve from 0.99578
Epoch 12/20
Epoch 12: accuracy did not improve from 0.99578
Epoch 13/20
Epoch 13: accuracy did not improve from 0.99578
Epoch 14/20
Epoch 14: accuracy did not improve from 0.99578
Epoch 15/20
Epoch 15: accuracy did not improve from 0.99578
Epoch 16/20
Epoch 16: accuracy did not improve from 0.99578
Epoch 16: early stopping


<keras.src.callbacks.History at 0x7a1e2a707640>

In [None]:
# Evaluate the dropout model on the test split; the result holds the loss at
# index 0 and the accuracy metric at index 1.
nn_do_result = nn_do.evaluate(x_test, y_test)
print('test loss: ', nn_do_result[0])
print('test acc: ', nn_do_result[1])


test loss:  0.08274487406015396
test acc:  0.977400004863739


## Model_Ensemble

In [None]:
def accuracy(y_pred, y_true):
    """Return the fraction of samples whose top-scoring class equals the label.

    Args:
        y_pred: Per-sample class scores; the predicted class is the argmax
            along axis 1.
        y_true: Class labels, cast to int64 before the comparison.

    Returns:
        The mean of the per-sample hit indicators, converted with `.numpy()`.
    """
    predicted_labels = tf.argmax(y_pred, axis=1)
    true_labels = tf.cast(y_true, tf.int64)
    hits = tf.cast(tf.equal(predicted_labels, true_labels), tf.float32)
    return tf.reduce_mean(hits).numpy()

# Ensemble by averaging: add up the three models' predictions on the test
# set (baseline, weight decay, dropout - in that order) and divide by three.
pred = sum(model.predict(x_test) for model in (nn, nn_wd, nn_do)) / 3

# Score the averaged predictions against the test labels.
ensemble_result = accuracy(pred, y_test)

print(ensemble_result)

0.9792


In [None]:
## Final side-by-side comparison  ## Even if a technique usually tends to improve performance, that improvement may not be observed consistently every time
print(ensemble_result)  # ensemble accuracy
print(nn_result[1])  # accuracy of the baseline neural network
print(nn_wd_result[1])  # accuracy of the baseline network with weight decay applied
print(nn_do_result[1])   # accuracy of the baseline network with dropout applied


0.9792
0.9776999950408936
0.9394000172615051
0.977400004863739
