# **過擬合（Overfitting）**
在模型調校的章節中，我們了解到需要分別查看模型在訓練集與驗證集上的表現。若模型在驗證集上的表現不如在訓練集上的表現，其中一個可能的原因就是模型過擬合於訓練集。本份程式碼將介紹當過擬合發生時，可以在模型上採取哪些抑制手段。

## 本章節內容大綱
* ### [Regularization](#Regularization)
* ### [Early Stopping](#EarlyStopping)
* ### [Dropout](#Dropout)
* ### [Parameter Initialization](#ParameterInitialization)
* ### [Batch Normalization](#BatchNormalization)
-----------------

## 匯入套件

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Tensorflow 相關套件
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

## 創建資料集／載入資料集（Dataset Creating / Loading）

In [None]:
# Download the course data archive and unpack it into the working directory
# (-q keeps both commands quiet). The cells below read ./Data/*.csv,
# presumably extracted from this archive.
!wget -q https://github.com/TA-aiacademy/course_3.0/releases/download/DL/Data_part3.zip
!unzip -q Data_part3.zip

In [None]:
# Load the training and testing tables from the ./Data folder.
train_df, test_df = map(pd.read_csv,
                        ('./Data/News_train.csv', './Data/News_test.csv'))

In [None]:
# Preview the first five rows of the training table.
train_df.head(n=5)

In [None]:
# Labels come from the y_category column; features are every column
# except the last one.
y_df = train_df['y_category'].values
X_df = train_df.iloc[:, :-1].values

In [None]:
# Same feature/label extraction as for the training table, applied to the
# testing table.
y_test = test_df['y_category'].values
X_test = test_df.iloc[:, :-1].values

## 資料前處理（Data Preprocessing）

In [None]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Feature scaling: learn the per-feature statistics from the training table
# only, then apply the same transform to the training and testing features.
# NOTE(review): the scaler is fitted on X_df before the train/validation
# split, so validation rows contribute to the scaling statistics.
sc = StandardScaler().fit(X_df)
X_scale = sc.transform(X_df)
X_test_scale = sc.transform(X_test)

In [None]:
# Convert the integer class labels to one-hot encoding.
y_onehot = keras.utils.to_categorical(y_df)
# Reuse the class count inferred from the training labels so the test matrix
# always has the same width as the model output, even when the highest class
# id does not occur in the test split.
y_test_onehot = keras.utils.to_categorical(y_test,
                                           num_classes=y_onehot.shape[1])

In [None]:
# Hold out 20% of the scaled training data as a validation set. The split is
# stratified on the integer labels and seeded for reproducibility.
from sklearn.model_selection import train_test_split

X_train, X_valid, y_train, y_valid = train_test_split(
    X_scale, y_onehot,
    stratify=y_df,
    test_size=0.2,
    random_state=17)

In [None]:
# Report the shape of every split, one per line.
print(f'X_train shape: {X_train.shape}',
      f'X_valid shape: {X_valid.shape}',
      f'y_train shape: {y_train.shape}',
      f'y_valid shape: {y_valid.shape}',
      sep='\n')

## 模型建置（Model Building）

In [None]:
def build_model(input_shape, output_shape):
    """Build the baseline classifier: two tanh hidden layers and a softmax head.

    The Keras session is cleared and the TensorFlow seed is fixed to 17
    before any layer is created. The returned model is not compiled.

    Args:
        input_shape: Shape of one sample, passed as ``input_shape`` to the
            first Dense layer.
        output_shape: Number of units in the final softmax layer.

    Returns:
        The assembled ``keras.models.Sequential`` model.
    """
    keras.backend.clear_session()
    tf.random.set_seed(17)  # fix the random number sequence

    net = keras.models.Sequential()
    net.add(layers.Dense(64, input_shape=input_shape, activation='tanh'))
    net.add(layers.Dense(64, activation='tanh'))
    net.add(layers.Dense(output_shape, activation='softmax'))
    return net

In [None]:
# Instantiate the baseline network for this dataset's per-sample feature
# shape and number of classes, then print its layer summary.
model = build_model(input_shape=X_train[0].shape,
                    output_shape=y_onehot.shape[1])
model.summary()

## 模型訓練（Model Training）

In [None]:
# Compile the model for training (optimizer, loss function, metrics).
model.compile(optimizer=keras.optimizers.Nadam(0.001),
              metrics=['acc'],
              loss='categorical_crossentropy')

In [None]:
# Train for 20 epochs in mini-batches of 512, scoring the validation split
# after every epoch.
history = model.fit(
    X_train, y_train,
    validation_data=(X_valid, y_valid),
    batch_size=512,
    epochs=20)

## 模型評估（Model Evaluation）

In [None]:
# Pull the per-epoch training and validation curves out of the History object.
train_loss, train_acc, valid_loss, valid_acc = (
    history.history[key] for key in ('loss', 'acc', 'val_loss', 'val_acc'))

In [None]:
# Plot training vs. validation curves side by side: loss on the left,
# accuracy on the right.
plt.figure(figsize=(15, 4))
panels = (
    ('Loss', (train_loss, 'train_loss'), (valid_loss, 'valid_loss')),
    ('Accuracy', (train_acc, 'train_acc'), (valid_acc, 'valid_acc')),
)
for position, (y_label, *series) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    for values, tag in series:
        plt.plot(range(len(values)), values, label=tag)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.legend()
# Render explicitly, as the comparison plots in this notebook do, so the cell
# does not echo the Legend object returned by the last call.
plt.show()

In [None]:
# Print the results of testing data.
# Evaluate once and reuse both values instead of running inference over the
# whole test set twice; with metrics=['acc'] the result is [loss, acc].
test_loss, test_acc = model.evaluate(X_test_scale, y_test_onehot, verbose=0)
print('============================')
print('Testing data')
print('============================')
print(f'loss: {test_loss}')
print(f'acc: {test_acc}')

## 過擬合抑制策略

<img src="https://i.imgur.com/cuV6ERG.png">

<a name="Regularization"></a>
* ## Regularization
<img src="https://i.imgur.com/28Q625O.png" width="50%" height="50%">

In [None]:
def build_model_regular(input_shape, output_shape, l1_alpha, l2_alpha):
    """Build the baseline network with L1/L2 kernel regularization.

    Both hidden Dense layers get their own ``l1_l2`` kernel regularizer; the
    softmax output layer is left unregularized. The Keras session is cleared
    and the TensorFlow seed is fixed to 17 before any layer is created. The
    returned model is not compiled.

    Args:
        input_shape: Shape of one sample, passed as ``input_shape`` to the
            first Dense layer.
        output_shape: Number of units in the final softmax layer.
        l1_alpha: L1 penalty factor for the hidden-layer kernels.
        l2_alpha: L2 penalty factor for the hidden-layer kernels.

    Returns:
        The assembled ``keras.models.Sequential`` model.
    """
    keras.backend.clear_session()
    tf.random.set_seed(17)  # fix the random number sequence

    def penalty():
        # A fresh regularizer instance for each layer.
        return keras.regularizers.l1_l2(l1=l1_alpha, l2=l2_alpha)

    net = keras.models.Sequential()
    net.add(layers.Dense(64,
                         input_shape=input_shape,
                         activation='tanh',
                         kernel_regularizer=penalty()))
    net.add(layers.Dense(64,
                         activation='tanh',
                         kernel_regularizer=penalty()))
    net.add(layers.Dense(output_shape, activation='softmax'))
    return net

In [None]:
# Regularizer strengths to compare, as (L1, L2) pairs.
l1_l2_list = [(0, 0), (1e-3, 0), (0, 1e-2), (1e-3, 1e-2)]

batch_size, epochs = 512, 20

# Per-setting training curves, validation curves and test scores.
train_loss_list, train_acc_list = [], []
valid_loss_list, valid_acc_list = [], []
test_eval = []

# Train one model per regularizer setting.
for l1_alpha, l2_alpha in l1_l2_list:
    print('Training a model with regularizer L1: {}, L2: {}'.format(
        l1_alpha, l2_alpha))

    # Always start from a freshly built model rather than continuing the
    # previous one.
    model = build_model_regular(X_train[0].shape, y_onehot.shape[1],
                                l1_alpha, l2_alpha)
    model.compile(optimizer=keras.optimizers.Nadam(0.001),
                  metrics=['acc'],
                  loss='categorical_crossentropy')

    # Identical training settings for every run.
    history = model.fit(
        X_train, y_train,
        validation_data=(X_valid, y_valid),
        epochs=epochs,
        batch_size=batch_size,
        verbose=0)

    # Record the training history and the test-set evaluation.
    train_loss_list.append(history.history['loss'])
    train_acc_list.append(history.history['acc'])
    valid_loss_list.append(history.history['val_loss'])
    valid_acc_list.append(history.history['val_acc'])
    test_eval.append(
        model.evaluate(X_test_scale, y_test_onehot, verbose=0))
print('----------------- training done! -----------------')

In [None]:
# Visualize the training process for every (L1, L2) setting.
plt.figure(figsize=(15, 7))

# Line handles from the loss panel, reused for the two legends below.
train_line = ()
valid_line = ()

# Left panel: training (solid) and validation (dashed) loss.
plt.subplot(121)
for (l1, l2), loss, val_loss in zip(l1_l2_list,
                                    train_loss_list,
                                    valid_loss_list):
    train_line += tuple(plt.plot(
        range(len(loss)), loss,
        label=f'Training    L1: {l1}, L2: {l2}'))
    valid_line += tuple(plt.plot(
        range(len(val_loss)), val_loss, '--',
        label=f'Validation L1: {l1}, L2: {l2}'))
plt.title('Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')

# Right panel: training (solid) and validation (dashed) accuracy.
plt.subplot(122)
for (l1, l2), acc, val_acc in zip(l1_l2_list,
                                  train_acc_list,
                                  valid_acc_list):
    plt.plot(range(len(acc)), acc,
             label=f'Training    L1: {l1}, L2: {l2}')
    plt.plot(range(len(val_acc)), val_acc, '--',
             label=f'Validation L1: {l1}, L2: {l2}')
plt.title('Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')

# Two legends on the right-hand axes: the first must be re-added as an
# artist because the second plt.legend() call would otherwise replace it.
first_legend = plt.legend(handles=train_line,
                          bbox_to_anchor=(1.05, 1))

plt.gca().add_artist(first_legend)
plt.legend(handles=valid_line,
           bbox_to_anchor=(1.05, 0.8))
plt.show()

In [None]:
# Print the test-set loss and accuracy for every regularizer setting.
for k, setting in enumerate(l1_l2_list):
    scores = test_eval[k]
    print('============================')
    print(f'(l1, l2) = {setting}')
    print('============================')
    print(f'loss: {scores[0]}')
    print(f'acc: {scores[1]}\n')

<a name="EarlyStopping"></a>
* ## Early Stopping

In [None]:
n_patience = 5  # stop after n_patience epochs without improvement
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',   # quantity used to judge improvement
    patience=n_patience,
    # Roll back to the weights of the best monitored epoch; otherwise the
    # model keeps the weights of the last epoch that ran, i.e. after
    # n_patience epochs without improvement.
    restore_best_weights=True,
    verbose=1)

In [None]:
# Rebuild the baseline network from scratch for the early-stopping run and
# print its layer summary.
model = build_model(input_shape=X_train[0].shape,
                    output_shape=y_onehot.shape[1])
model.summary()

In [None]:
# Compile the model for training (optimizer, loss function, metrics).
model.compile(optimizer=keras.optimizers.Nadam(0.001),
              metrics=['acc'],
              loss='categorical_crossentropy')

In [None]:
# Same training settings as the baseline run, plus the early-stopping
# callback.
history = model.fit(
    X_train, y_train,
    validation_data=(X_valid, y_valid),
    callbacks=[early_stopping],
    batch_size=512,
    epochs=20)

In [None]:
# Pull the per-epoch training and validation curves out of the History object.
train_loss, train_acc, valid_loss, valid_acc = (
    history.history[key] for key in ('loss', 'acc', 'val_loss', 'val_acc'))

In [None]:
# Plot training vs. validation curves side by side: loss on the left,
# accuracy on the right.
plt.figure(figsize=(15, 4))
panels = (
    ('Loss', (train_loss, 'train_loss'), (valid_loss, 'valid_loss')),
    ('Accuracy', (train_acc, 'train_acc'), (valid_acc, 'valid_acc')),
)
for position, (y_label, *series) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    for values, tag in series:
        plt.plot(range(len(values)), values, label=tag)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.legend()
# Render explicitly, as the comparison plots in this notebook do, so the cell
# does not echo the Legend object returned by the last call.
plt.show()

In [None]:
# Print the results of testing data.
# Evaluate once and reuse both values instead of running inference over the
# whole test set twice; with metrics=['acc'] the result is [loss, acc].
test_loss, test_acc = model.evaluate(X_test_scale, y_test_onehot, verbose=0)
print('============================')
print('Testing data')
print('============================')
print(f'loss: {test_loss}')
print(f'acc: {test_acc}')

<a name="Dropout"></a>
* ## Dropout
![](https://i.imgur.com/NWokIte.png)

In [None]:
def build_model_dropout(input_shape, output_shape, droprate):
    """Build the baseline network with Dropout after each hidden layer.

    The Keras session is cleared and the TensorFlow seed is fixed to 17
    before any layer is created. Both Dropout layers are created with
    ``seed=17``. The returned model is not compiled.

    Args:
        input_shape: Shape of one sample, passed as ``input_shape`` to the
            first Dense layer.
        output_shape: Number of units in the final softmax layer.
        droprate: Rate passed to both Dropout layers.

    Returns:
        The assembled ``keras.models.Sequential`` model.
    """
    keras.backend.clear_session()
    tf.random.set_seed(17)  # fix the random number sequence

    net = keras.models.Sequential()

    # First hidden layer followed by Dropout.
    net.add(layers.Dense(64, input_shape=input_shape, activation='tanh'))
    net.add(layers.Dropout(droprate, seed=17))

    # Second hidden layer followed by Dropout.
    net.add(layers.Dense(64, activation='tanh'))
    net.add(layers.Dropout(droprate, seed=17))

    net.add(layers.Dense(output_shape, activation='softmax'))
    return net

In [None]:
# Dropout rates to compare.
dropout_rates = [0, 0.1, 0.2, 0.4]

batch_size, epochs = 512, 20

# Per-rate training curves, validation curves and test scores.
train_loss_list, train_acc_list = [], []
valid_loss_list, valid_acc_list = [], []
test_eval = []

# Train one model per dropout rate.
for drop_r in dropout_rates:
    print('Training a model with dropout rate: {}'.format(drop_r))

    # Always start from a freshly built model rather than continuing the
    # previous one.
    model = build_model_dropout(X_train[0].shape,
                                y_onehot.shape[1],
                                drop_r)
    model.compile(optimizer=keras.optimizers.Nadam(0.001),
                  metrics=['acc'],
                  loss='categorical_crossentropy')

    # Identical training settings for every run.
    history = model.fit(
        X_train, y_train,
        validation_data=(X_valid, y_valid),
        epochs=epochs,
        batch_size=batch_size,
        verbose=0)

    # Record the training history and the test-set evaluation.
    train_loss_list.append(history.history['loss'])
    valid_loss_list.append(history.history['val_loss'])
    train_acc_list.append(history.history['acc'])
    valid_acc_list.append(history.history['val_acc'])
    test_eval.append(
        model.evaluate(X_test_scale, y_test_onehot, verbose=0))
print('----------------- training done! -----------------')

In [None]:
# Visualize the training process for every dropout rate.
plt.figure(figsize=(15, 7))

# Line handles from the loss panel, reused for the two legends below.
train_line = ()
valid_line = ()

# Left panel: training (solid) and validation (dashed) loss.
plt.subplot(121)
for rate, loss, val_loss in zip(dropout_rates,
                                train_loss_list,
                                valid_loss_list):
    train_line += tuple(plt.plot(
        range(len(loss)), loss,
        label=f'Training    dropout rate:{rate}'))
    valid_line += tuple(plt.plot(
        range(len(val_loss)), val_loss, '--',
        label=f'Validation dropout rate:{rate}'))
plt.title('Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')

# Right panel: training (solid) and validation (dashed) accuracy.
plt.subplot(122)
for rate, acc, val_acc in zip(dropout_rates,
                              train_acc_list,
                              valid_acc_list):
    plt.plot(range(len(acc)), acc,
             label=f'Training    dropout rate:{rate}')
    plt.plot(range(len(val_acc)), val_acc, '--',
             label=f'Validation dropout rate:{rate}')
plt.title('Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')

# Two legends on the right-hand axes: the first must be re-added as an
# artist because the second plt.legend() call would otherwise replace it.
first_legend = plt.legend(handles=train_line,
                          bbox_to_anchor=(1.05, 1))

plt.gca().add_artist(first_legend)
plt.legend(handles=valid_line,
           bbox_to_anchor=(1.05, 0.8))
plt.show()

In [None]:
# Print the test-set loss and accuracy for every dropout rate.
for k, rate in enumerate(dropout_rates):
    scores = test_eval[k]
    print('============================')
    print(f'dropout_rate = {rate}')
    print('============================')
    print(f'loss: {scores[0]}')
    print(f'acc: {scores[1]}\n')

<a name="ParameterInitialization"></a>
* ## Parameter Initialization
tf.keras.initializers: https://www.tensorflow.org/api_docs/python/tf/keras/initializers

In [None]:
def build_model_init(input_shape, output_shape, init):
    """Build the baseline network with a configurable kernel initializer.

    The same initializer is applied to all three Dense layers, including
    the softmax output layer. The Keras session is cleared and the
    TensorFlow seed is fixed to 17 before any layer is created. The
    returned model is not compiled.

    Args:
        input_shape: Shape of one sample, passed as ``input_shape`` to the
            first Dense layer.
        output_shape: Number of units in the final softmax layer.
        init: Value passed as ``kernel_initializer`` to every Dense layer.

    Returns:
        The assembled ``keras.models.Sequential`` model.
    """
    keras.backend.clear_session()
    tf.random.set_seed(17)

    net = keras.models.Sequential()
    # kernel_initializer is the knob this builder exposes.
    net.add(layers.Dense(64,
                         input_shape=input_shape,
                         kernel_initializer=init,
                         activation='tanh'))
    net.add(layers.Dense(64,
                         kernel_initializer=init,
                         activation='tanh'))
    net.add(layers.Dense(output_shape,
                         kernel_initializer=init,
                         activation='softmax'))
    return net

In [None]:
# Kernel initializers to compare.
init_l = [
    'glorot_normal',
    'he_normal',
    'lecun_normal',
    'random_normal',
    'truncated_normal',
]

batch_size, epochs = 512, 20

# Per-initializer training curves, validation curves and test scores.
train_loss_list, train_acc_list = [], []
valid_loss_list, valid_acc_list = [], []
test_eval = []

# Train one model per initializer.
for init in init_l:
    print(f'Training model, init = {init}')

    # Always start from a freshly built model rather than continuing the
    # previous one.
    model = build_model_init(X_train[0].shape,
                             y_onehot.shape[1],
                             init)
    model.compile(optimizer=keras.optimizers.Nadam(0.001),
                  metrics=['acc'],
                  loss='categorical_crossentropy')

    # Identical training settings for every run.
    history = model.fit(
        X_train, y_train,
        validation_data=(X_valid, y_valid),
        epochs=epochs,
        batch_size=batch_size,
        verbose=0)

    # Record the training history and the test-set evaluation.
    train_loss_list.append(history.history['loss'])
    valid_loss_list.append(history.history['val_loss'])
    train_acc_list.append(history.history['acc'])
    valid_acc_list.append(history.history['val_acc'])
    test_eval.append(
        model.evaluate(X_test_scale, y_test_onehot, verbose=0))
print('----------------- training done! -----------------')

In [None]:
# Visualize the training process for every initializer.
plt.figure(figsize=(15, 7))

# Line handles from the loss panel, reused for the two legends below.
train_line = ()
valid_line = ()

# Left panel: training (solid) and validation (dashed) loss.
plt.subplot(121)
for init_name, loss, val_loss in zip(init_l,
                                     train_loss_list,
                                     valid_loss_list):
    train_line += tuple(plt.plot(
        range(len(loss)), loss,
        label=f'Training    init: {init_name}'))
    valid_line += tuple(plt.plot(
        range(len(val_loss)), val_loss, '--',
        label=f'Validation init: {init_name}'))
plt.title('Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')

# Right panel: training (solid) and validation (dashed) accuracy.
plt.subplot(122)
for init_name, acc, val_acc in zip(init_l,
                                   train_acc_list,
                                   valid_acc_list):
    plt.plot(range(len(acc)), acc,
             label=f'Training    init: {init_name}')
    plt.plot(range(len(val_acc)), val_acc, '--',
             label=f'Validation init: {init_name}')
plt.title('Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')

# Two legends on the right-hand axes: the first must be re-added as an
# artist because the second plt.legend() call would otherwise replace it.
first_legend = plt.legend(handles=train_line,
                          bbox_to_anchor=(1.05, 1))

plt.gca().add_artist(first_legend)
plt.legend(handles=valid_line,
           bbox_to_anchor=(1.05, 0.75))
plt.show()

In [None]:
# Print the test-set loss and accuracy for every initializer.
for k, init_name in enumerate(init_l):
    scores = test_eval[k]
    print('============================')
    print(f'initializer = {init_name}')
    print('============================')
    print(f'loss: {scores[0]}')
    print(f'acc: {scores[1]}\n')

<a name="BatchNormalization"></a>
* ## Batch Normalization

In [None]:
def build_model_bn(input_shape, output_shape, bn=True):
    """Build the baseline network with optional Batch Normalization.

    Each hidden block is Dense(64) -> BatchNormalization (only when ``bn``
    is truthy) -> tanh activation, followed by a softmax output layer. The
    Keras session is cleared and the TensorFlow seed is fixed to 17 before
    any layer is created. The returned model is not compiled.

    Args:
        input_shape: Shape of one sample, passed as ``input_shape`` to the
            first Dense layer.
        output_shape: Number of units in the final softmax layer.
        bn: Whether to insert BatchNormalization before each activation.

    Returns:
        The assembled ``keras.models.Sequential`` model.
    """
    keras.backend.clear_session()
    tf.random.set_seed(17)  # fix the random number sequence

    net = keras.models.Sequential()

    def add_hidden(dense):
        # Linear layer first, then the optional normalization, then tanh.
        net.add(dense)
        if bn:
            net.add(layers.BatchNormalization())
        net.add(layers.Activation('tanh'))

    add_hidden(layers.Dense(64, input_shape=input_shape))
    add_hidden(layers.Dense(64))

    net.add(layers.Dense(output_shape, activation='softmax'))
    return net

In [None]:
# Compare a model without and with BatchNormalization.
BN = [False, True]

batch_size, epochs = 512, 20

# Per-setting training curves, validation curves and test scores.
train_loss_list, train_acc_list = [], []
valid_loss_list, valid_acc_list = [], []
test_eval = []

# Train one model per BatchNormalization setting.
for bn in BN:
    print('Training a model with BatchNormalization: {}'.format(str(bn)))

    # Always start from a freshly built model rather than continuing the
    # previous one.
    model = build_model_bn(X_train[0].shape,
                           y_onehot.shape[1],
                           bn)
    model.compile(optimizer=keras.optimizers.Nadam(0.001),
                  metrics=['acc'],
                  loss='categorical_crossentropy')

    # Identical training settings for every run.
    history = model.fit(
        X_train, y_train,
        validation_data=(X_valid, y_valid),
        epochs=epochs,
        batch_size=batch_size,
        verbose=0)

    # Record the training history and the test-set evaluation.
    train_loss_list.append(history.history['loss'])
    valid_loss_list.append(history.history['val_loss'])
    train_acc_list.append(history.history['acc'])
    valid_acc_list.append(history.history['val_acc'])
    test_eval.append(
        model.evaluate(X_test_scale, y_test_onehot, verbose=0))
print('----------------- training done! -----------------')

In [None]:
# Visualize the training process without and with BatchNormalization.
plt.figure(figsize=(15, 7))

# Line handles from the loss panel, reused for the two legends below.
train_line = ()
valid_line = ()

# Left panel: training (solid) and validation (dashed) loss.
plt.subplot(121)
for use_bn, loss, val_loss in zip(BN, train_loss_list, valid_loss_list):
    train_line += tuple(plt.plot(
        range(len(loss)), loss,
        label=f'Training    BatchNormalization:{str(use_bn)}'))
    valid_line += tuple(plt.plot(
        range(len(val_loss)), val_loss, '--',
        label=f'Validation BatchNormalization:{str(use_bn)}'))
plt.title('Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')

# Right panel: training (solid) and validation (dashed) accuracy.
plt.subplot(122)
for use_bn, acc, val_acc in zip(BN, train_acc_list, valid_acc_list):
    plt.plot(range(len(acc)), acc,
             label=f'Training    BatchNormalization:{str(use_bn)}')
    plt.plot(range(len(val_acc)), val_acc, '--',
             label=f'Validation BatchNormalization:{str(use_bn)}')
plt.title('Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')

# Two legends on the right-hand axes: the first must be re-added as an
# artist because the second plt.legend() call would otherwise replace it.
first_legend = plt.legend(handles=train_line,
                          bbox_to_anchor=(1.05, 1))

plt.gca().add_artist(first_legend)
plt.legend(handles=valid_line,
           bbox_to_anchor=(1.05, 0.75))
plt.show()

In [None]:
# Print the test-set loss and accuracy without and with BatchNormalization.
for k, use_bn in enumerate(BN):
    scores = test_eval[k]
    print('============================')
    print(f'BatchNormalization = {use_bn}')
    print('============================')
    print(f'loss: {scores[0]}')
    print(f'acc: {scores[1]}\n')

---
### Quiz
請試著利用 Data/pkgo_train.csv 做多元分類問題，預測五個種類的 pokemon，並使用 Data/pkgo_test.csv 驗證結果。

若出現 Overfitting 的情況，嘗試使用以上抑制 Overfitting 的方法調整訓練模型的策略。