## Work
### 請結合前面的知識與程式碼，比較不同的 regularization 的組合對訓練的結果與影響：如 dropout, regularizers, batch-normalization 等

In [1]:
import os
import keras
import itertools
# Disable GPU
os.environ["CUDA_VISIBLE_DEVICES"] = ""

Using TensorFlow backend.


In [2]:
# Load CIFAR-10 through Keras. The call's result is unpacked into two objects,
# `train` and `test`; each is later unpacked again into an (inputs, labels) pair.
train, test = keras.datasets.cifar10.load_data()

In [3]:
## Data preprocessing
def preproc_x(x, flatten=True):
    """Rescale the inputs and optionally flatten each sample.

    Args:
        x: Array-like supporting division, ``len`` and ``reshape``
            (presumably 8-bit image data, so the result lies in [0, 1]).
        flatten: When true, collapse every axis after the first so the
            result has shape ``(len(x), -1)``.

    Returns:
        ``x / 255.``, reshaped to two dimensions when ``flatten`` is true.
    """
    scaled = x / 255.
    return scaled.reshape((len(scaled), -1)) if flatten else scaled

def preproc_y(y, num_classes=10):
    """One-hot encode the labels when they are stored one per row.

    Args:
        y: Label array. It is encoded only when its last axis has length 1.
        num_classes: Number of classes passed to ``to_categorical``.

    Returns:
        The result of ``keras.utils.to_categorical`` when the last axis has
        length 1; otherwise ``y`` itself, untouched.
    """
    is_label_column = y.shape[-1] == 1
    if not is_label_column:
        # Anything else is passed through as-is.
        return y
    return keras.utils.to_categorical(y, num_classes)

In [4]:
# Split both datasets into their inputs and labels in one step.
(x_train, y_train), (x_test, y_test) = train, test

# Preprocess the inputs (preproc_x defaults: scale, then flatten).
x_train, x_test = preproc_x(x_train), preproc_x(x_test)

# Preprocess the outputs (preproc_y default: 10 classes).
y_train, y_test = preproc_y(y_train), preproc_y(y_test)

In [5]:
from keras.layers import BatchNormalization, ReLU, Dropout
from keras.regularizers import l1, l2, l1_l2

def build_mlp(input_shape, output_units=10, num_neurons=(512, 256, 128), is_norm_after_ac=True, l2ratio=1e-4, drop=0.25):
    """Build a fully connected softmax classifier with three regularizers.

    Each hidden stage is a ``Dense`` layer with an L2 kernel regularizer,
    followed by ``ReLU`` and ``BatchNormalization`` (in the order chosen by
    ``is_norm_after_ac``) and finally ``Dropout``.

    Args:
        input_shape: Shape of one input sample, passed to ``keras.layers.Input``.
        output_units: Width of the final softmax layer.
        num_neurons: Iterable with the width of each hidden ``Dense`` layer.
            May be empty, in which case the input feeds the output layer
            directly.
        is_norm_after_ac: If truthy, apply ReLU first and batch normalization
            second; otherwise batch normalization first and ReLU second.
        l2ratio: Factor handed to the ``l2`` kernel regularizer of every
            hidden layer (the output layer is not regularized).
        drop: Rate handed to every ``Dropout`` layer.

    Returns:
        An uncompiled ``keras.models.Model`` from the input to the softmax
        output.
    """
    input_layer = keras.layers.Input(input_shape)

    # Start from the input so that every stage, including the first, is built
    # by the same code and an empty `num_neurons` still leaves `x` defined.
    x = input_layer
    for i, n_units in enumerate(num_neurons, start=1):
        x = keras.layers.Dense(units=n_units,
                               name="hidden_layer" + str(i),
                               kernel_regularizer=l2(l2ratio))(x)
        if is_norm_after_ac:
            x = ReLU()(x)
            x = BatchNormalization()(x)
        else:
            x = BatchNormalization()(x)
            x = ReLU()(x)
        x = Dropout(drop)(x)

    out = keras.layers.Dense(units=output_units, activation="softmax", name="output")(x)

    model = keras.models.Model(inputs=[input_layer], outputs=[out])
    return model

In [6]:
"""Code Here
設定超參數
"""
## Hyperparameter settings
# Fixed for every run: passed to the SGD optimizer as `lr`.
LEARNING_RATE = 1e-3
# Fixed for every run: number of epochs passed to `model.fit`.
EPOCHS = 50
# Fixed for every run: passed to the SGD optimizer as `momentum`.
MOMENTUM = 0.95
# The lists below are swept by the training loop, one value per run.
# Batch sizes passed to `model.fit`.
BATCH_SIZE = [32, 256]
# L2 factors passed to `build_mlp` as `l2ratio`.
L2_EXP = [1e-4, 1e-12]
# Layer order passed to `build_mlp`: True = ReLU then BatchNormalization,
# False = BatchNormalization then ReLU.
IS_NORM_AFTER_AC = [True, False]
# Dropout rates passed to `build_mlp` as `drop`.
DROPOUT = [0.25, 0.75]

In [7]:
results = {}
"""Code Here
撰寫你的訓練流程並將結果用 dictionary 紀錄
"""
# Train one independent model for every combination of layer order, L2 factor,
# dropout rate and batch size, and store its learning curves in `results`.
#
# Each combination gets a freshly built and compiled model. Reusing one model
# for several batch sizes would make the later run continue from the weights
# and optimizer state of the earlier one, so its curves would not be
# comparable.
for is_norm_after_ac, l2ratio, drop, batch in itertools.product(
        IS_NORM_AFTER_AC, L2_EXP, DROPOUT, BATCH_SIZE):
    # Discard the previous run's graph so models do not pile up in the
    # backend session over the whole sweep.
    keras.backend.clear_session()

    model = build_mlp(input_shape=x_train.shape[1:],
                      is_norm_after_ac=is_norm_after_ac,
                      l2ratio=l2ratio,
                      drop=drop)
    model.summary()
    optimizer = keras.optimizers.SGD(lr=LEARNING_RATE, nesterov=True, momentum=MOMENTUM)
    model.compile(loss="categorical_crossentropy", metrics=["accuracy"], optimizer=optimizer)

    # `fit` returns the History object for exactly this run.
    history = model.fit(x_train, y_train,
                        epochs=EPOCHS,
                        batch_size=batch,
                        validation_data=(x_test, y_test),
                        shuffle=True).history

    # Older Keras releases log the metric as "acc", newer ones as "accuracy".
    acc_key = "acc" if "acc" in history else "accuracy"

    # Collect results
    train_loss = history["loss"]
    valid_loss = history["val_loss"]
    train_acc = history[acc_key]
    valid_acc = history["val_" + acc_key]

    # The key format must stay exactly as is: the plotting cell rebuilds it.
    results['batch_'+str(batch)+"_is_norm_after_ac="+str(is_norm_after_ac)+"_drop="+str(drop)+"l2ratio="+str(l2ratio)] = [train_loss, valid_loss, train_acc, valid_acc]

W0722 14:06:31.704648 4508657088 deprecation_wrapper.py:119] From /Users/ianfan/anaconda3/envs/tensorflow/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0722 14:06:31.718039 4508657088 deprecation_wrapper.py:119] From /Users/ianfan/anaconda3/envs/tensorflow/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0722 14:06:31.720273 4508657088 deprecation_wrapper.py:119] From /Users/ianfan/anaconda3/envs/tensorflow/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0722 14:06:31.846626 4508657088 deprecation_wrapper.py:119] From /Users/ianfan/anaconda3/envs/tensorflow/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:133: The name tf.placeholder_with_default 

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 3072)              0         
_________________________________________________________________
hidden_layer1 (Dense)        (None, 512)               1573376   
_________________________________________________________________
re_lu_1 (ReLU)               (None, 512)               0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 512)               2048      
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
hidden_layer2 (Dense)        (None, 256)               131328    
_________________________________________________________________
re_lu_2 (ReLU)               (None, 256)               0         
__________

W0722 14:06:32.444844 4508657088 deprecation.py:323] From /Users/ianfan/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 50000 samples, validate on 10000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


NameError: name 'is_norm_after_acc' is not defined

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
"""Code Here
將結果繪出
"""

In [None]:
# Visit the hyperparameter grid in the same order as the nested loops would
# (layer order, then L2 factor, then dropout, then batch size) and draw a
# loss figure followed by an accuracy figure for every stored run.
grid = itertools.product(IS_NORM_AFTER_AC, L2_EXP, DROPOUT, BATCH_SIZE)
for is_norm_after_ac, l2ratio, drop, batch in grid:
    key = "batch_{}_is_norm_after_ac={}_drop={}l2ratio={}".format(
        batch, is_norm_after_ac, drop, l2ratio)
    result = results[key]
    train_loss, valid_loss = result[0], result[1]
    train_acc, valid_acc = result[2], result[3]

    # One entry per figure: title prefix plus the labelled curves it shows.
    figures = (
        ("Loss_", (("train loss", train_loss), ("valid loss", valid_loss))),
        ("Accuracy_", (("train accuracy", train_acc), ("valid accuracy", valid_acc))),
    )
    for title_prefix, curves in figures:
        for label, values in curves:
            plt.plot(range(len(values)), values, label=label)
        plt.legend()
        plt.title(title_prefix + key)
        plt.show()