In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow

In [2]:
from tensorflow import keras

In [3]:
# Load Fashion MNIST through Keras; load_data() is unpacked as (train, test) pairs of (images, labels).
(x_train_full, y_train_full),(x_test, y_test) = keras.datasets.fashion_mnist.load_data()
# Divide every pixel by 255.0 (assumes raw pixels are 0-255 integers — TODO confirm).
x_train_full, x_test = x_train_full / 255.0, x_test / 255.0
# The first 5000 training samples become the validation set; the rest is used for training.
x_valid, x_train = x_train_full[:5000], x_train_full[5000:]
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

## 활성화 함수

In [None]:
# Reference snippets kept as inert string literals: none of the code inside
# the triple-quoted blocks below is executed.
''' 
## 활성화 함수
# HE init
keras.layers.Dense(10, activation='relu', kernel_initializer='he_normal')

# fan-avg HE init
he_avg_init = keras.initializers.VarianceScaling(scale=2., mode='fan_avg', distribution='uniform')
keras.layers.Dense(10, activation='sigmoid', kernel_initializer=he_avg_init)
'''
'''
# LeakyReLU 활성화
model = keras.models.Sequential([
    keras.layers.Dense(10, kernel_initializer='he_normal'),
    keras.layers.LeakyReLU(alpha=0.2),
])
'''
'''
# SELU 활성화
model = keras.models.Sequential([
    keras.layers.Dense(10, activation='selu', kernel_initializer='lecun_normal'),
])
'''

In [4]:
def modelcompile(model, learning_rate=1e-3):
    """Compile ``model`` in place for integer-label multi-class classification.

    The model is compiled with sparse categorical cross-entropy as the loss,
    a plain SGD optimizer, and sparse categorical accuracy as the only metric.

    Args:
        model: Object exposing a Keras-style
            ``compile(loss=..., optimizer=..., metrics=...)`` method.
        learning_rate: Value passed to ``keras.optimizers.SGD``. Defaults to
            ``1e-3``, so existing ``modelcompile(model)`` calls are unchanged.

    Returns:
        The same ``model`` object that was passed in.
    """
    model.compile(
        loss=keras.losses.sparse_categorical_crossentropy,
        optimizer=keras.optimizers.SGD(learning_rate),
        metrics=[keras.metrics.sparse_categorical_accuracy],
    )
    return model

In [5]:
# LeakyReLU activation: each hidden Dense layer is created without an
# activation and is followed by a separate LeakyReLU layer (default arguments).
# Hidden kernels use He-normal initialization.
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(300, kernel_initializer='he_normal'),
    keras.layers.LeakyReLU(),
    keras.layers.Dense(100, kernel_initializer='he_normal'),
    keras.layers.LeakyReLU(),
    keras.layers.Dense(10, activation='softmax')
])

In [6]:
modelcompile(model)

<tensorflow.python.keras.engine.sequential.Sequential at 0x65a17c4d0>

In [7]:
history = model.fit(x_train, y_train, epochs=10, validation_data=(x_valid, y_valid))

Train on 55000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [8]:
# PReLU activation: same layout as the LeakyReLU network, but each hidden
# Dense layer is followed by a PReLU layer instead.
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(300, kernel_initializer='he_normal'),
    keras.layers.PReLU(),
    keras.layers.Dense(100, kernel_initializer='he_normal'),
    keras.layers.PReLU(),
    keras.layers.Dense(10, activation='softmax')
])

In [9]:
modelcompile(model)

<tensorflow.python.keras.engine.sequential.Sequential at 0x67a2149d0>

In [10]:
history2 = model.fit(x_train, y_train, epochs = 10, validation_data=(x_valid, y_valid))

Train on 55000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [11]:
# SELU: a very deep network — one 300-unit hidden layer followed by 99 hidden
# layers of 100 units (100 hidden layers in total), all using SELU activation
# with LeCun-normal initialization, then a 10-way softmax output.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.Dense(300, activation='selu', kernel_initializer='lecun_normal'))

# The loop variable is unused; the loop only repeats the add() call 99 times.
for layer in range(99):
    model.add(keras.layers.Dense(100, activation='selu', kernel_initializer='lecun_normal'))
model.add(keras.layers.Dense(10, activation='softmax'))

In [12]:
modelcompile(model)

<tensorflow.python.keras.engine.sequential.Sequential at 0x67e1b4790>

In [11]:
# Now let's train it. Don't forget that the inputs must be rescaled to
# mean 0 and standard deviation 1:
# Per-pixel statistics are computed over the training set only (axis=0);
# keepdims=True keeps a leading axis of size 1 so they broadcast over samples.
pixel_means = x_train.mean(axis=0, keepdims=True)
pixel_stds = x_train.std(axis=0, keepdims=True)

In [12]:
# Standardize every split with the training-set statistics (pixel_means / pixel_stds).
x_train_scaled = (x_train - pixel_means) / pixel_stds
x_valid_scaled = (x_valid - pixel_means) / pixel_stds
x_test_scaled = (x_test - pixel_means) / pixel_stds

In [21]:
history = model.fit(x_train_scaled, y_train, epochs = 5, validation_data=(x_valid_scaled, y_valid) )

Train on 55000 samples, validate on 5000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [25]:
# ReLU counterpart of the deep SELU network: the same 300 + 99x100 hidden
# layout, but with ReLU activation and He-normal initialization.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.Dense(300, activation='relu', kernel_initializer='he_normal'))
for layer in range(99):
    model.add(keras.layers.Dense(100, activation='relu', kernel_initializer='he_normal'))
model.add(keras.layers.Dense(10, activation='softmax'))

In [26]:
modelcompile(model)

<tensorflow.python.keras.engine.sequential.Sequential at 0x645fd5150>

In [28]:
history = model.fit(x_train_scaled, y_train, epochs = 5, validation_data = (x_valid_scaled, y_valid))

Train on 55000 samples, validate on 5000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


## Batch Normalization

In [29]:
# Batch Normalization #1: a BatchNormalization layer right after the input
# is flattened and after each ReLU hidden layer (i.e. BN applied after the activation).
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(300, activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(100, activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(10, activation='softmax'),
])

In [30]:
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_4 (Flatten)          (None, 784)               0         
_________________________________________________________________
batch_normalization (BatchNo (None, 784)               3136      
_________________________________________________________________
dense_208 (Dense)            (None, 300)               235500    
_________________________________________________________________
batch_normalization_1 (Batch (None, 300)               1200      
_________________________________________________________________
dense_209 (Dense)            (None, 100)               30100     
_________________________________________________________________
batch_normalization_2 (Batch (None, 100)               400       
_________________________________________________________________
dense_210 (Dense)            (None, 10)               

In [31]:
modelcompile(model)

<tensorflow.python.keras.engine.sequential.Sequential at 0x64a22bcd0>

In [32]:
history = model.fit(x_train, y_train, epochs = 10, validation_data=(x_valid, y_valid))

Train on 55000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [35]:
# Batch Normalization #2: BN placed before the activation. The hidden Dense
# layers have no activation and no bias (use_bias=False); each is followed by
# BatchNormalization and then a separate ReLU Activation layer.
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(300, use_bias=False),
    keras.layers.BatchNormalization(),
    keras.layers.Activation('relu'),
    keras.layers.Dense(100, use_bias=False),
    keras.layers.BatchNormalization(),
    keras.layers.Activation('relu'),
    keras.layers.Dense(10, activation='softmax'),    
])

In [36]:
modelcompile(model)
history = model.fit(x_train, y_train, epochs = 10, validation_data=(x_valid, y_valid))

Train on 55000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [37]:
# Batch Normalization #3: same layout as #1 (BN after each activated hidden
# layer), but with ELU activation and He-normal initialization.
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(300, activation='elu', kernel_initializer='he_normal'),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(100, activation='elu', kernel_initializer='he_normal'),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(10, activation='softmax'),
])

In [15]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_3 (Flatten)          (None, 784)               0         
_________________________________________________________________
batch_normalization_9 (Batch (None, 784)               3136      
_________________________________________________________________
dense_9 (Dense)              (None, 300)               235500    
_________________________________________________________________
batch_normalization_10 (Batc (None, 300)               1200      
_________________________________________________________________
dense_10 (Dense)             (None, 100)               30100     
_________________________________________________________________
batch_normalization_11 (Batc (None, 100)               400       
_________________________________________________________________
dense_11 (Dense)             (None, 10)               

In [16]:
[(var.name, var.trainable) for var in model.layers[1].variables]

[('batch_normalization_9/gamma:0', True),
 ('batch_normalization_9/beta:0', True),
 ('batch_normalization_9/moving_mean:0', False),
 ('batch_normalization_9/moving_variance:0', False)]

In [17]:
model.layers[1].updates

[<tf.Operation 'cond_6/Identity' type=Identity>,
 <tf.Operation 'cond_7/Identity' type=Identity>]

In [38]:
modelcompile(model)
history = model.fit(x_train, y_train, epochs = 10, validation_data=(x_valid, y_valid))

Train on 55000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [39]:
# Batch Normalization #4: same layout as #2 (bias-free Dense -> BN -> separate
# Activation layer), but with ELU activation and He-normal initialization.
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(300, kernel_initializer='he_normal', use_bias=False),
    keras.layers.BatchNormalization(),
    keras.layers.Activation('elu'),
    keras.layers.Dense(100, kernel_initializer='he_normal', use_bias=False),
    keras.layers.BatchNormalization(),
    keras.layers.Activation('elu'),
    keras.layers.Dense(10, activation='softmax'),    
])

In [40]:
print(model.summary())
[(var.name, var.trainable) for var in model.layers[3].variables]

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_8 (Flatten)          (None, 784)               0         
_________________________________________________________________
batch_normalization_12 (Batc (None, 784)               3136      
_________________________________________________________________
dense_220 (Dense)            (None, 300)               235200    
_________________________________________________________________
batch_normalization_13 (Batc (None, 300)               1200      
_________________________________________________________________
activation_4 (Activation)    (None, 300)               0         
_________________________________________________________________
dense_221 (Dense)            (None, 100)               30000     
_________________________________________________________________
batch_normalization_14 (Batc (None, 100)              

[('batch_normalization_13/gamma:0', True),
 ('batch_normalization_13/beta:0', True),
 ('batch_normalization_13/moving_mean:0', False),
 ('batch_normalization_13/moving_variance:0', False)]

In [41]:
modelcompile(model)
history = model.fit(x_train, y_train, epochs = 10, validation_data=(x_valid, y_valid))

Train on 55000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## 그레이디언트 클리핑

In [23]:
# To keep clipping from changing the direction of the gradient vector,
# use clipnorm instead of clipvalue.
optimizer = keras.optimizers.SGD(clipvalue = 1.0)  # overwritten by the next line; shown for comparison only
optimizer = keras.optimizers.SGD(clipnorm = 1.0)
# NOTE(review): this recompiles the current `model` with an MSE loss —
# presumably only to show how the optimizer is passed; confirm it is intended.
model.compile(loss='mse', optimizer=optimizer)

## 사전 훈련된 층 재사용하기

In [43]:
def split_dataset(x, y):
    """Partition a labelled dataset into task A and task B.

    Samples labelled 5 or 6 go to task B; every other sample goes to task A.

    Args:
        x: Array of samples that can be indexed with a boolean mask on axis 0.
        y: NumPy array of integer class labels aligned with ``x``.

    Returns:
        ``((x_A, y_A), (x_B, y_B))``. ``y_A`` keeps the original labels except
        that labels greater than 6 are shifted down by 2. ``y_B`` is a float32
        array holding 1.0 where the label is 6 and 0.0 where it is 5.
    """
    in_task_b = (y == 5) | (y == 6)
    in_task_a = ~in_task_b

    # Boolean-mask indexing yields a new array, so the relabelling below
    # does not modify the caller's ``y``.
    labels_a = y[in_task_a]
    above_gap = labels_a > 6
    labels_a[above_gap] -= 2

    labels_b = (y[in_task_b] == 6).astype(np.float32)

    task_a = (x[in_task_a], labels_a)
    task_b = (x[in_task_b], labels_b)
    return (task_a, task_b)

In [44]:
# Split the train, validation and test sets into their task-A and task-B parts.
(x_train_A, y_train_A), (x_train_B, y_train_B) = split_dataset(x_train, y_train)
(x_valid_A, y_valid_A), (x_valid_B, y_valid_B) = split_dataset(x_valid, y_valid)
(x_test_A, y_test_A), (x_test_B, y_test_B) = split_dataset(x_test, y_test)
# Keep only the first 200 task-B training samples.
x_train_B = x_train_B[:200]
y_train_B = y_train_B[:200]

In [47]:
x_train_A.shape, x_train_B.shape, 

((43986, 28, 28), (200, 28, 28))

In [48]:
y_train_A[:30], y_train_B[:30]

(array([4, 0, 5, 7, 7, 7, 4, 4, 3, 4, 0, 1, 6, 3, 4, 3, 2, 6, 5, 3, 4, 5,
        1, 3, 4, 2, 0, 6, 7, 1], dtype=uint8),
 array([1., 1., 0., 0., 0., 0., 1., 1., 1., 0., 0., 1., 1., 0., 0., 0., 0.,
        0., 0., 1., 1., 0., 0., 1., 1., 0., 1., 1., 1., 1.], dtype=float32))

In [49]:
# Model A: five SELU hidden layers (300, 100, 50, 50, 50 units) and an 8-unit
# softmax output, matching the eight task-A labels produced by split_dataset.
model_A = keras.models.Sequential()
model_A.add(keras.layers.Flatten(input_shape=[28, 28]))
for n_hidden in (300, 100, 50, 50, 50):
    model_A.add(keras.layers.Dense(n_hidden, activation='selu'))
model_A.add(keras.layers.Dense(8, activation='softmax'))

In [50]:
modelcompile(model_A)
history_A = model_A.fit(x_train_A, y_train_A, epochs=20, validation_data=(x_valid_A, y_valid_A))

Train on 43986 samples, validate on 4014 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [68]:
model_A.save('./models/my_model_A.h5')

In [79]:
# Model B: same hidden layout as model A, but with a single sigmoid output
# unit for the binary task B. It is built with freshly initialized layers.
model_B = keras.models.Sequential()
model_B.add(keras.layers.Flatten(input_shape=[28, 28]))
for n_hidden in (300, 100, 50, 50, 50):
    model_B.add(keras.layers.Dense(n_hidden, activation='selu'))
model_B.add(keras.layers.Dense(1, activation='sigmoid'))

In [80]:
model_B.compile(loss="binary_crossentropy",
                optimizer='sgd',
                metrics=["accuracy"])

In [81]:
# Train model B on the 200 task-B samples for 20 epochs, validating on the task-B validation set.
history_B = model_B.fit(x_train_B, y_train_B, epochs=20, validation_data=(x_valid_B, y_valid_B))
hitory_B = history_B  # backward-compatible alias for the original misspelled name

Train on 200 samples, validate on 986 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [83]:
model_B.save('./models/my_model_B.h5')

In [85]:
# Reload the saved model A and build the task-B model from all of its layers
# except the output layer, then add a new single-unit sigmoid output.
# NOTE: the layer objects are passed as-is (not copied), so model_B_on_A and
# model_A refer to the same layer instances.
model_A = keras.models.load_model('./models/my_model_A.h5')
model_B_on_A = keras.models.Sequential(model_A.layers[:-1])
model_B_on_A.add(keras.layers.Dense(1, activation='sigmoid'))

In [86]:
# Separate copy of model A: clone_model() is called on model A and the weights
# are then copied over explicitly with set_weights().
model_A_clone = keras.models.clone_model(model_A)
model_A_clone.set_weights(model_A.get_weights())

In [87]:
# Freeze every layer except the new output layer, then compile the model
# with the frozen configuration for the first training phase.
for layer in model_B_on_A.layers[:-1]:
    layer.trainable = False
    
model_B_on_A.compile(loss = 'binary_crossentropy', optimizer ='sgd', metrics=['accuracy'])

In [88]:
history = model_B_on_A.fit(x_train_B, y_train_B, epochs=4, validation_data=(x_valid_B, y_valid_B))

Train on 200 samples, validate on 986 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [89]:
# Unfreeze the reused layers so they can be fine-tuned in the next training phase.
for layer in model_B_on_A.layers[:-1]:
    layer.trainable = True

In [90]:
# Recompile after unfreezing, using SGD with a learning rate of 1e-4 for the
# fine-tuning phase.
optimizer = keras.optimizers.SGD(1e-4)
model_B_on_A.compile(loss="binary_crossentropy",
                    optimizer = optimizer,
                    metrics=['accuracy'])

In [92]:
history = model_B_on_A.fit(x_train_B, y_train_B, epochs=16, validation_data=(x_valid_B, y_valid_B))

Train on 200 samples, validate on 986 samples
Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


In [93]:
model_B_on_A.evaluate(x_test_B, y_test_B)



[0.11179840433597564, 0.993]

In [94]:
model_B.evaluate(x_test_B, y_test_B)



[0.029746141504496337, 0.989]

## 고속 옵티마이저

In [None]:
# Catalogue of optimizer configurations. Each assignment replaces the previous
# one, so only the last optimizer remains bound after the cell runs.
# `learning_rate` is used instead of the deprecated `lr` alias.

# Momentum optimization: a momentum of 0.9 usually works well.
optimizer = keras.optimizers.SGD(learning_rate=0.001, momentum=0.9)

# Nesterov accelerated gradient: set nesterov=True together with momentum.
optimizer = keras.optimizers.SGD(learning_rate=0.001, momentum=0.9, nesterov=True)

# AdaGrad
optimizer = keras.optimizers.Adagrad(learning_rate=0.001)

# RMSProp
optimizer = keras.optimizers.RMSprop(learning_rate=0.001, rho=0.9)

In [None]:
# Adam optimizer with its basic settings.
# `learning_rate` is used instead of the deprecated `lr` alias.
optimizer = keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999)

# AdaMax / Nadam: variants of Adam.
# AdaMax: in practice more stable than Adam, but with lower performance.
# It is one of the optimizers to try when Adam does not work well.
optimizer = keras.optimizers.Adamax(learning_rate=0.001, beta_1=0.9, beta_2=0.999)
# Nadam: Adam + Nesterov.
optimizer = keras.optimizers.Nadam(learning_rate=0.001, beta_1=0.9, beta_2=0.999)

## 학습 스케줄링

In [5]:
# Power scheduling: only the `decay` argument needs to be set.
# `learning_rate` is used instead of the deprecated `lr` alias.
optimizer = keras.optimizers.SGD(learning_rate=0.01, decay=1e-4)

In [6]:
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(300, activation='selu', kernel_initializer='lecun_normal'),
    keras.layers.Dense(100, activation='selu', kernel_initializer='lecun_normal'),
    keras.layers.Dense(10, activation='softmax')
])

In [7]:
# Compile with the power-scheduling SGD optimizer. The keyword must be
# `metrics` (plural); with the misspelled `metric` no accuracy is tracked.
model.compile(loss=keras.losses.sparse_categorical_crossentropy,
              optimizer=optimizer,
              metrics=[keras.metrics.sparse_categorical_accuracy])
n_epochs = 25
history = model.fit(x_train, y_train, epochs=n_epochs, validation_data=(x_valid, y_valid))

Train on 55000 samples, validate on 5000 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [8]:
# Recompute, for plotting, the learning rate at the start of each epoch under
# power scheduling: lr = learning_rate / (1 + decay * step), where
# step = epoch * n_steps_per_epoch.
learning_rate = 0.01
decay = 1e-4
batch_size = 32  # assumed to match the batch size used by fit(), which does not set it explicitly — TODO confirm
n_steps_per_epoch = len(x_train) // batch_size
epochs = np.arange(n_epochs)
lrs = learning_rate / (1 + decay * epochs * n_steps_per_epoch)

In [None]:
plt.plot(epochs, lrs,  "o-")
plt.axis([0, n_epochs - 1, 0, 0.01])
plt.xlabel("Epoch")
plt.ylabel("Learning Rate")
plt.title("Power Scheduling", fontsize=14)
plt.grid(True)
plt.show()

In [4]:
# Exponential scheduling
def exponential_decay_fn(epoch):
    """Return the learning rate for ``epoch``: 0.01 multiplied by 0.1 every 20 epochs."""
    initial_lr = 0.01
    epochs_per_tenfold = 20
    return initial_lr * 0.1 ** (epoch / epochs_per_tenfold)

In [7]:
def exponential_decay(lr0, s):
    """Build an exponential-decay learning-rate schedule.

    Args:
        lr0: Learning rate at epoch 0.
        s: Number of epochs over which the learning rate shrinks by a factor of 10.

    Returns:
        A function ``f(epoch)`` returning ``lr0 * 0.1 ** (epoch / s)``.
    """
    def exponential_decay_fn(epoch):
        # Use the enclosing lr0 and s rather than hard-coded constants, so
        # the schedule actually follows the arguments passed by the caller.
        return lr0 * 0.1 ** (epoch / s)
    return exponential_decay_fn

exponential_decay_fn = exponential_decay(lr0 = 0.01, s = 20)

In [9]:
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(300, activation='selu', kernel_initializer='lecun_normal'),
    keras.layers.Dense(100, activation='selu', kernel_initializer='lecun_normal'),
    keras.layers.Dense(10, activation='softmax')
])

In [10]:
model.compile(loss = keras.losses.sparse_categorical_crossentropy,
             optimizer = 'nadam',
             metrics = ['accuracy'])
n_epochs = 25

In [16]:
lr_scheduler = keras.callbacks.LearningRateScheduler(exponential_decay_fn)
history = model.fit(x_train_scaled, y_train, epochs = n_epochs, validation_data=(x_valid_scaled, y_valid), callbacks=[lr_scheduler])

Train on 55000 samples, validate on 5000 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [None]:
# Learning-rate curve for exponential scheduling applied per batch: the rate
# is divided by 10 every `decay_steps` batches (20 epochs' worth of batches).
# Everything the plot needs is defined here so the cell runs on its own.
# NOTE: illustrative only — the training run above used the per-epoch scheduler.
lr0 = 0.01
steps_per_epoch = len(x_train) // 32  # 32 = batch size assumed for the plot
decay_steps = 20 * steps_per_epoch
n_steps = n_epochs * steps_per_epoch
steps = np.arange(n_steps)
lrs = lr0 * 0.1 ** (steps / decay_steps)

plt.plot(steps, lrs, "-", linewidth=2)
plt.axis([0, n_steps - 1, 0, lr0 * 1.1])
plt.xlabel("Batch")
plt.ylabel("Learning Rate")
plt.title("Exponential Scheduling (per batch)", fontsize=14)
plt.grid(True)
plt.show()

## 규제를 사용해 과대적합 피하기

In [22]:
# l1 and l2 regularization: this example layer applies an l2 penalty with
# factor 0.01 to its kernel via kernel_regularizer.
layer = keras.layers.Dense(100, activation='elu', 
                           kernel_initializer='he_normal', 
                           kernel_regularizer=keras.regularizers.l2(0.01))

In [23]:
# l2-regularized classifier: two ELU hidden layers and a softmax output,
# each with an l2 kernel penalty of 0.01.
# The output layer has 10 units, one per Fashion MNIST class.
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(300, activation='elu',
                       kernel_initializer='he_normal',
                       kernel_regularizer=keras.regularizers.l2(0.01)),
    keras.layers.Dense(100, activation='elu',
                       kernel_initializer='he_normal',
                       kernel_regularizer=keras.regularizers.l2(0.01)),
    keras.layers.Dense(10, activation='softmax',
                       kernel_regularizer=keras.regularizers.l2(0.01)),
])

In [25]:
model.compile(loss = keras.losses.sparse_categorical_crossentropy,
             optimizer = keras.optimizers.Nadam(),
             metrics = [keras.metrics.sparse_categorical_accuracy])

In [26]:
history = model.fit(x_train_scaled, y_train, epochs=2, validation_data=(x_valid_scaled, y_valid))

Train on 55000 samples, validate on 5000 samples
Epoch 1/2
Epoch 2/2


In [31]:
# Code refactoring: functools.partial pre-binds the Dense arguments that were
# repeated for every layer (ELU activation, He-normal init, l2(0.01) penalty).
from functools import partial

RegularizedDense = partial(keras.layers.Dense,
                          activation='elu',
                          kernel_initializer = 'he_normal',
                          kernel_regularizer = keras.regularizers.l2(0.01))

# The output layer overrides the pre-bound activation and initializer but
# keeps the l2 kernel regularizer.
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    RegularizedDense(300),
    RegularizedDense(100),
    RegularizedDense(10, activation='softmax', kernel_initializer='glorot_uniform')
])

In [34]:
model.compile(loss = keras.losses.sparse_categorical_crossentropy,
             optimizer = keras.optimizers.Nadam(),
             metrics = [keras.metrics.sparse_categorical_accuracy])

In [35]:
history = model.fit(x_train_scaled, y_train, epochs=2, validation_data=(x_valid_scaled, y_valid))

Train on 55000 samples, validate on 5000 samples
Epoch 1/2
Epoch 2/2


## DropOut

In [36]:
# Dropout: a Dropout layer with rate 0.2 is placed before every Dense layer
# (after the flattened input and after each ELU hidden layer).
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(300, activation='elu', kernel_initializer='he_normal'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(100, activation='elu', kernel_initializer='he_normal'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(10, activation='softmax')
])

In [37]:
model.compile(loss = keras.losses.sparse_categorical_crossentropy,
             optimizer = keras.optimizers.Nadam(),
             metrics = [keras.metrics.sparse_categorical_accuracy])

In [39]:
history = model.fit(x_train_scaled, y_train, epochs =2 , validation_data=(x_valid_scaled, y_valid))

Train on 55000 samples, validate on 5000 samples
Epoch 1/2
Epoch 2/2


In [40]:
# SELU-based dropout: the SELU network uses AlphaDropout layers (rate 0.2)
# in place of the regular Dropout layers.
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.AlphaDropout(0.2),
    keras.layers.Dense(300, activation='selu', kernel_initializer='lecun_normal'),
    keras.layers.AlphaDropout(0.2),
    keras.layers.Dense(100, activation='selu', kernel_initializer='lecun_normal'),
    keras.layers.AlphaDropout(0.2),
    keras.layers.Dense(10, activation='softmax')
])
# SGD with Nesterov momentum; `learning_rate` replaces the deprecated `lr` alias.
optimizer = keras.optimizers.SGD(learning_rate=0.01, momentum=0.9, nesterov=True)

In [41]:
model.compile(loss = keras.losses.sparse_categorical_crossentropy,
             optimizer =optimizer,
             metrics = [keras.metrics.sparse_categorical_accuracy])

In [42]:
history = model.fit(x_train_scaled, y_train, epochs =2 , validation_data=(x_valid_scaled, y_valid))

Train on 55000 samples, validate on 5000 samples
Epoch 1/2
Epoch 2/2


In [43]:
model.evaluate(x_test_scaled, y_test)



[0.5896442852497101, 0.8373]

In [44]:
model.evaluate(x_test_scaled, y_test)



[0.5896442852497101, 0.8373]

In [45]:
history = model.fit(x_train_scaled, y_train)

Train on 55000 samples


## Monte Carlo Dropout

In [49]:
# Monte Carlo dropout: run 100 forward passes over the test set with
# training=True (so the model runs in training mode) and stack the results
# along a new leading axis, one slice per pass. The loop variable is unused.
y_probas = np.stack([model(x_test_scaled, training=True) for sample in range(100)])

In [50]:
y_proba = y_probas.mean(axis=0)

In [51]:
y_std = y_probas.std(axis = 0)

In [56]:
np.round(model.predict(x_test_scaled[:1]), 2)

array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.  , 0.68, 0.  , 0.31]],
      dtype=float32)

In [58]:
np.round(y_probas[:, :1], 2)

array([[[0.  , 0.  , 0.  , 0.  , 0.  , 0.07, 0.  , 0.33, 0.  , 0.6 ]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.1 , 0.  , 0.29, 0.  , 0.61]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.  , 0.74, 0.  , 0.25]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.75, 0.  , 0.13, 0.  , 0.11]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.21, 0.  , 0.31, 0.  , 0.48]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.46, 0.  , 0.43, 0.  , 0.11]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.17, 0.  , 0.65, 0.  , 0.18]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.3 , 0.  , 0.59, 0.01, 0.11]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.21, 0.  , 0.58, 0.  , 0.2 ]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.58, 0.  , 0.08, 0.  , 0.34]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.04, 0.  , 0.21, 0.  , 0.75]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.51, 0.  , 0.09, 0.  , 0.39]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.14, 0.  , 0.26, 0.  , 0.61]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.75, 0.  , 0.13, 0.  , 0

In [60]:
np.round(y_proba[:1], 2)

array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.18, 0.  , 0.48, 0.  , 0.34]],
      dtype=float32)

In [61]:
np.round(y_std[:1], 2)

array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.18, 0.  , 0.26, 0.01, 0.24]],
      dtype=float32)

In [65]:
# Predicted class = index of the highest averaged probability per sample;
# accuracy = fraction of predictions equal to the true test labels.
y_pred = np.argmax(y_proba, axis=1)
accuracy = np.sum(y_pred == y_test) / len(y_test)
accuracy

0.8358

In [66]:
# If the model contains layers such as BatchNormalization, training mode must
# not be forced on the whole model. Instead, replace the dropout layers with
# subclasses like the ones below.
class MCDropout(keras.layers.Dropout):
    """Dropout layer whose call always passes training=True to the parent."""
    def call(self, inputs):
        return super().call(inputs, training =True)
class MCAlphaDropout(keras.layers.AlphaDropout):
    """AlphaDropout layer whose call always passes training=True to the parent."""
    def call(self, inputs):
        return super().call(inputs, training=True)

In [67]:
# Rebuild the model for MC dropout: every AlphaDropout layer is replaced by an
# MCAlphaDropout with the same rate; all other layer objects are reused as-is.
mc_model = keras.models.Sequential([
    MCAlphaDropout(layer.rate) if isinstance(layer, keras.layers.AlphaDropout) else layer
    for layer in model.layers
])

In [68]:
mc_model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_9 (Flatten)          (None, 784)               0         
_________________________________________________________________
mc_alpha_dropout (MCAlphaDro (None, 784)               0         
_________________________________________________________________
dense_19 (Dense)             (None, 300)               235500    
_________________________________________________________________
mc_alpha_dropout_1 (MCAlphaD (None, 300)               0         
_________________________________________________________________
dense_20 (Dense)             (None, 100)               30100     
_________________________________________________________________
mc_alpha_dropout_2 (MCAlphaD (None, 100)               0         
_________________________________________________________________
dense_21 (Dense)             (None, 10)               

In [69]:
# SGD with Nesterov momentum for the MC-dropout model; `learning_rate`
# replaces the deprecated `lr` alias.
optimizer = keras.optimizers.SGD(learning_rate=0.01, momentum=0.9, nesterov=True)

In [70]:
mc_model.compile( loss = keras.losses.sparse_categorical_crossentropy,
                optimizer = optimizer,
                metrics = [keras.metrics.sparse_categorical_accuracy])

In [71]:
mc_model.set_weights(model.get_weights())

In [72]:
# The steps above are required before MC dropout can be used.
# Average 100 predictions of mc_model for the first test sample and round to 2 decimals.
np.round(np.mean([mc_model.predict(x_test_scaled[:1]) for sample in range(100)], axis=0), 2)

array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.21, 0.  , 0.51, 0.  , 0.28]],
      dtype=float32)

## MAX-Norm 규제

In [73]:
layer = keras.layers.Dense(100, activation='elu', 
                           kernel_initializer='he_normal', 
                           kernel_constraint=keras.constraints.max_norm(1.))

In [74]:
# Dense factory with pre-bound SELU activation, LeCun-normal initialization
# and a max-norm kernel constraint of 1.0.
MaxNormDense = partial(keras.layers.Dense,
                      activation = 'selu', kernel_initializer='lecun_normal',
                      kernel_constraint = keras.constraints.max_norm(1.))

In [75]:
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    MaxNormDense(300),
    MaxNormDense(100),
    keras.layers.Dense(10, activation='softmax')
])

In [77]:
model.compile(loss = keras.losses.sparse_categorical_crossentropy,
             optimizer = keras.optimizers.Nadam(),
             metrics = [keras.metrics.sparse_categorical_accuracy])

In [78]:
history = model.fit(x_train_scaled, y_train, epochs = 2, validation_data=(x_valid_scaled, y_valid))

Train on 55000 samples, validate on 5000 samples
Epoch 1/2
Epoch 2/2
