## 実践的なガイドライン
- 初期値 : Heの初期値
- 活性化関数 : ELU
- 正則化 : バッチ正規化、Dropout
- オプティマイザ : NAG
- 学習率のスケジューリング : なし

In [0]:
'''前準備'''
# Common imports
import numpy as np

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

In [0]:
'''構築フェーズ'''
import tensorflow as tf
from functools import partial

# ハイパーパラメータ
batch_norm_momentum = 0.9
dropout_rate = 0.5
learning_rate = 0.01
momentum = 0.9

# ニューロン数
n_inputs = 28*28
n_hidden1 = 500
n_hidden2 = 100
n_hidden3 = 50
n_hidden4 = 30
n_outputs = 10

# グラフのリセット
reset_graph()

# プレースホルダー
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name='X')
y = tf.placeholder(tf.int64, shape=(None), name='y')
training = tf.placeholder_with_default(False, shape=(), name='training')
X_drop = tf.layers.dropout(X, dropout_rate, training=training)

# グラフの構築(正則化 : バッチ処理、Dropout)
with tf.name_scope('dnn'):
    # Heの初期値
    he_init = tf.variance_scaling_initializer()
    
    # バッチ処理
    my_batch_norm_layer = partial(
        tf.layers.batch_normalization,
        training = training,
        momentum = batch_norm_momentum)
    
    # 各層
    my_dense_layer = partial(
        tf.layers.dense,
        kernel_initializer = he_init)
    
    # 隠れ層1
    hidden1 = my_dense_layer(X_drop, n_hidden1, name='hidden1')
    bn1 = tf.nn.elu(my_batch_norm_layer(hidden1))
    bn1_drop = tf.layers.dropout(bn1, dropout_rate, training=training)
    
    # 隠れ層2
    hidden2 = my_dense_layer(bn1_drop, n_hidden2, name='hidden2')
    bn2 = tf.nn.elu(my_batch_norm_layer(hidden2))
    bn2_drop = tf.layers.dropout(bn2, dropout_rate, training=training)
    
    # 隠れ層3
    hidden3 = my_dense_layer(bn2_drop, n_hidden3, name='hidden3')
    bn3 = tf.nn.elu(my_batch_norm_layer(hidden3))
    bn3_drop = tf.layers.dropout(bn3, dropout_rate, training=training)
    
    # 隠れ層4
    hidden4 = my_dense_layer(bn3_drop, n_hidden4, name='hidden4')
    bn4 = tf.nn.elu(my_batch_norm_layer(hidden4))
    bn4_drop = tf.layers.dropout(bn4, dropout_rate, training=training)
    
    # 出力層
    logits_before_bn = my_dense_layer(bn4_drop, n_outputs, name='logits')
    logits = my_batch_norm_layer(logits_before_bn)
    
# 誤差関数
with tf.name_scope('loss'):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name='loss')

# 重みの更新
with tf.name_scope('train'):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)

# モデルの評価
with tf.name_scope('eval'):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [0]:
import numpy as np
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]

def shuffle_batch(X, y, batch_size):
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch

In [39]:
'''実行フェーズ'''
n_epochs = 10
batch_size = 50

# BatchNormalization用
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run([training_op, extra_update_ops], 
                    feed_dict={training: True, X: X_batch, y: y_batch})
        accuracy_batch = accuracy.eval(feed_dict={X: X_train, y: y_train})
        accuracy_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print(epoch, "Batch accurcy:", accuracy_batch, "Val accuracy:", accuracy_val)
        
    save_path = saver.save(sess, "./my_model_final.ckpt")

0 Batch accurcy: 0.91956365 Val accuracy: 0.9262
1 Batch accurcy: 0.93832725 Val accuracy: 0.9432
2 Batch accurcy: 0.94856364 Val accuracy: 0.9536
3 Batch accurcy: 0.9508182 Val accuracy: 0.9536
4 Batch accurcy: 0.9578182 Val accuracy: 0.9606
5 Batch accurcy: 0.958 Val accuracy: 0.9638
6 Batch accurcy: 0.9606182 Val accuracy: 0.9624
7 Batch accurcy: 0.9666 Val accuracy: 0.9696
8 Batch accurcy: 0.9662 Val accuracy: 0.97
9 Batch accurcy: 0.9685818 Val accuracy: 0.9698


## プレトレーニング済み層の再利用

In [0]:
'''構築フェーズ'''
import tensorflow as tf
from functools import partial

# ハイパーパラメータ
batch_norm_momentum = 0.9
dropout_rate = 0.5
learning_rate = 0.01
momentum = 0.9

# ニューロン数
n_inputs = 28*28
n_hidden1 = 500
n_hidden2 = 100
n_hidden3 = 50
n_hidden4 = 30  # new
n_outputs = 10  # new

# グラフのリセット
reset_graph()

# プレースホルダー
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name='X')
y = tf.placeholder(tf.int64, shape=(None), name='y')
training = tf.placeholder_with_default(False, shape=(), name='training')
X_drop = tf.layers.dropout(X, dropout_rate, training=training)

# グラフの構築(正則化 : バッチ処理、Dropout)
with tf.name_scope('dnn'):
    # Heの初期値
    he_init = tf.variance_scaling_initializer()
    
    # バッチ処理
    my_batch_norm_layer = partial(
        tf.layers.batch_normalization,
        training = training,
        momentum = batch_norm_momentum)
    
    # 各層
    my_dense_layer = partial(
        tf.layers.dense,
        kernel_initializer = he_init)
    
    # 隠れ層1
    hidden1 = my_dense_layer(X_drop, n_hidden1, name='hidden1')
    bn1 = tf.nn.elu(my_batch_norm_layer(hidden1))
    bn1_drop = tf.layers.dropout(bn1, dropout_rate, training=training)
    
    # 隠れ層2
    hidden2 = my_dense_layer(bn1_drop, n_hidden2, name='hidden2')
    bn2 = tf.nn.elu(my_batch_norm_layer(hidden2))
    bn2_drop = tf.layers.dropout(bn2, dropout_rate, training=training)
    
    # 隠れ層3
    hidden3 = my_dense_layer(bn2_drop, n_hidden3, name='hidden3')
    bn3 = tf.nn.elu(my_batch_norm_layer(hidden3))
    bn3_drop = tf.layers.dropout(bn3, dropout_rate, training=training)
    
    # 隠れ層4 new
    hidden4 = my_dense_layer(bn3_drop, n_hidden4, name='hidden4')
    bn4 = tf.nn.elu(my_batch_norm_layer(hidden4))
    bn4_drop = tf.layers.dropout(bn4, dropout_rate, training=training)
    
    # 出力層 new
    logits_before_bn = my_dense_layer(bn4_drop, n_outputs, name='logits')
    logits = my_batch_norm_layer(logits_before_bn)
    
# 誤差関数
with tf.name_scope('loss'):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name='loss')

# 重みの更新
with tf.name_scope('train'):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)

# モデルの評価
with tf.name_scope('eval'):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [41]:
'''実行フェーズ'''
n_epochs = 20
batch_size = 50

# 再利用(隠れ層1~3)
reuse_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                               scope="hidden[123]") # regular expression
restore_saver = tf.train.Saver(reuse_vars) # to restore layers 1-3

init = tf.global_variables_initializer()
saver = tf.train.Saver()

# BatchNormalization用
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

with tf.Session() as sess:
    init.run()
    restore_saver.restore(sess, "./my_model_final.ckpt")
    
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run([training_op, extra_update_ops], 
                    feed_dict={training: True, X: X_batch, y: y_batch})
        accuracy_batch = accuracy.eval(feed_dict={X: X_train, y: y_train})
        accuracy_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print(epoch, "Batch accurcy:", accuracy_batch, "Val accuracy:", accuracy_val)
        
    save_path = saver.save(sess, "./my_new_model_final.ckpt")

INFO:tensorflow:Restoring parameters from ./my_model_final.ckpt
0 Batch accurcy: 0.95863634 Val accuracy: 0.9596
1 Batch accurcy: 0.9641273 Val accuracy: 0.9664
2 Batch accurcy: 0.9657818 Val accuracy: 0.967
3 Batch accurcy: 0.9651818 Val accuracy: 0.966
4 Batch accurcy: 0.96892726 Val accuracy: 0.9704
5 Batch accurcy: 0.9677455 Val accuracy: 0.971
6 Batch accurcy: 0.9703091 Val accuracy: 0.969
7 Batch accurcy: 0.9717636 Val accuracy: 0.973
8 Batch accurcy: 0.97238183 Val accuracy: 0.9724
9 Batch accurcy: 0.9728182 Val accuracy: 0.9734
10 Batch accurcy: 0.9736 Val accuracy: 0.9734
11 Batch accurcy: 0.9711091 Val accuracy: 0.9714
12 Batch accurcy: 0.9734 Val accuracy: 0.9724
13 Batch accurcy: 0.97425455 Val accuracy: 0.9742
14 Batch accurcy: 0.97545457 Val accuracy: 0.9758
15 Batch accurcy: 0.9724909 Val accuracy: 0.973
16 Batch accurcy: 0.9758545 Val accuracy: 0.9758
17 Batch accurcy: 0.9766727 Val accuracy: 0.977
18 Batch accurcy: 0.97565454 Val accuracy: 0.9748
19 Batch accurcy: 0.97

In [0]:
for op in tf.get_default_graph().get_operations():
    print(op.name)