## 深層ニューラルネットワーク 

### DNN
- 隠れ層 : ニューロン数 100 × 5
- 初期値 : He
- 活性化関数 : ELU
- オプティマイザ : Adam
- 正則化 : 早期打ち切り

In [0]:
'''前準備'''
# Common imports
import tensorflow as tf
import numpy as np

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

In [3]:
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]

def shuffle_batch(X, y, batch_size):
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [0]:
# 前設定

he_init = tf.variance_scaling_initializer()

# DNN
def dnn(inputs, n_hidden_layers=5, n_neurons=100, name=None,
       activation=tf.nn.elu, initializer=he_init):
    with tf.variable_scope(name, "dnn"):
        for layer in range(n_hidden_layers):
            inputs = tf.layers.dense(inputs, n_neurons, activation=activation,
                                     kernel_initializer=initializer,
                                     name = "hidden%d" % (layer + 1))
        return inputs

In [0]:
# 構築フェーズ

reset_graph()

n_inputs = 28*28
n_outputs = 5

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name='X')
y = tf.placeholder(tf.int64, shape=(None), name='y')

dnn_outputs = dnn(X)

logits = tf.layers.dense(dnn_outputs, n_outputs, kernel_initializer=he_init,  name='logits')
y_proba = tf.nn.softmax(logits, name='y_proba')

In [0]:
learning_rate = 0.01

xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy, name="loss")

optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(loss, name="training_op")

correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [0]:
# データ
X_train1 = X_train[y_train < 5]
y_train1 = y_train[y_train < 5]
X_valid1 = X_valid[y_valid < 5]
y_valid1 = y_valid[y_valid < 5]
X_test1 = X_test[y_test < 5]
y_test1 = y_test[y_test < 5]

In [8]:
# 実行フェーズ(早期打ち切り)
import time

n_epochs = 1000
batch_size = 20

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    t0 = time.time()

    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train1, y_train1, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid1, y: y_valid1})
        if loss_val < best_loss:
            save_path = saver.save(sess, "./my_mnist_model_0_to_4.ckpt")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))
    
    t1 = time.time()
    print("Total training time: {:.1f}s".format(t1 - t0))

with tf.Session() as sess:
    saver.restore(sess, "./my_mnist_model_0_to_4.ckpt")
    acc_test = accuracy.eval(feed_dict={X: X_test1, y: y_test1})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

0	Validation loss: 0.111523	Best loss: 0.111523	Accuracy: 97.85%
1	Validation loss: 0.259246	Best loss: 0.111523	Accuracy: 95.00%
2	Validation loss: 0.116553	Best loss: 0.111523	Accuracy: 97.46%
3	Validation loss: 0.439698	Best loss: 0.111523	Accuracy: 96.72%
4	Validation loss: 0.124247	Best loss: 0.111523	Accuracy: 96.48%
5	Validation loss: 0.556761	Best loss: 0.111523	Accuracy: 78.93%
6	Validation loss: 0.488352	Best loss: 0.111523	Accuracy: 87.88%
7	Validation loss: 0.327603	Best loss: 0.111523	Accuracy: 96.56%
8	Validation loss: 0.153663	Best loss: 0.111523	Accuracy: 97.97%
9	Validation loss: 0.481955	Best loss: 0.111523	Accuracy: 84.17%
10	Validation loss: 0.657136	Best loss: 0.111523	Accuracy: 77.95%
11	Validation loss: 0.583860	Best loss: 0.111523	Accuracy: 80.02%
12	Validation loss: 0.384622	Best loss: 0.111523	Accuracy: 91.67%
13	Validation loss: 200.980087	Best loss: 0.111523	Accuracy: 96.56%
14	Validation loss: 0.147616	Best loss: 0.111523	Accuracy: 97.62%
15	Validation loss

### 以下を追加したVersion
- 交差検証
- Batch Normalization
- Dropout

In [0]:
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.exceptions import NotFittedError
import time

class DNNClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, n_hidden_layers=5, n_neurons=100, optimizer_class=tf.train.AdamOptimizer,
                 learning_rate=0.01, batch_size=20, activation=tf.nn.elu, initializer=he_init,
                 batch_norm_momentum=None, dropout_rate=None, random_state=None):
        """Initialize the DNNClassifier by simply storing all the hyperparameters."""
        self.n_hidden_layers = n_hidden_layers
        self.n_neurons = n_neurons
        self.optimizer_class = optimizer_class
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.activation = activation
        self.initializer = initializer
        self.batch_norm_momentum = batch_norm_momentum
        self.dropout_rate = dropout_rate
        self.random_state = random_state
        self._session = None

    def _dnn(self, inputs):
        """Build the hidden layers, with support for batch normalization and dropout."""
        for layer in range(self.n_hidden_layers):
            if self.dropout_rate:
                inputs = tf.layers.dropout(inputs, self.dropout_rate, training=self._training)
            inputs = tf.layers.dense(inputs, self.n_neurons,
                                     kernel_initializer=self.initializer,
                                     name="hidden%d" % (layer + 1))
            if self.batch_norm_momentum:
                inputs = tf.layers.batch_normalization(inputs, momentum=self.batch_norm_momentum,
                                                       training=self._training)
            inputs = self.activation(inputs, name="hidden%d_out" % (layer + 1))
        return inputs

    def _build_graph(self, n_inputs, n_outputs):
        """Build the same model as earlier"""
        if self.random_state is not None:
            tf.set_random_seed(self.random_state)
            np.random.seed(self.random_state)

        X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
        y = tf.placeholder(tf.int32, shape=(None), name="y")

        if self.batch_norm_momentum or self.dropout_rate:
            self._training = tf.placeholder_with_default(False, shape=(), name='training')
        else:
            self._training = None

        dnn_outputs = self._dnn(X)

        logits = tf.layers.dense(dnn_outputs, n_outputs, kernel_initializer=he_init, name="logits")
        Y_proba = tf.nn.softmax(logits, name="Y_proba")

        xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                                  logits=logits)
        loss = tf.reduce_mean(xentropy, name="loss")

        optimizer = self.optimizer_class(learning_rate=self.learning_rate)
        training_op = optimizer.minimize(loss)

        correct = tf.nn.in_top_k(logits, y, 1)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        # Make the important operations available easily through instance variables
        self._X, self._y = X, y
        self._Y_proba, self._loss = Y_proba, loss
        self._training_op, self._accuracy = training_op, accuracy
        self._init, self._saver = init, saver

    def close_session(self):
        if self._session:
            self._session.close()

    def _get_model_params(self):
        """Get all variable values (used for early stopping, faster than saving to disk)"""
        with self._graph.as_default():
            gvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        return {gvar.op.name: value for gvar, value in zip(gvars, self._session.run(gvars))}

    def _restore_model_params(self, model_params):
        """Set all variables to the given values (for early stopping, faster than loading from disk)"""
        gvar_names = list(model_params.keys())
        assign_ops = {gvar_name: self._graph.get_operation_by_name(gvar_name + "/Assign")
                      for gvar_name in gvar_names}
        init_values = {gvar_name: assign_op.inputs[1] for gvar_name, assign_op in assign_ops.items()}
        feed_dict = {init_values[gvar_name]: model_params[gvar_name] for gvar_name in gvar_names}
        self._session.run(assign_ops, feed_dict=feed_dict)

    def fit(self, X, y, n_epochs=100, X_valid=None, y_valid=None):
        """Fit the model to the training set. If X_valid and y_valid are provided, use early stopping."""
        self.close_session()

        # infer n_inputs and n_outputs from the training set.
        n_inputs = X.shape[1]
        self.classes_ = np.unique(y)
        n_outputs = len(self.classes_)
        
        # Translate the labels vector to a vector of sorted class indices, containing
        # integers from 0 to n_outputs - 1.
        # For example, if y is equal to [8, 8, 9, 5, 7, 6, 6, 6], then the sorted class
        # labels (self.classes_) will be equal to [5, 6, 7, 8, 9], and the labels vector
        # will be translated to [3, 3, 4, 0, 2, 1, 1, 1]
        self.class_to_index_ = {label: index
                                for index, label in enumerate(self.classes_)}
        y = np.array([self.class_to_index_[label]
                      for label in y], dtype=np.int32)
        
        self._graph = tf.Graph()
        with self._graph.as_default():
            self._build_graph(n_inputs, n_outputs)
            # extra ops for batch normalization
            extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        # needed in case of early stopping
        max_checks_without_progress = 20
        checks_without_progress = 0
        best_loss = np.infty
        best_params = None
        
        # Now train the model!
        self._session = tf.Session(graph=self._graph)
        with self._session.as_default() as sess:
            self._init.run()
            t0 = time.time()
            for epoch in range(n_epochs):
                rnd_idx = np.random.permutation(len(X))
                for rnd_indices in np.array_split(rnd_idx, len(X) // self.batch_size):
                    X_batch, y_batch = X[rnd_indices], y[rnd_indices]
                    feed_dict = {self._X: X_batch, self._y: y_batch}
                    if self._training is not None:
                        feed_dict[self._training] = True
                    sess.run(self._training_op, feed_dict=feed_dict)
                    if extra_update_ops:
                        sess.run(extra_update_ops, feed_dict=feed_dict)
                if X_valid is not None and y_valid is not None:
                    loss_val, acc_val = sess.run([self._loss, self._accuracy],
                                                 feed_dict={self._X: X_valid,
                                                            self._y: y_valid})
                    if loss_val < best_loss:
                        best_params = self._get_model_params()
                        best_loss = loss_val
                        checks_without_progress = 0
                    else:
                        checks_without_progress += 1
                    print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
                        epoch, loss_val, best_loss, acc_val * 100))
                    if checks_without_progress > max_checks_without_progress:
                        print("Early stopping!")
                        break
                else:
                    loss_train, acc_train = sess.run([self._loss, self._accuracy],
                                                     feed_dict={self._X: X_batch,
                                                                self._y: y_batch})
                    print("{}\tLast training batch loss: {:.6f}\tAccuracy: {:.2f}%".format(
                        epoch, loss_train, acc_train * 100))
            t1 = time.time()
            print("Total training time: {:.1f}s".format(t1 - t0))
            # If we used early stopping then rollback to the best model found
            if best_params:
                self._restore_model_params(best_params)
            return self

    def predict_proba(self, X):
        if not self._session:
            raise NotFittedError("This %s instance is not fitted yet" % self.__class__.__name__)
        with self._session.as_default() as sess:
            return self._Y_proba.eval(feed_dict={self._X: X})

    def predict(self, X):
        class_indices = np.argmax(self.predict_proba(X), axis=1)
        return np.array([[self.classes_[class_index]]
                         for class_index in class_indices], np.int32)

    def save(self, path):
        self._saver.save(self._session, path)

### 結果 : 交差検証なし

In [12]:
dnn_clf = DNNClassifier(random_state=42)
dnn_clf.fit(X_train1, y_train1, n_epochs=1000, X_valid=X_valid1, y_valid=y_valid1)

0	Validation loss: 0.111523	Best loss: 0.111523	Accuracy: 97.85%
1	Validation loss: 0.259246	Best loss: 0.111523	Accuracy: 95.00%
2	Validation loss: 0.116553	Best loss: 0.111523	Accuracy: 97.46%
3	Validation loss: 0.439698	Best loss: 0.111523	Accuracy: 96.72%
4	Validation loss: 0.124247	Best loss: 0.111523	Accuracy: 96.48%
5	Validation loss: 0.556761	Best loss: 0.111523	Accuracy: 78.93%
6	Validation loss: 0.488352	Best loss: 0.111523	Accuracy: 87.88%
7	Validation loss: 0.327603	Best loss: 0.111523	Accuracy: 96.56%
8	Validation loss: 0.153663	Best loss: 0.111523	Accuracy: 97.97%
9	Validation loss: 0.481955	Best loss: 0.111523	Accuracy: 84.17%
10	Validation loss: 0.657136	Best loss: 0.111523	Accuracy: 77.95%
11	Validation loss: 0.583860	Best loss: 0.111523	Accuracy: 80.02%
12	Validation loss: 0.384622	Best loss: 0.111523	Accuracy: 91.67%
13	Validation loss: 200.980087	Best loss: 0.111523	Accuracy: 96.56%
14	Validation loss: 0.147616	Best loss: 0.111523	Accuracy: 97.62%
15	Validation loss

DNNClassifier(activation=<function elu at 0x7f0a0064d1e0>,
       batch_norm_momentum=None, batch_size=20, dropout_rate=None,
       initializer=<tensorflow.python.ops.init_ops.VarianceScaling object at 0x7f09fb0fd1d0>,
       learning_rate=0.01, n_hidden_layers=5, n_neurons=100,
       optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>,
       random_state=42)

In [13]:
# 　結果
from sklearn.metrics import accuracy_score

y_pred = dnn_clf.predict(X_test1)
accuracy_score(y_test1, y_pred)

0.9793734189531037

### 結果 : 交差検証あり


In [0]:
from functools import partial
from sklearn.model_selection import RandomizedSearchCV

def leaky_relu(alpha=0.01):
    def parametrized_leaky_relu(z, name=None):
        return tf.maximum(alpha * z, z, name=name)
    return parametrized_leaky_relu

param_distribs = {
    "n_neurons": [10, 30, 50, 70, 90, 100, 120, 140, 160],
    "batch_size": [10, 50, 100, 500],
    "learning_rate": [0.01, 0.02, 0.05, 0.1],
    "activation": [tf.nn.relu, tf.nn.elu, leaky_relu(alpha=0.01), leaky_relu(alpha=0.1)],
#     # you could also try exploring different numbers of hidden layers, different optimizers, etc.
#     "n_hidden_layers": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
#     "optimizer_class": [tf.train.AdamOptimizer, partial(tf.train.MomentumOptimizer, momentum=0.95)],
}

rnd_search = RandomizedSearchCV(DNNClassifier(random_state=42), param_distribs, n_iter=50,
                                random_state=42, verbose=2)
rnd_search.fit(X_train1, y_train1, X_valid=X_valid1, y_valid=y_valid1, n_epochs=1000)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fitting 3 folds for each of 50 candidates, totalling 150 fits
[CV] n_neurons=10, learning_rate=0.05, batch_size=100, activation=<function elu at 0x7f0a0064d1e0> 
0	Validation loss: 0.129516	Best loss: 0.129516	Accuracy: 96.13%
1	Validation loss: 0.145597	Best loss: 0.129516	Accuracy: 95.86%
2	Validation loss: 0.138621	Best loss: 0.129516	Accuracy: 96.87%
3	Validation loss: 0.113979	Best loss: 0.113979	Accuracy: 97.03%
4	Validation loss: 0.105074	Best loss: 0.105074	Accuracy: 96.87%
5	Validation loss: 0.117311	Best loss: 0.105074	Accuracy: 97.30%
6	Validation loss: 0.124047	Best loss: 0.105074	Accuracy: 97.19%
7	Validation loss: 0.117882	Best loss: 0.105074	Accuracy: 96.95%
8	Validation loss: 0.366215	Best loss: 0.105074	Accuracy: 96.87%
9	Validation loss: 0.675871	Best loss: 0.105074	Accuracy: 69.08%
10	Validation loss: 0.973863	Best loss: 0.105074	Accuracy: 59.19%
11	Validation loss: 0.789437	Best loss: 0.105074	Accuracy: 70.91%
12	Validation loss: 0.686359	Best loss: 0.105074	Accurac

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    7.3s remaining:    0.0s


0	Validation loss: 0.157013	Best loss: 0.157013	Accuracy: 96.33%
1	Validation loss: 0.133719	Best loss: 0.133719	Accuracy: 96.60%
2	Validation loss: 0.179000	Best loss: 0.133719	Accuracy: 95.74%
3	Validation loss: 0.701486	Best loss: 0.133719	Accuracy: 66.69%
4	Validation loss: 0.430556	Best loss: 0.133719	Accuracy: 87.96%
5	Validation loss: 0.817886	Best loss: 0.133719	Accuracy: 62.00%
6	Validation loss: 0.570671	Best loss: 0.133719	Accuracy: 77.68%
7	Validation loss: 1.249756	Best loss: 0.133719	Accuracy: 60.87%
8	Validation loss: 0.793419	Best loss: 0.133719	Accuracy: 59.58%
9	Validation loss: 0.743039	Best loss: 0.133719	Accuracy: 60.52%
10	Validation loss: 0.737980	Best loss: 0.133719	Accuracy: 60.48%
11	Validation loss: 0.732052	Best loss: 0.133719	Accuracy: 60.56%
12	Validation loss: 0.743588	Best loss: 0.133719	Accuracy: 60.40%
13	Validation loss: 0.732825	Best loss: 0.133719	Accuracy: 60.36%
14	Validation loss: 0.745985	Best loss: 0.133719	Accuracy: 61.06%
15	Validation loss: 

In [0]:
# 最良のパラメータ
rnd_search.best_params_

In [0]:
# 結果
y_pred = rnd_search.predict(X_test1)
accuracy_score(y_test1, y_pred)

### Batch Normalization + 交差検証


In [0]:
from sklearn.model_selection import RandomizedSearchCV

param_distribs = {
    "n_neurons": [10, 30, 50, 70, 90, 100, 120, 140, 160],
    "batch_size": [10, 50, 100, 500],
    "learning_rate": [0.01, 0.02, 0.05, 0.1],
    "activation": [tf.nn.relu, tf.nn.elu, leaky_relu(alpha=0.01), leaky_relu(alpha=0.1)],
    # you could also try exploring different numbers of hidden layers, different optimizers, etc.
    #"n_hidden_layers": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    #"optimizer_class": [tf.train.AdamOptimizer, partial(tf.train.MomentumOptimizer, momentum=0.95)],
    "batch_norm_momentum": [0.9, 0.95, 0.98, 0.99, 0.999],
}

rnd_search_bn = RandomizedSearchCV(DNNClassifier(random_state=42), param_distribs, n_iter=50,
                                   fit_params={"X_valid": X_valid1, "y_valid": y_valid1, "n_epochs": 1000},
                                   random_state=42, verbose=2)
rnd_search_bn.fit(X_train1, y_train1)

In [0]:
# 最良のパラメータ
rnd_search.best_params_

In [0]:
y_pred = rnd_search.predict(X_test1)
accuracy_score(y_test1, y_pred)

### Dropout + 交差検証

In [0]:
from sklearn.model_selection import RandomizedSearchCV

param_distribs = {
    "n_neurons": [10, 30, 50, 70, 90, 100, 120, 140, 160],
    "batch_size": [10, 50, 100, 500],
    "learning_rate": [0.01, 0.02, 0.05, 0.1],
    "activation": [tf.nn.relu, tf.nn.elu, leaky_relu(alpha=0.01), leaky_relu(alpha=0.1)],
    # you could also try exploring different numbers of hidden layers, different optimizers, etc.
    #"n_hidden_layers": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    #"optimizer_class": [tf.train.AdamOptimizer, partial(tf.train.MomentumOptimizer, momentum=0.95)],
    "dropout_rate": [0.2, 0.3, 0.4, 0.5, 0.6],
}

rnd_search_dropout = RandomizedSearchCV(DNNClassifier(random_state=42), param_distribs, n_iter=50,
                                        fit_params={"X_valid": X_valid1, "y_valid": y_valid1, "n_epochs": 1000},
                                        random_state=42, verbose=2)
rnd_search_dropout.fit(X_train1, y_train1)

In [0]:
# 最良のパラメータ
rnd_search.best_params_

{}

In [0]:
y_pred = rnd_search.predict(X_test1)
accuracy_score(y_test1, y_pred)

0.9926055652850749

In [0]:
# 簡易バージョン(時間短縮)
from sklearn.model_selection import RandomizedSearchCV

param_distribs = {}

# 最良のパラメータ結果
# {'activation': <function tensorflow.python.ops.gen_nn_ops.relu(features, name=None)>,
#  'batch_size': 100,
#  'dropout_rate': 0.2,
#  'learning_rate': 0.01,
#  'n_neurons': 160}

rnd_search_dropout = RandomizedSearchCV(DNNClassifier(random_state=42, activation=tf.nn.relu, batch_size=100,
                                                     dropout_rate=0.2, learning_rate=0.01, n_neurons=160),
                                        param_distribs, n_iter=50, fit_params={"X_valid": X_valid1, "y_valid": y_valid1, "n_epochs": 1000},
                                        random_state=42, verbose=2)
rnd_search_dropout.fit(X_train1, y_train1)

In [0]:
# モデルの保存
rnd_search.best_estimator_.save("./my_best_mnist_model_0_to_4")

### 転移学習


#### 前問の全ての訓練済み隠れ層(5層)を再利用
- 隠れ層を凍結し、新しいソフトマックス出力層を追加

In [0]:
# プレトレーニング済み層の再利用
reset_graph()

restore_saver = tf.train.import_meta_graph("./my_best_mnist_model_0_to_4.meta")

X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")
loss = tf.get_default_graph().get_tensor_by_name("loss:0")
Y_proba = tf.get_default_graph().get_tensor_by_name("Y_proba:0")
logits = Y_proba.op.inputs[0]
accuracy = tf.get_default_graph().get_tensor_by_name("accuracy:0")

In [0]:
# 隠れ層の凍結
learning_rate = 0.01

output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="logits")  # logitsの下位層は除外
optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam2")
training_op = optimizer.minimize(loss, var_list=output_layer_vars)

correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

init = tf.global_variables_initializer()
five_frozen_saver = tf.train.Saver()

- 数字1つあたり100個の画像で5から9までの数字について新しいDNNを訓練

In [0]:
# 5から9のデータ
X_train2_full = X_train[y_train >= 5]
y_train2_full = y_train[y_train >= 5] - 5
X_valid2_full = X_valid[y_valid >= 5]
y_valid2_full = y_valid[y_valid >= 5] - 5
X_test2 = X_test[y_test >= 5]
y_test2 = y_test[y_test >= 5] - 5

In [0]:
def sample_n_instances_per_class(X, y, n=100):
    Xs, ys = [], []
    for label in np.unique(y):
        idx = (y == label)
        Xc = X[idx][:n]
        yc = y[idx][:n]
        Xs.append(Xc)
        ys.append(yc)
    return np.concatenate(Xs), np.concatenate(ys)

In [0]:
X_train2, y_train2 = sample_n_instances_per_class(X_train2_full, y_train2_full, n=100)
X_valid2, y_valid2 = sample_n_instances_per_class(X_valid2_full, y_valid2_full, n=30)

In [0]:
# 学習
import time

n_epochs = 1000
batch_size = 20

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    restore_saver.restore(sess, "./my_best_mnist_model_0_to_4")
    t0 = time.time()
        
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train2))
        for rnd_indices in np.array_split(rnd_idx, len(X_train2) // batch_size):
            X_batch, y_batch = X_train2[rnd_indices], y_train2[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid2, y: y_valid2})
        if loss_val < best_loss:
            save_path = five_frozen_saver.save(sess, "./my_mnist_model_5_to_9_five_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

    t1 = time.time()
    print("Total training time: {:.1f}s".format(t1 - t0))

with tf.Session() as sess:
    five_frozen_saver.restore(sess, "./my_mnist_model_5_to_9_five_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

#### 凍結層のキャッシング
- 隠れ層5の出力のバッチを作成し、それを上位層の訓練に使用

In [0]:
hidden5_out = tf.get_default_graph().get_tensor_by_name("hidden5_out:0")

In [0]:
import time

n_epochs = 1000
batch_size = 20

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    restore_saver.restore(sess, "./my_best_mnist_model_0_to_4")
    t0 = time.time()
    
    hidden5_train = hidden5_out.eval(feed_dict={X: X_train2, y: y_train2})
    hidden5_valid = hidden5_out.eval(feed_dict={X: X_valid2, y: y_valid2})
        
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train2))
        for rnd_indices in np.array_split(rnd_idx, len(X_train2) // batch_size):
            h5_batch, y_batch = hidden5_train[rnd_indices], y_train2[rnd_indices]
            sess.run(training_op, feed_dict={hidden5_out: h5_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={hidden5_out: hidden5_valid, y: y_valid2})
        if loss_val < best_loss:
            save_path = five_frozen_saver.save(sess, "./my_mnist_model_5_to_9_five_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

    t1 = time.time()
    print("Total training time: {:.1f}s".format(t1 - t0))

with tf.Session() as sess:
    five_frozen_saver.restore(sess, "./my_mnist_model_5_to_9_five_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

INFO:tensorflow:Restoring parameters from ./my_best_mnist_model_0_to_4
0	Validation loss: 1.956339	Best loss: 1.956339	Accuracy: 34.00%
1	Validation loss: 1.194342	Best loss: 1.194342	Accuracy: 49.33%
2	Validation loss: 1.091982	Best loss: 1.091982	Accuracy: 60.67%
3	Validation loss: 1.082380	Best loss: 1.082380	Accuracy: 52.67%
4	Validation loss: 1.037122	Best loss: 1.037122	Accuracy: 56.00%
5	Validation loss: 1.021572	Best loss: 1.021572	Accuracy: 56.67%
6	Validation loss: 1.023972	Best loss: 1.021572	Accuracy: 58.67%
7	Validation loss: 0.999554	Best loss: 0.999554	Accuracy: 64.00%
8	Validation loss: 1.018910	Best loss: 0.999554	Accuracy: 57.33%
9	Validation loss: 1.001856	Best loss: 0.999554	Accuracy: 55.33%
10	Validation loss: 0.966381	Best loss: 0.966381	Accuracy: 64.00%
11	Validation loss: 0.965756	Best loss: 0.965756	Accuracy: 62.67%
12	Validation loss: 0.986529	Best loss: 0.965756	Accuracy: 56.00%
13	Validation loss: 0.976854	Best loss: 0.965756	Accuracy: 59.33%
14	Validation l

#### 再利用する隠れ層の数を変更
- 5 --> 4

In [0]:
reset_graph()

n_outputs = 5

restore_saver = tf.train.import_meta_graph("./my_best_mnist_model_0_to_4.meta")

X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")

hidden4_out = tf.get_default_graph().get_tensor_by_name("hidden4_out:0")
logits = tf.layers.dense(hidden4_out, n_outputs, kernel_initializer=he_init, name="new_logits")
Y_proba = tf.nn.softmax(logits)
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy)
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

In [0]:
learning_rate = 0.01

output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="new_logits")
optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam2")
training_op = optimizer.minimize(loss, var_list=output_layer_vars)

init = tf.global_variables_initializer()
four_frozen_saver = tf.train.Saver()

In [0]:
n_epochs = 1000
batch_size = 20

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    restore_saver.restore(sess, "./my_best_mnist_model_0_to_4")
        
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train2))
        for rnd_indices in np.array_split(rnd_idx, len(X_train2) // batch_size):
            X_batch, y_batch = X_train2[rnd_indices], y_train2[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid2, y: y_valid2})
        if loss_val < best_loss:
            save_path = four_frozen_saver.save(sess, "./my_mnist_model_5_to_9_four_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

with tf.Session() as sess:
    four_frozen_saver.restore(sess, "./my_mnist_model_5_to_9_four_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

#### 上位2層の凍結を解除(隠れ層1,2のみを凍結)

In [0]:
learning_rate = 0.01

unfrozen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="hidden[34]|new_logits")  # 隠れ層1, 2だけ除外される
optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam3")
training_op = optimizer.minimize(loss, var_list=unfrozen_vars)

init = tf.global_variables_initializer()
two_frozen_saver = tf.train.Saver()

In [0]:
n_epochs = 1000
batch_size = 20

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    four_frozen_saver.restore(sess, "./my_mnist_model_5_to_9_four_frozen")
        
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train2))
        for rnd_indices in np.array_split(rnd_idx, len(X_train2) // batch_size):
            X_batch, y_batch = X_train2[rnd_indices], y_train2[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid2, y: y_valid2})
        if loss_val < best_loss:
            save_path = two_frozen_saver.save(sess, "./my_mnist_model_5_to_9_two_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

with tf.Session() as sess:
    two_frozen_saver.restore(sess, "./my_mnist_model_5_to_9_two_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

INFO:tensorflow:Restoring parameters from ./my_mnist_model_5_to_9_four_frozen
0	Validation loss: 1.071706	Best loss: 1.071706	Accuracy: 73.33%
1	Validation loss: 0.502234	Best loss: 0.502234	Accuracy: 86.67%
2	Validation loss: 0.514655	Best loss: 0.502234	Accuracy: 84.67%
3	Validation loss: 0.385756	Best loss: 0.385756	Accuracy: 88.00%
4	Validation loss: 0.667843	Best loss: 0.385756	Accuracy: 86.67%
5	Validation loss: 0.531288	Best loss: 0.385756	Accuracy: 88.67%
6	Validation loss: 0.520654	Best loss: 0.385756	Accuracy: 88.67%
7	Validation loss: 0.594461	Best loss: 0.385756	Accuracy: 87.33%
8	Validation loss: 0.707682	Best loss: 0.385756	Accuracy: 83.33%
9	Validation loss: 0.602872	Best loss: 0.385756	Accuracy: 88.00%
10	Validation loss: 0.697784	Best loss: 0.385756	Accuracy: 90.00%
11	Validation loss: 0.650856	Best loss: 0.385756	Accuracy: 89.33%
12	Validation loss: 0.741689	Best loss: 0.385756	Accuracy: 89.33%
13	Validation loss: 1.033944	Best loss: 0.385756	Accuracy: 83.33%
14	Valid

参考文献 : Hands-On Machine Learning with Scikit-Learn and Tensorflow" Aurelien Geron, O'Reilly, 978-1-491-96229-9"