In [1]:
# DL framework
import tensorflow as tf

from datetime import datetime

# common packages
import numpy as np
import os
import math

# timing epochs
import time

# read image
from scipy.misc import imread

# plotting pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

# const
SEED = 42

# Helper to make the output consistent
def reset_graph(seed=SEED):
    """Start over with an empty default graph and reproducible random state.

    Seeds NumPy's global random generator, clears the default TensorFlow
    graph, and sets TensorFlow's graph-level random seed.

    Args:
        seed: Integer seed used for both NumPy and TensorFlow.
    """
    # NumPy's generator is independent of the TensorFlow graph.
    np.random.seed(seed)

    # The TensorFlow seed is set only after the old graph has been cleared.
    tf.reset_default_graph()
    tf.set_random_seed(seed)

    
# Set the log level to suppress TensorFlow messages, unless they are errors
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Report the TensorFlow version in use
print('TensorFlow Version: {}'.format(tf.__version__))

# Report the default GPU device, if TensorFlow can see one
if tf.test.gpu_device_name():
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))
else:
    print('No GPU found')

# Begin with a clean, seeded default graph
reset_graph()

TensorFlow Version: 1.2.1
Default GPU Device: /gpu:0


In [2]:
from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST from /tmp/data/; the output below shows the archives being downloaded
# there and extracted on the first run.
mnist = input_data.read_data_sets(train_dir="/tmp/data/")

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting /tmp/data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [3]:
reset_graph()

# --- Hyper-parameters --------------------------------------------------------
n_inputs = 28 * 28  # MNIST
conv1_fmaps = 16
conv1_ksize = 3
conv1_stride = 1
conv1_pad = "SAME"

n_hidden2 = 50
n_hidden3 = 50
n_hidden4 = 50
n_hidden5 = 50
n_outputs = 10

learning_rate = 0.01
threshold = 1.0  # every gradient element is clipped to [-threshold, threshold]

# --- Inputs ------------------------------------------------------------------
with tf.name_scope("inputs"):
    X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
    X_reshaped = tf.reshape(X, shape=[-1, 28, 28, 1])
    # `shape=(None)` is plain `None` (the parentheses do not make a tuple), which
    # leaves the shape completely unspecified. Declare the labels as a 1-D batch.
    y = tf.placeholder(tf.int64, shape=[None], name="y")

# --- Network: one conv layer followed by four dense ReLU layers --------------
with tf.name_scope("dnn"):
    conv1 = tf.layers.conv2d(inputs=X_reshaped, filters=conv1_fmaps, kernel_size=conv1_ksize,
                             strides=conv1_stride, padding=conv1_pad,
                             activation=tf.nn.relu,
                             kernel_initializer=tf.contrib.layers.xavier_initializer(),
                             bias_initializer=tf.zeros_initializer(), trainable=True,
                             name="conv1")
    # Take the flattened width from conv1's static shape instead of hard-coding
    # 28 * 28 * conv1_fmaps, so it stays correct if stride or padding change.
    n_flat = int(np.prod(conv1.get_shape().as_list()[1:]))
    flatten = tf.reshape(conv1, shape=[-1, n_flat])
    hidden2 = tf.layers.dense(flatten, n_hidden2, activation=tf.nn.relu, name="hidden2")
    hidden3 = tf.layers.dense(hidden2, n_hidden3, activation=tf.nn.relu, name="hidden3")
    hidden4 = tf.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name="hidden4")
    hidden5 = tf.layers.dense(hidden4, n_hidden5, activation=tf.nn.relu, name="hidden5")

with tf.name_scope("output"):
    logits = tf.layers.dense(hidden5, n_outputs, name="outputs")
    Y_proba = tf.nn.softmax(logits, name="Y_proba")

with tf.name_scope("train"):
    # NOTE(review): this Adam optimizer is never run -- `optimizer` and
    # `training_op` are rebound to the clipped gradient-descent step further down.
    # It still adds its own variables to the graph, and `saver` writes them to the
    # checkpoint. It is kept so the checkpoint's variable set remains a superset of
    # whatever a later rebuild of this graph tries to restore.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
    loss = tf.reduce_mean(xentropy)
    optimizer = tf.train.AdamOptimizer()
    training_op = optimizer.minimize(loss)

with tf.name_scope("loss"):
    # This is the loss the actual training step below minimizes.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

# --- Training step: plain gradient descent with element-wise gradient clipping
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
grads_and_vars = optimizer.compute_gradients(loss)
# Skip variables without a gradient: tf.clip_by_value cannot take None.
capped_gvs = [(tf.clip_by_value(grad, -threshold, threshold), var)
              for grad, var in grads_and_vars if grad is not None]
training_op = optimizer.apply_gradients(capped_gvs)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [30]:
# Every trainable variable in the current graph
all_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
print(len(all_vars))
print(all_vars)

# `scope` is a regular expression matched from the start of each variable name,
# so an alternation picks up several layers at once.
hidden23_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="hidden2|hidden3")
print(len(hidden23_vars))
print(hidden23_vars)

# Complement of a selection: filter the full list in Python.
hack_vars = [var for var in all_vars if var not in hidden23_vars]
print(len(hack_vars))
print(hack_vars)

# The cells below read `conv1_vars` to inspect the conv layer's weights, so it must
# select the conv1 variables (it used to be bound to the hidden2/hidden3 ones).
conv1_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="conv1")
print(len(conv1_vars))
print(conv1_vars)

# Scope "^conv1" selects conv1 itself: "^" anchors the match, it does not negate it.
# Everything except conv1 therefore has to be computed as a complement as well.
not_conv1_vars = [var for var in all_vars if var not in conv1_vars]
print(len(not_conv1_vars))
print(not_conv1_vars)

12
[<tf.Variable 'conv1/kernel:0' shape=(3, 3, 1, 16) dtype=float32_ref>, <tf.Variable 'conv1/bias:0' shape=(16,) dtype=float32_ref>, <tf.Variable 'hidden2/kernel:0' shape=(12544, 50) dtype=float32_ref>, <tf.Variable 'hidden2/bias:0' shape=(50,) dtype=float32_ref>, <tf.Variable 'hidden3/kernel:0' shape=(50, 50) dtype=float32_ref>, <tf.Variable 'hidden3/bias:0' shape=(50,) dtype=float32_ref>, <tf.Variable 'hidden4/kernel:0' shape=(50, 50) dtype=float32_ref>, <tf.Variable 'hidden4/bias:0' shape=(50,) dtype=float32_ref>, <tf.Variable 'hidden5/kernel:0' shape=(50, 50) dtype=float32_ref>, <tf.Variable 'hidden5/bias:0' shape=(50,) dtype=float32_ref>, <tf.Variable 'outputs/kernel:0' shape=(50, 10) dtype=float32_ref>, <tf.Variable 'outputs/bias:0' shape=(10,) dtype=float32_ref>]
4
[<tf.Variable 'hidden2/kernel:0' shape=(12544, 50) dtype=float32_ref>, <tf.Variable 'hidden2/bias:0' shape=(50,) dtype=float32_ref>, <tf.Variable 'hidden3/kernel:0' shape=(50, 50) dtype=float32_ref>, <tf.Variable 'hi

In [9]:
# Open a fresh session, run the initializer, and read back the variables listed in
# `conv1_vars` as NumPy arrays.
with tf.Session() as sess:
    sess.run(init)
    conv1_var_values_after_init = sess.run(conv1_vars)

print(conv1_var_values_after_init)

[array([[[[ 0.1436774 ,  0.06208725,  0.19428511,  0.15965192,  0.09255137,
          -0.15043685,  0.04323363, -0.15209712,  0.13919635, -0.19596508,
          -0.11665009, -0.19295509,  0.08599551,  0.00876857,  0.02424042,
           0.07372688]],

        [[ 0.0965506 ,  0.10263102,  0.08639391, -0.16994804, -0.1674898 ,
          -0.06032021, -0.16857859, -0.12199627, -0.138142  ,  0.0716949 ,
           0.14944614, -0.1142588 , -0.08626974, -0.02654484, -0.18412019,
          -0.07833289]],

        [[ 0.00527871,  0.00831437,  0.11751921, -0.06409185,  0.18480386,
           0.08745472,  0.07106398,  0.14283498, -0.07341319,  0.18218027,
          -0.1375303 ,  0.15108292,  0.135484  , -0.14575592,  0.09163739,
          -0.05395772]]],


       [[[-0.12464728, -0.00929835, -0.17455626, -0.08600647, -0.17162451,
          -0.12354314,  0.19538037,  0.1354145 ,  0.09801404, -0.19382387,
          -0.15673088,  0.07769804,  0.07321508,  0.06267332, -0.11012249,
          -0.155706

In [10]:
n_epochs = 20
batch_size = 50

with tf.Session() as sess:
    sess.run(init)

    # Snapshot of the watched variables before any training step
    conv1_var_values_after_init = sess.run(conv1_vars)
    print(conv1_var_values_after_init)

    train_steps = mnist.train.num_examples // batch_size
    test_steps = mnist.test.num_examples // batch_size

    for epoch in range(n_epochs):
        # One pass over the training set, one mini-batch per step
        for _ in range(train_steps):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

        # Evaluate the test set batch by batch and average the batch accuracies
        acc_list = []
        for _ in range(test_steps):
            X_batch, y_batch = mnist.test.next_batch(batch_size)
            batch_acc = sess.run(accuracy, feed_dict={X: X_batch, y: y_batch})
            acc_list.append(batch_acc)
        test_acc = sum(acc_list) / float(len(acc_list))
        print(epoch, "Test accuracy:", test_acc)

        # Snapshot of the watched variables after this epoch
        conv1_var_values_after_training = sess.run(conv1_vars)
        print(conv1_var_values_after_training)

    # Write all variables of the graph to a checkpoint
    save_path = saver.save(sess, "./my_model_final.ckpt")

[array([[[[ 0.1436774 ,  0.06208725,  0.19428511,  0.15965192,  0.09255137,
          -0.15043685,  0.04323363, -0.15209712,  0.13919635, -0.19596508,
          -0.11665009, -0.19295509,  0.08599551,  0.00876857,  0.02424042,
           0.07372688]],

        [[ 0.0965506 ,  0.10263102,  0.08639391, -0.16994804, -0.1674898 ,
          -0.06032021, -0.16857859, -0.12199627, -0.138142  ,  0.0716949 ,
           0.14944614, -0.1142588 , -0.08626974, -0.02654484, -0.18412019,
          -0.07833289]],

        [[ 0.00527871,  0.00831437,  0.11751921, -0.06409185,  0.18480386,
           0.08745472,  0.07106398,  0.14283498, -0.07341319,  0.18218027,
          -0.1375303 ,  0.15108292,  0.135484  , -0.14575592,  0.09163739,
          -0.05395772]]],


       [[[-0.12464728, -0.00929835, -0.17455626, -0.08600647, -0.17162451,
          -0.12354314,  0.19538037,  0.1354145 ,  0.09801404, -0.19382387,
          -0.15673088,  0.07769804,  0.07321508,  0.06267332, -0.11012249,
          -0.155706

3 Test accuracy: 0.951800003648
[array([[[[ 0.19547525,  0.20023225,  0.27678648,  0.17722915,  0.06143334,
          -0.15036954,  0.22703263, -0.04335223,  0.16680555, -0.18517391,
          -0.11663932, -0.2623021 ,  0.31459674,  0.02059264, -0.00483765,
           0.05257277]],

        [[ 0.15697092,  0.26197094,  0.20015545, -0.16650398, -0.17133842,
          -0.0584757 ,  0.07976602,  0.06867598, -0.14653064,  0.09534092,
           0.15470976, -0.1296168 ,  0.21940485, -0.01896568, -0.20143786,
          -0.08859347]],

        [[ 0.04579711,  0.14140467,  0.23990391, -0.06596989,  0.21919471,
           0.08881015,  0.290851  ,  0.35441998, -0.11594483,  0.21568391,
          -0.12879024,  0.20767094,  0.44036257, -0.13746054,  0.09234408,
          -0.00948588]]],


       [[[-0.0693551 ,  0.16313611, -0.14569268, -0.0714402 , -0.20491488,
          -0.12351894,  0.52559638,  0.34595826,  0.10578772, -0.19388066,
          -0.15822667,  0.01289796,  0.4094094 ,  0.07279463, 

6 Test accuracy: 0.960400002897
[array([[[[ 0.20126446,  0.20693091,  0.28241429,  0.19350545,  0.04161679,
          -0.15047671,  0.22784741, -0.06257547,  0.18204194, -0.1745858 ,
          -0.11806916, -0.29967961,  0.31716067,  0.02838508, -0.0324775 ,
           0.04002348]],

        [[ 0.16321358,  0.27132943,  0.2180475 , -0.16509996, -0.17761728,
          -0.05863477,  0.09364089,  0.07411157, -0.15619272,  0.11317382,
           0.16009247, -0.14037596,  0.23981525, -0.01188786, -0.22889806,
          -0.09517638]],

        [[ 0.045554  ,  0.14114988,  0.25712174, -0.07016078,  0.24282226,
           0.08871222,  0.2943368 ,  0.36899066, -0.15275216,  0.24035917,
          -0.11969644,  0.23925282,  0.45626837, -0.12944044,  0.07541452,
           0.01576287]]],


       [[[-0.07080398,  0.17653351, -0.16414344, -0.05653079, -0.22657107,
          -0.12348458,  0.56138694,  0.36332634,  0.11475307, -0.1932683 ,
          -0.1599495 , -0.02847613,  0.43954676,  0.07764604, 

10 Test accuracy: 0.969000001848
[array([[[[ 0.20893481,  0.20858465,  0.29086852,  0.2120043 ,  0.01431494,
          -0.15059401,  0.21128975, -0.10095596,  0.2007447 , -0.1586179 ,
          -0.12097762, -0.34630582,  0.29968256,  0.03323152, -0.0665992 ,
           0.02084023]],

        [[ 0.17216997,  0.27900887,  0.24303302, -0.1685411 , -0.18642867,
          -0.05742211,  0.09594341,  0.06560985, -0.16955367,  0.13328764,
           0.16604251, -0.1556261 ,  0.2464309 , -0.00605708, -0.26606578,
          -0.10691109]],

        [[ 0.04481595,  0.13555792,  0.28212392, -0.08135807,  0.27401015,
           0.09019496,  0.2872189 ,  0.37572792, -0.19932054,  0.26676086,
          -0.10930831,  0.28185123,  0.46443745, -0.12066785,  0.05064533,
           0.04379658]]],


       [[[-0.07416013,  0.1837883 , -0.19108945, -0.03718373, -0.25567409,
          -0.12344895,  0.59024447,  0.3696999 ,  0.12739204, -0.19312289,
          -0.16290769, -0.07907611,  0.45462972,  0.08092222,

14 Test accuracy: 0.97499999851
[array([[[[  2.11551979e-01,   2.04777971e-01,   3.07061672e-01,
            2.29608715e-01,  -8.05312488e-03,  -1.50710553e-01,
            1.83752060e-01,  -1.42617464e-01,   2.21235812e-01,
           -1.43823296e-01,  -1.26012444e-01,  -3.91104996e-01,
            2.73486227e-01,   3.84640507e-02,  -9.78188217e-02,
            5.00609260e-03]],

        [[  1.74662501e-01,   2.80958027e-01,   2.70695299e-01,
           -1.73710853e-01,  -1.94116235e-01,  -5.54792322e-02,
            8.16091523e-02,   4.73716706e-02,  -1.79671437e-01,
            1.51327968e-01,   1.69769421e-01,  -1.74053460e-01,
            2.37101763e-01,   5.12990868e-04,  -3.02984327e-01,
           -1.17764235e-01]],

        [[  3.87864523e-02,   1.24646001e-01,   3.09564352e-01,
           -9.44302380e-02,   3.01870316e-01,   9.48689505e-02,
            2.69409746e-01,   3.72809678e-01,  -2.38167554e-01,
            2.89466113e-01,  -1.02890708e-01,   3.17536116e-01,
         

17 Test accuracy: 0.973899999559
[array([[[[ 0.21535149,  0.20335363,  0.32746375,  0.24184754, -0.02313226,
          -0.15083703,  0.16545449, -0.17020945,  0.23880087, -0.13406694,
          -0.12943089, -0.4249413 ,  0.25791085,  0.04174537, -0.12077836,
          -0.00806811]],

        [[ 0.17691025,  0.28064281,  0.29640296, -0.17827384, -0.19966471,
          -0.05400036,  0.06521964,  0.03300255, -0.18482462,  0.16408512,
           0.172885  , -0.19094957,  0.22643791,  0.00596282, -0.33254609,
          -0.12746213]],

        [[ 0.03342142,  0.11161219,  0.3319782 , -0.10448758,  0.32383361,
           0.09835821,  0.24924424,  0.36700502, -0.26242399,  0.30458111,
          -0.09837262,  0.33958831,  0.44896871, -0.10471649,  0.00875299,
           0.08084438]]],


       [[[-0.08798561,  0.19158354, -0.22715227, -0.00674623, -0.30267969,
          -0.12345928,  0.62072325,  0.37360466,  0.15396616, -0.19649613,
          -0.16947673, -0.16118024,  0.4622198 ,  0.08579864,

In [13]:
# A new session does not inherit the weights trained in the session above: after
# running `init` the variables hold their initial values again (compare the output
# with the values printed right after initialization).
# The result gets its own name so the trained values captured by the training cell
# in `conv1_var_values_after_training` are not overwritten by re-initialized ones.
with tf.Session() as sess:
    init.run()
    conv1_var_values_after_reinit = sess.run(conv1_vars)
    print(conv1_var_values_after_reinit)

[array([[[[ 0.1436774 ,  0.06208725,  0.19428511,  0.15965192,  0.09255137,
          -0.15043685,  0.04323363, -0.15209712,  0.13919635, -0.19596508,
          -0.11665009, -0.19295509,  0.08599551,  0.00876857,  0.02424042,
           0.07372688]],

        [[ 0.0965506 ,  0.10263102,  0.08639391, -0.16994804, -0.1674898 ,
          -0.06032021, -0.16857859, -0.12199627, -0.138142  ,  0.0716949 ,
           0.14944614, -0.1142588 , -0.08626974, -0.02654484, -0.18412019,
          -0.07833289]],

        [[ 0.00527871,  0.00831437,  0.11751921, -0.06409185,  0.18480386,
           0.08745472,  0.07106398,  0.14283498, -0.07341319,  0.18218027,
          -0.1375303 ,  0.15108292,  0.135484  , -0.14575592,  0.09163739,
          -0.05395772]]],


       [[[-0.12464728, -0.00929835, -0.17455626, -0.08600647, -0.17162451,
          -0.12354314,  0.19538037,  0.1354145 ,  0.09801404, -0.19382387,
          -0.15673088,  0.07769804,  0.07321508,  0.06267332, -0.11012249,
          -0.155706

## Rebuild the same graph from scratch and continue training from the saved checkpoint

In [14]:
reset_graph()

# --- Hyper-parameters --------------------------------------------------------
# Same architecture as the graph that wrote the checkpoint. `n_inputs` is defined
# here as well so this cell does not depend on a value left over from another cell.
n_inputs = 28 * 28  # MNIST
conv1_fmaps = 16
conv1_ksize = 3
conv1_stride = 1
conv1_pad = "SAME"

n_hidden2 = 50
n_hidden3 = 50
n_hidden4 = 50
n_hidden5 = 50
n_outputs = 10

learning_rate = 0.01
threshold = 1.0  # every gradient element is clipped to [-threshold, threshold]

# --- Inputs ------------------------------------------------------------------
with tf.name_scope("inputs"):
    X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
    X_reshaped = tf.reshape(X, shape=[-1, 28, 28, 1])
    # `shape=(None)` is plain `None` (the parentheses do not make a tuple), which
    # leaves the shape completely unspecified. Declare the labels as a 1-D batch.
    y = tf.placeholder(tf.int64, shape=[None], name="y")

# --- Network: one conv layer followed by four dense ReLU layers --------------
with tf.name_scope("dnn"):
    conv1 = tf.layers.conv2d(inputs=X_reshaped, filters=conv1_fmaps, kernel_size=conv1_ksize,
                             strides=conv1_stride, padding=conv1_pad,
                             activation=tf.nn.relu,
                             kernel_initializer=tf.contrib.layers.xavier_initializer(),
                             bias_initializer=tf.zeros_initializer(), trainable=True,
                             name="conv1")
    # Take the flattened width from conv1's static shape instead of hard-coding
    # 28 * 28 * conv1_fmaps, so it stays correct if stride or padding change.
    n_flat = int(np.prod(conv1.get_shape().as_list()[1:]))
    flatten = tf.reshape(conv1, shape=[-1, n_flat])
    hidden2 = tf.layers.dense(flatten, n_hidden2, activation=tf.nn.relu, name="hidden2")
    hidden3 = tf.layers.dense(hidden2, n_hidden3, activation=tf.nn.relu, name="hidden3")
    hidden4 = tf.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name="hidden4")
    hidden5 = tf.layers.dense(hidden4, n_hidden5, activation=tf.nn.relu, name="hidden5")

with tf.name_scope("output"):
    logits = tf.layers.dense(hidden5, n_outputs, name="outputs")
    Y_proba = tf.nn.softmax(logits, name="Y_proba")

# The Adam optimizer that used to be built here was never run (its `optimizer` and
# `training_op` were immediately rebound below); it only added unused variables to
# the graph. Restoring from a checkpoint that contains extra variables is fine, so
# dropping it does not affect loading the saved model.

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

# --- Training step: plain gradient descent with element-wise gradient clipping
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
grads_and_vars = optimizer.compute_gradients(loss)
# Skip variables without a gradient: tf.clip_by_value cannot take None.
capped_gvs = [(tf.clip_by_value(grad, -threshold, threshold), var)
              for grad, var in grads_and_vars if grad is not None]
training_op = optimizer.apply_gradients(capped_gvs)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [16]:
# Variables of the conv layer in the rebuilt graph, used to compare values before
# and after the checkpoint is loaded.
conv1_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="conv1")

with tf.Session() as sess:
    # Values straight after initialization ...
    sess.run(init)
    conv1_var_values_after_init = sess.run(conv1_vars)
    print(conv1_var_values_after_init)

    # ... and after loading the checkpoint written by the earlier training run
    saver.restore(sess, "./my_model_final.ckpt")
    conv1_var_values_after_restore = sess.run(conv1_vars)
    print(conv1_var_values_after_restore)

    train_steps = mnist.train.num_examples // batch_size
    test_steps = mnist.test.num_examples // batch_size

    for epoch in range(5):
        # One pass over the training set, one mini-batch per step
        for _ in range(train_steps):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

        # Evaluate the test set batch by batch and average the batch accuracies
        acc_list = []
        for _ in range(test_steps):
            X_batch, y_batch = mnist.test.next_batch(batch_size)
            batch_acc = sess.run(accuracy, feed_dict={X: X_batch, y: y_batch})
            acc_list.append(batch_acc)
        test_acc = sum(acc_list) / float(len(acc_list))
        print(epoch, "Test accuracy:", test_acc)

    save_path = saver.save(sess, "./my_new_model_final.ckpt")

[array([[[[ 0.1436774 ,  0.06208725,  0.19428511,  0.15965192,  0.09255137,
          -0.15043685,  0.04323363, -0.15209712,  0.13919635, -0.19596508,
          -0.11665009, -0.19295509,  0.08599551,  0.00876857,  0.02424042,
           0.07372688]],

        [[ 0.0965506 ,  0.10263102,  0.08639391, -0.16994804, -0.1674898 ,
          -0.06032021, -0.16857859, -0.12199627, -0.138142  ,  0.0716949 ,
           0.14944614, -0.1142588 , -0.08626974, -0.02654484, -0.18412019,
          -0.07833289]],

        [[ 0.00527871,  0.00831437,  0.11751921, -0.06409185,  0.18480386,
           0.08745472,  0.07106398,  0.14283498, -0.07341319,  0.18218027,
          -0.1375303 ,  0.15108292,  0.135484  , -0.14575592,  0.09163739,
          -0.05395772]]],


       [[[-0.12464728, -0.00929835, -0.17455626, -0.08600647, -0.17162451,
          -0.12354314,  0.19538037,  0.1354145 ,  0.09801404, -0.19382387,
          -0.15673088,  0.07769804,  0.07321508,  0.06267332, -0.11012249,
          -0.155706

In [17]:
# Two hand-copied 16-value vectors, compared element-wise below.
# NOTE(review): presumably pasted from printed variable values taken before and
# after an operation in this notebook -- confirm which variable they came from.
pre = [
    -0.00133609, -0.00257999,  0.00852737,  0.12076698,
    -0.00141041,  0.00501976, -0.0043501 , -0.00424061,
     0.28234836,  0.05975578,  0.09724929, -0.0019383 ,
    -0.00523843,  0.00256824, -0.00087943, -0.00256327,
]
post = [
    -0.00133609, -0.00257999,  0.00852737,  0.12076698,
    -0.00141041,  0.00501976, -0.0043501 , -0.00424061,
     0.28234836,  0.05975578,  0.09724929, -0.0019383 ,
    -0.00523843,  0.00256824, -0.00087943, -0.00256327,
]

# True when every pair of elements agrees within np.allclose's default tolerances
print(np.allclose(pre, post))

True


## Load only certain layers: restore `conv1` from the checkpoint and train the remaining layers

In [18]:
reset_graph()

# --- Hyper-parameters --------------------------------------------------------
# The conv layer matches the checkpointed model; the dense layers are narrower
# (40 units instead of 50), so only the conv layer can be reused from the checkpoint.
# `n_inputs` is defined here as well so this cell does not depend on a value left
# over from another cell.
n_inputs = 28 * 28  # MNIST
conv1_fmaps = 16
conv1_ksize = 3
conv1_stride = 1
conv1_pad = "SAME"

n_hidden2 = 40
n_hidden3 = 40
n_hidden4 = 40
n_hidden5 = 40
n_outputs = 10

learning_rate = 0.01

# --- Inputs ------------------------------------------------------------------
with tf.name_scope("inputs"):
    X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
    X_reshaped = tf.reshape(X, shape=[-1, 28, 28, 1])
    # `shape=(None)` is plain `None` (the parentheses do not make a tuple), which
    # leaves the shape completely unspecified. Declare the labels as a 1-D batch.
    y = tf.placeholder(tf.int64, shape=[None], name="y")

# --- Network: one conv layer followed by four dense ReLU layers --------------
with tf.name_scope("dnn"):
    conv1 = tf.layers.conv2d(inputs=X_reshaped, filters=conv1_fmaps, kernel_size=conv1_ksize,
                             strides=conv1_stride, padding=conv1_pad,
                             activation=tf.nn.relu,
                             kernel_initializer=tf.contrib.layers.xavier_initializer(),
                             bias_initializer=tf.zeros_initializer(), trainable=True,
                             name="conv1")
    # Take the flattened width from conv1's static shape instead of hard-coding
    # 28 * 28 * conv1_fmaps, so it stays correct if stride or padding change.
    n_flat = int(np.prod(conv1.get_shape().as_list()[1:]))
    flatten = tf.reshape(conv1, shape=[-1, n_flat])
    hidden2 = tf.layers.dense(flatten, n_hidden2, activation=tf.nn.relu, name="hidden2")
    hidden3 = tf.layers.dense(hidden2, n_hidden3, activation=tf.nn.relu, name="hidden3")
    hidden4 = tf.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name="hidden4")
    hidden5 = tf.layers.dense(hidden4, n_hidden5, activation=tf.nn.relu, name="hidden5")

with tf.name_scope("output"):
    logits = tf.layers.dense(hidden5, n_outputs, name="outputs")
    Y_proba = tf.nn.softmax(logits, name="Y_proba")

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

# --- Training step -----------------------------------------------------------
# The Adam optimizer that used to be built in an earlier "train" scope was never run
# (its `optimizer`/`training_op` were rebound here); it only added unused variables
# to the graph and has been removed.
with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    # Only the dense layers and the output layer are updated; conv1 is not in the
    # list, so its weights are left untouched by training.
    train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                   scope="hidden[2345]|outputs")
    training_op = optimizer.minimize(loss, var_list=train_vars)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [20]:
# Saver limited to the conv-layer variables, keyed by each variable's op name
reuse_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv[1]")
reuse_vars_dict = {var.op.name: var for var in reuse_vars}
restore_saver = tf.train.Saver(reuse_vars_dict)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

# Conv-layer variables, read back at three points below for comparison
conv1_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="conv1")

with tf.Session() as sess:
    # Values straight after initialization ...
    sess.run(init)
    conv1_init = sess.run(conv1_vars)
    print(conv1_init)

    # ... and after loading just the conv-layer variables from the checkpoint
    restore_saver.restore(sess, "./my_model_final.ckpt")
    conv1_restore = sess.run(conv1_vars)
    print(conv1_restore)

    train_steps = mnist.train.num_examples // batch_size
    test_steps = mnist.test.num_examples // batch_size

    for epoch in range(10):
        # One pass over the training set, one mini-batch per step
        for _ in range(train_steps):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

        # Evaluate the test set batch by batch and average the batch accuracies
        acc_list = []
        for _ in range(test_steps):
            X_batch, y_batch = mnist.test.next_batch(batch_size)
            batch_acc = sess.run(accuracy, feed_dict={X: X_batch, y: y_batch})
            acc_list.append(batch_acc)
        test_acc = sum(acc_list) / float(len(acc_list))
        print(epoch, "Test accuracy:", test_acc)

    # Conv-layer values once training has finished, then save the whole model
    conv1_after_train = sess.run(conv1_vars)
    print(conv1_after_train)
    save_path = saver.save(sess, "./my_new_model_final.ckpt")

[array([[[[ 0.1436774 ,  0.06208725,  0.19428511,  0.15965192,  0.09255137,
          -0.15043685,  0.04323363, -0.15209712,  0.13919635, -0.19596508,
          -0.11665009, -0.19295509,  0.08599551,  0.00876857,  0.02424042,
           0.07372688]],

        [[ 0.0965506 ,  0.10263102,  0.08639391, -0.16994804, -0.1674898 ,
          -0.06032021, -0.16857859, -0.12199627, -0.138142  ,  0.0716949 ,
           0.14944614, -0.1142588 , -0.08626974, -0.02654484, -0.18412019,
          -0.07833289]],

        [[ 0.00527871,  0.00831437,  0.11751921, -0.06409185,  0.18480386,
           0.08745472,  0.07106398,  0.14283498, -0.07341319,  0.18218027,
          -0.1375303 ,  0.15108292,  0.135484  , -0.14575592,  0.09163739,
          -0.05395772]]],


       [[[-0.12464728, -0.00929835, -0.17455626, -0.08600647, -0.17162451,
          -0.12354314,  0.19538037,  0.1354145 ,  0.09801404, -0.19382387,
          -0.15673088,  0.07769804,  0.07321508,  0.06267332, -0.11012249,
          -0.155706

In [None]:
# Hand-copied 16-value vectors kept for the comparison in the next cell.
# NOTE(review): presumably one variable's values after initialization (`init_vals`,
# all zeros) and before/after a later step (`pre`, `post`) -- confirm the source.
#
# The all-zero list is deliberately NOT called `init`: that name holds the
# TensorFlow initializer op used by the session cells above, and rebinding it to a
# list would make any re-run of `init.run()` fail.
init_vals = [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.]
pre = [-0.00133609, -0.00257999,  0.00852737,  0.12076698, -0.00141041,
        0.00501976, -0.0043501 , -0.00424061,  0.28234836,  0.05975578,
        0.09724929, -0.0019383 , -0.00523843,  0.00256824, -0.00087943,
       -0.00256327]
post = [-0.00133609, -0.00257999,  0.00852737,  0.12076698, -0.00141041,
        0.00501976, -0.0043501 , -0.00424061,  0.28234836,  0.05975578,
        0.09724929, -0.0019383 , -0.00523843,  0.00256824, -0.00087943,
       -0.00256327]

In [21]:
# True when `pre` and `post` agree element-wise within np.allclose's default tolerances
values_match = np.allclose(pre, post)
print(values_match)

True
