# Xavier and He Initialization

In [1]:
import tensorflow as tf
import numpy as np
from datetime import datetime
import os

tf.AUTO_REUSE

<_ReuseMode.AUTO_REUSE: 1>

In [2]:
# Clear the default graph so ops left over from earlier cell runs are discarded.
tf.reset_default_graph()

In [3]:
#he_init= tf.variance_scaling_initializer()
#hidden1= tf.layers.dense(X, n_hidden, activation= tf.nn.relu, kernel_initializer=he_init, name="hidden1")

# Leaky ReLU

In [5]:
def leaky_relu(z, name=None, alpha=0.01):
    """Leaky ReLU activation, computed element-wise as ``max(alpha * z, z)``.

    Args:
        z: Tensor of pre-activation values.
        name: Optional name forwarded to the ``tf.maximum`` op.
        alpha: Slope applied to negative inputs. Defaults to 0.01, the value
            that was previously hard-coded, so existing callers (including
            ``activation=leaky_relu``) behave exactly as before. Must not
            exceed 1, otherwise ``max(alpha * z, z)`` would select the scaled
            branch for positive inputs instead of negative ones.

    Returns:
        The tensor produced by ``tf.maximum(alpha * z, z)``.
    """
    return tf.maximum(alpha * z, z, name=name)

# He initialization draws weights with variance 2 / fan_in. The TF 1.x defaults of
# tf.variance_scaling_initializer are scale=1.0, mode="fan_in", so scale=2.0 has to be
# passed explicitly; with the defaults the variance is only 1 / fan_in.
he_init= tf.variance_scaling_initializer(scale=2.0)
#hidden1=tf.layers.dense(X, n_hidden, activation= leaky_relu, kernel_initializer=he_init, name="hidden1")

In [6]:
# Clear the default graph before the network below is defined.
tf.reset_default_graph()


# Layer sizes used when building the network below.
# n_inputs is the width of the X placeholder; n_hidden1/n_hidden2 are the unit counts
# of the two hidden dense layers; n_outputs is the unit count of the output layer.
n_inputs= 28*28
n_hidden1=300
n_hidden2=100
n_outputs= 10

In [7]:
# Input features: float32, any number of rows, n_inputs columns.
X= tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
# Integer targets. Note that `(None)` is plain None, not a 1-tuple, so the shape of
# this placeholder is left unspecified.
y= tf.placeholder(tf.int32, shape=(None), name="y")

In [8]:
# Network body: two dense hidden layers using leaky_relu and the he_init kernel
# initializer, followed by a dense output layer with no activation that yields the logits.
with tf.name_scope("dnn"):
    hidden1= tf.layers.dense(X, n_hidden1, activation=leaky_relu, kernel_initializer=he_init, name="hidden1")
    hidden2= tf.layers.dense(hidden1, n_hidden2, activation=leaky_relu, kernel_initializer=he_init, name="hidden2")
    logits= tf.layers.dense(hidden2, n_outputs, name="output")

In [9]:
# Loss: per-example sparse softmax cross-entropy between the integer labels and the
# logits, averaged into a scalar. The scalar is also exported as a summary
# tagged 'log_loss'.
with tf.name_scope("loss"):
    xentropy= tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss= tf.reduce_mean(xentropy, name="cross_entropy")
    loss_summary= tf.summary.scalar('log_loss', loss)

In [10]:
# Accuracy: `correct` is true where the target class is the top-1 prediction;
# casting to float32 and averaging gives the fraction of correct predictions.
# The value is also exported as a summary tagged 'log_eval'.
with tf.name_scope("eval"):
    correct= tf.nn.in_top_k(logits, y, 1)
    accuracy= tf.reduce_mean(tf.cast(correct, tf.float32))
    accuracy_summary= tf.summary.scalar('log_eval', accuracy)

In [11]:
# Learning rate passed to the gradient descent optimizer.
lr=0.01

# training_op performs one gradient descent step that minimizes `loss`.
with tf.name_scope("train"):
    optimizer= tf.train.GradientDescentOptimizer(lr)
    training_op= optimizer.minimize(loss)

In [12]:
# `init` initializes all global variables when run in a session;
# `saver` is used later to write and restore checkpoints.
init= tf.global_variables_initializer()
saver= tf.train.Saver()

In [13]:
def log_dir(prefix="", root_logdir="C:/Users/soumyama/Documents/Python Scripts/Scikit Learn/tf_logs"):
    """Build a timestamped log directory path for one training run.

    Args:
        prefix: Optional label placed before ``run-<timestamp>``; when non-empty
            it is joined to the rest of the name with a dash.
        root_logdir: Directory under which the run directory is placed.
            Defaults to the location that was previously hard-coded.

    Returns:
        A string of the form ``<root_logdir>/<prefix->run-<YYYYmmddHHMMSS>/``
        where the timestamp is the current UTC time.
    """
    # %m and %d are month and day. The previous format "%Y%M%D%H%M%S" used %M
    # (minutes) in the month position and %D, which is not a portable directive
    # and, where supported, expands to "mm/dd/yy" and so injects path separators.
    now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
    if prefix:
        prefix += "-"
    name = prefix + "run-" + now
    return "{}/{}/".format(root_logdir, name)

In [14]:
# Log directory path for this run, prefixed with "MNIST_dnn".
logdir= log_dir("MNIST_dnn")

In [15]:
# Summary writer targeting `logdir`; the current default graph is passed in as well.
file_writer= tf.summary.FileWriter(logdir, tf.get_default_graph())

In [16]:
# Load MNIST through Keras as (train, test) pairs of images and labels.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Flatten every image to a 784-long float32 row and divide by 255
# (presumably mapping 0-255 pixel intensities into [0, 1]).
X_train = X_train.reshape(-1, 784).astype(np.float32)
X_train = X_train / 255.
X_test = X_test.reshape(-1, 784).astype(np.float32)
X_test = X_test / 255.

# Labels are fed to an int32 placeholder.
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)

# Hold out the first 5000 training examples for validation; train on the rest.
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]

In [17]:
def shuffle_batch(X, y, batch_size):
    """Yield shuffled mini-batches that together cover every sample exactly once.

    The sample indices are permuted with NumPy's global random state and split
    into ``len(X) // batch_size`` nearly equal parts, so individual batches may
    be slightly larger than ``batch_size`` when it does not divide ``len(X)``.

    Args:
        X: Array of inputs, indexable with an integer index array.
        y: Array of targets aligned with ``X``.
        batch_size: Target number of samples per batch.

    Yields:
        ``(X_batch, y_batch)`` tuples. Nothing is yielded for empty input.
    """
    n_samples = len(X)
    if n_samples == 0:
        # Nothing to iterate over; np.array_split would otherwise be asked for 0 sections.
        return
    rnd_idx = np.random.permutation(n_samples)
    # Fewer samples than batch_size used to give n_batches == 0, which makes
    # np.array_split raise; emit a single (short) batch in that case.
    n_batches = max(1, n_samples // batch_size)
    for batch_idx in np.array_split(rnd_idx, n_batches):
        yield X[batch_idx], y[batch_idx]

In [18]:
# Checkpoint written during training and restored when resuming.
checkpoint_path= "C:/Users/soumyama/Documents/Python Scripts/Scikit Learn/checkpoint/mnist_model.ckpt"
# Small side file holding the number of the next epoch to run; its presence is what
# the training cell uses to detect a run to resume.
checkpoint_epoch_path= checkpoint_path + ".epoch"
# Save path for the model with the lowest validation loss seen so far.
# NOTE(review): this path ends with "/", so the saver is given a directory-like prefix
# with an empty file stem — confirm this is intended.
final_model_path= "./deep_mnist_model/" 

In [19]:
# Training schedule.
n_epochs=500
batch_size=128

# Early-stopping state: lowest validation loss seen so far, how long it has been
# since it last improved, and how long training may continue without improvement.
# np.inf is used instead of the alias np.infty, which was removed in NumPy 2.0.
best_loss= np.inf
epochs_without_progress=0
max_epochs_without_progress=50

In [20]:
# Train the network with resumable checkpoints, TensorBoard summaries and early stopping.
#
# NOTE(review): best_loss and epochs_without_progress are not persisted, so a resumed
# run starts its early-stopping bookkeeping from their initial values.

# Validation results are printed and checkpoints written every `check_interval` epochs.
# The no-progress counter advances by the same amount, so max_epochs_without_progress
# really is measured in epochs (it previously advanced by 5 per single-epoch check).
check_interval = 1

with tf.Session() as sess:
    if os.path.isfile(checkpoint_epoch_path):
        # An epoch marker on disk means an earlier run stopped before finishing:
        # read the next epoch number and restore the matching checkpoint.
        with open(checkpoint_epoch_path, "rb") as f:
            start_epoch= int(f.read())
        print("Training was interrupted. Resuming from Epoch: ", start_epoch)
        saver.restore(sess, checkpoint_path)
    else:
        start_epoch=0
        sess.run(init)

    for epoch in range(start_epoch, n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

        # Evaluate on the validation set and log both summaries for this epoch.
        accuracy_val, loss_val, accuracy_summary_str, loss_summary_str= sess.run(
            [accuracy, loss, accuracy_summary, loss_summary],
            feed_dict={X: X_valid, y: y_valid})
        file_writer.add_summary(accuracy_summary_str, epoch)
        file_writer.add_summary(loss_summary_str, epoch)

        if epoch % check_interval == 0:
            print("Epoch: ", epoch, "\tValidation Loss: {:.3f}".format(loss_val), 
                  "\tValidation Accuracy: {:.3f}%".format(accuracy_val*100))
            # Checkpoint first, then record the next epoch to run, so the marker
            # never points past the saved weights.
            saver.save(sess, checkpoint_path)
            with open(checkpoint_epoch_path, "wb") as f:
                f.write(b"%d" % (epoch+1))
            if loss_val < best_loss:
                saver.save(sess, final_model_path)
                best_loss= loss_val
                # Progress was made: restart the patience window.
                epochs_without_progress = 0
            else:
                epochs_without_progress += check_interval
                if epochs_without_progress> max_epochs_without_progress:
                    print("Early Stopping")
                    break

    # Make sure every queued summary reaches disk.
    file_writer.flush()

# Reached only when training ended normally (all epochs done or early stopping).
# Remove the marker so the next run starts fresh instead of reporting an interrupted run.
if os.path.isfile(checkpoint_epoch_path):
    os.remove(checkpoint_epoch_path)

Epoch:  0 	Validation Loss: 0.680 	Validation Accuracy: 85.160%
Epoch:  1 	Validation Loss: 0.428 	Validation Accuracy: 88.960%
Epoch:  2 	Validation Loss: 0.356 	Validation Accuracy: 90.260%
Epoch:  3 	Validation Loss: 0.322 	Validation Accuracy: 91.140%
Epoch:  4 	Validation Loss: 0.297 	Validation Accuracy: 91.720%
Epoch:  5 	Validation Loss: 0.280 	Validation Accuracy: 92.360%
Epoch:  6 	Validation Loss: 0.266 	Validation Accuracy: 92.820%
Epoch:  7 	Validation Loss: 0.253 	Validation Accuracy: 93.080%
Epoch:  8 	Validation Loss: 0.244 	Validation Accuracy: 93.400%
Epoch:  9 	Validation Loss: 0.235 	Validation Accuracy: 93.440%
Epoch:  10 	Validation Loss: 0.225 	Validation Accuracy: 93.840%
Epoch:  11 	Validation Loss: 0.217 	Validation Accuracy: 94.040%
Epoch:  12 	Validation Loss: 0.209 	Validation Accuracy: 94.280%
Epoch:  13 	Validation Loss: 0.202 	Validation Accuracy: 94.440%
Epoch:  14 	Validation Loss: 0.196 	Validation Accuracy: 94.680%
Epoch:  15 	Validation Loss: 0.190 

![png (2)](https://user-images.githubusercontent.com/13174586/54606356-a0bcd300-4a71-11e9-8869-5e57b73bff86.png)