# The Libraries that I used and the MNIST dataset

In [3]:
from __future__ import absolute_import, division, print_function

import numbers
import os

import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from tensorflow.contrib import layers
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.layers import utils
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops


# Import MNIST data
# read_data_sets downloads the four MNIST archives into /tmp/data/ when they are
# missing and extracts them; one_hot=True yields labels as one-hot vectors, which is
# what the `y` placeholder and the argmax-based accuracy below rely on.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting /tmp/data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


# Parameters

In [4]:
learning_rate = 0.05 # step size handed to tf.train.GradientDescentOptimizer
training_epochs = 20 # number of full passes over the training set
batch_size = 100 # number of training examples per optimisation step
display_step = 1 # report metrics every `display_step` epochs (1 = every epoch)
drop_out=0.5 # used by the sigmoid nn_model as the 2nd positional argument of tf.nn.dropout (keep_prob in TF 1.x); at 0.5 "rate" and "keep probability" coincide
n_classes = 10 # MNIST total classes (0-9 digits)

# tf Graph input
# x: batch of flattened images, 784 features each; y: one-hot labels with n_classes columns.
x = tf.placeholder("float", [None, 784])
y = tf.placeholder("float", [None, n_classes])
# dropoutRate is forwarded to nn_model as `rate`; the training loop feeds 0.05 while
# optimising and 0.0 while evaluating.
dropoutRate = tf.placeholder(tf.float32)
# is_training is forwarded to nn_model; the training loop feeds True for optimisation
# steps and False for the accuracy/loss evaluations.
is_training= tf.placeholder(tf.bool)

# Selu activation function

In [5]:
def selu(x):
    """Scaled exponential linear unit.

    Returns `scale * x` where `x >= 0` and `scale * alpha * tf.nn.elu(x)`
    elsewhere, with the fixed `alpha` / `scale` constants below. Ops are
    created under the name scope 'elu'.
    """
    alpha = 1.6732632423543772848170429916717
    scale = 1.0507009873554804934193349852946
    with ops.name_scope('elu'):
        non_negative = x >= 0.0
        negative_branch = alpha * tf.nn.elu(x)
        return scale * tf.where(non_negative, x, negative_branch)


# Drop out for the Selu activation function

In [6]:
def dropout_selu(x, rate, alpha= -1.7580993408473766, fixedPointMean=0.0, fixedPointVar=1.0,
                 noise_shape=None, seed=None, name=None, training=False):
    """Dropout that replaces dropped units with `alpha` and then rescales.

    Unlike standard dropout, dropped activations are set to `alpha` instead of
    zero. The result is then passed through the affine map `a * ret + b`, where
    `a` and `b` are derived from `keep_prob`, `alpha`, `fixedPointMean` and
    `fixedPointVar` so that an input with mean `fixedPointMean` and variance
    `fixedPointVar` keeps that mean and variance after dropout.

    The default `alpha` equals `-scale * alpha` of the `selu` function in this
    file (-1.0507... * 1.6732...).

    Args:
        x: Tensor (or value convertible to one) to apply dropout to.
        rate: Drop probability; a Python float or a scalar tensor.
            `keep_prob` is computed as `1.0 - rate`.
        alpha: Scalar value assigned to dropped units.
        fixedPointMean: Mean the output should be rescaled towards.
        fixedPointVar: Variance the output should be rescaled towards.
        noise_shape: Shape of the random keep/drop mask; defaults to the
            dynamic shape of `x`.
        seed: Seed forwarded to the uniform random op.
        name: Optional name scope; defaults to "dropout".
        training: Python bool or boolean tensor. Dropout is applied only when
            true; otherwise `x` is returned through an identity op.

    Returns:
        A tensor with the same static shape as `x`.

    Raises:
        ValueError: If `rate` is a Python number and `1.0 - rate` is not in
            (0, 1], or if `keep_prob` / `alpha` are not scalar-compatible.
    """

    def dropout_selu_impl(x, rate, alpha, noise_shape, seed, name):
        keep_prob = 1.0 - rate
        x = ops.convert_to_tensor(x, name="x")
        # Range validation is only possible for Python numbers; a tensor-valued
        # rate (e.g. a placeholder) is checked for shape only.
        if isinstance(keep_prob, numbers.Real) and not 0 < keep_prob <= 1:
            raise ValueError("keep_prob must be a scalar tensor or a float in the "
                                             "range (0, 1], got %g" % keep_prob)
        keep_prob = ops.convert_to_tensor(keep_prob, dtype=x.dtype, name="keep_prob")
        keep_prob.get_shape().assert_is_compatible_with(tensor_shape.scalar())

        alpha = ops.convert_to_tensor(alpha, dtype=x.dtype, name="alpha")
        # Fix: the original repeated the keep_prob check here, leaving alpha
        # unvalidated.
        alpha.get_shape().assert_is_compatible_with(tensor_shape.scalar())

        # Statically known keep_prob == 1 means nothing is dropped.
        if tensor_util.constant_value(keep_prob) == 1:
            return x

        noise_shape = noise_shape if noise_shape is not None else array_ops.shape(x)
        # uniform [0, 1) + keep_prob, floored: 1 with probability keep_prob, else 0.
        random_tensor = keep_prob
        random_tensor += random_ops.random_uniform(noise_shape, seed=seed, dtype=x.dtype)
        binary_tensor = math_ops.floor(random_tensor)
        # Kept units pass through; dropped units take the value alpha.
        ret = x * binary_tensor + alpha * (1-binary_tensor)

        # Affine correction restoring the target mean and variance.
        a = tf.sqrt(fixedPointVar / (keep_prob *((1-keep_prob) * tf.pow(alpha-fixedPointMean,2) + fixedPointVar)))

        b = fixedPointMean - a * (keep_prob * fixedPointMean + (1 - keep_prob) * alpha)
        ret = a * ret + b
        ret.set_shape(x.get_shape())
        return ret

    with ops.name_scope(name, "dropout", [x]) as name:
        # smart_cond resolves a Python bool at graph-construction time and
        # otherwise builds a conditional on the tensor.
        return utils.smart_cond(training,
            lambda: dropout_selu_impl(x, rate, alpha, noise_shape, seed, name),
            lambda: array_ops.identity(x))

# Scale input to zero mean and unit variance

In [7]:
# Fit the per-feature mean / standard deviation on the training images only;
# the same fitted scaler is applied to training and test batches later on.
scaler = StandardScaler()
scaler.fit(mnist.train.images)

# Tensorboard to read the summarized data

In [8]:
# Directory the summary FileWriter writes TensorBoard event files to.
# '~' is a shell convention and is not expanded when the path is handed to the
# writer as-is (that would create a literal './~/tmp' directory), so resolve it
# to the user's home directory explicitly.
logs_path = os.path.expanduser('~/tmp')

# Neural network model for the ReLU/Sigmoid activation function, with or without dropout

In [29]:
def nn_model(x, layers, layer_nodes, n_classes,rate, is_training):
    """Build a fully connected sigmoid network with dropout and return its logits.

    The network is: one 784 -> layer_nodes input layer, followed by
    `layers - 1` hidden layer_nodes -> layer_nodes layers, each with a sigmoid
    activation and dropout, and a final linear layer_nodes -> n_classes layer.

    NOTE: this notebook defines `nn_model` twice (this sigmoid variant and a
    SELU variant); whichever cell is executed last is the one in effect.

    Args:
        x: Input tensor with 784 features per row.
        layers: Number of sigmoid layers (input layer included). Must be > 1.
        layer_nodes: Width of every hidden layer.
        n_classes: Number of output logits.
        rate: Unused by this variant; kept so the signature matches the SELU
            model. The keep probability is the module-level `drop_out`.
        is_training: Python bool or boolean tensor. Dropout is applied only
            when true.

    Returns:
        The logits tensor of shape [batch, n_classes], or None when
        `layers - 1 <= 0`.
    """
    # Guard first so that no variables are added to the graph for a model that
    # is not going to be built.
    if layers - 1 <= 0:
        return None

    def _dense_params(fan_in, fan_out):
        # Weights ~ N(0, 1/fan_in); biases start at zero.
        return {'weights': tf.Variable(tf.random_normal([fan_in, fan_out], stddev=np.sqrt(1.0 / fan_in))),
                'biases': tf.Variable(tf.zeros([fan_out]))}

    def _dropout(t):
        # Fix: the original applied tf.nn.dropout unconditionally, so evaluation
        # runs (is_training=False) were also randomly dropped out.
        return utils.smart_cond(is_training,
                                lambda: tf.nn.dropout(t, drop_out),
                                lambda: tf.identity(t))

    input_layer = _dense_params(784, layer_nodes)
    output_layer = _dense_params(layer_nodes, n_classes)

    l = tf.add(tf.matmul(x, input_layer['weights']), input_layer['biases'])
    l = _dropout(tf.nn.sigmoid(l))

    for _ in range(layers - 1):
        hidden_layer = _dense_params(layer_nodes, layer_nodes)
        l = tf.add(tf.matmul(l, hidden_layer['weights']), hidden_layer['biases'])
        l = _dropout(tf.nn.sigmoid(l))

    # Final layer is linear: the loss applies softmax to these logits.
    return tf.matmul(l, output_layer['weights']) + output_layer['biases']

# Neural network model for the SELU activation function, with or without dropout

In [9]:
def nn_model(x, layers, layer_nodes, n_classes,rate, is_training):
    """Build a fully connected SELU network and return its logits.

    One 784 -> layer_nodes layer and `layers - 1` further
    layer_nodes -> layer_nodes layers, each followed by `selu`, then a linear
    layer_nodes -> n_classes output layer. `rate` and `is_training` are only
    used by the `dropout_selu` calls, which are currently commented out.

    Returns the logits tensor, or None when `layers - 1` is not positive
    (the input and output variables are still created in that case).
    """

    def _make_params(fan_in, fan_out):
        # Weights ~ N(0, 1/fan_in); biases are drawn with stddev 0.
        return {'weights':tf.Variable(tf.random_normal([fan_in, fan_out],stddev=np.sqrt(1/fan_in))),
                'biases':tf.Variable(tf.random_normal([fan_out],stddev=0))}

    first = _make_params(784, layer_nodes)
    last = _make_params(layer_nodes, n_classes)

    if layers-1 <= 0:
        return None

    activations = selu(tf.add(tf.matmul(x, first['weights']), first['biases']))
    #activations = dropout_selu(activations,rate, training=is_training)

    for _ in range(layers-1):
        hidden = _make_params(layer_nodes, layer_nodes)
        pre_activation = tf.add(tf.matmul(activations, hidden['weights']), hidden['biases'])
        activations = selu(pre_activation)
        #activations = dropout_selu(activations,rate, training=is_training)

    return tf.matmul(activations, last['weights']) + last['biases']

# Construct the model, define the loss and optimizer, compute accuracy, and initialize the model's variables

In [10]:
# Build the network with layers=17 and layer_nodes=784; `pred` holds the logits.
pred = nn_model(x, 17, 784, n_classes, rate=dropoutRate, is_training=is_training)

# Loss is the batch mean of the softmax cross-entropy; optimised with plain
# gradient descent at `learning_rate`.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=pred))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)

# A prediction is correct when the arg-max logit matches the arg-max of the
# one-hot label; accuracy is the mean of those matches over the batch.
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Op that initialises every variable created above.
init = tf.global_variables_initializer()

# summary for loss  and accuracy then merge them

In [11]:
# Register a scalar summary for the loss (tag "loss")
tf.summary.scalar("loss", cost)
# Register a scalar summary for the accuracy (tag "accuracy")
tf.summary.scalar("accuracy", accuracy)
# Merge every summary registered so far into one op; the training loop runs it
# once per displayed epoch and hands the result to the FileWriter.
merged_summary_op = tf.summary.merge_all()

# Run the graph: per epoch, print the average cost, the training accuracy and loss, and the validation accuracy and loss

In [12]:
# Launch the graph
# allow_growth makes TensorFlow claim GPU memory on demand instead of all at once.
gpu_options = tf.GPUOptions(allow_growth=True)
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    sess.run(init)

    summary_writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())
    # Number of optimisation steps per epoch; constant across epochs.
    total_batch = mnist.train.num_examples // batch_size

    try:
        # Training cycle
        for epoch in range(training_epochs):
            avg_cost = 0.
            # Loop over all batches
            for i in range(total_batch):
                batch_x, batch_y = mnist.train.next_batch(batch_size)
                batch_x = scaler.transform(batch_x)
                # Run optimization op (backprop) and cost op (to get loss value)
                _, c = sess.run([optimizer, cost], feed_dict={x: batch_x,
                                                              y: batch_y, dropoutRate: 0.05, is_training:True})

                # Compute average loss
                # NOTE(review): the recorded run printed cost=nan from the first
                # epoch on; a single NaN batch makes avg_cost NaN for the epoch.
                avg_cost += c / total_batch
            # Display logs per epoch step
            if epoch % display_step == 0:
                print ("Epoch:", '%04d' % (epoch+1), "cost=","{:.9f}".format(avg_cost))

                # "Train" metrics are measured on the last mini-batch of the epoch
                # only (batch_x / batch_y left over from the loop above), with
                # dropout disabled.
                accTrain, costTrain, summary = sess.run([accuracy, cost, merged_summary_op],
                                                            feed_dict={x: batch_x, y: batch_y,
                                                                       dropoutRate: 0.0, is_training:False})
                summary_writer.add_summary(summary, epoch)

                print("Train-Accuracy:", accTrain,"Train-Loss:", costTrain)

                # "Validation" metrics come from a 512-example batch drawn from
                # mnist.test, scaled with the scaler fitted on the training set.
                batch_x_test, batch_y_test = mnist.test.next_batch(512)
                batch_x_test = scaler.transform(batch_x_test)

                accTest, costVal = sess.run([accuracy, cost], feed_dict={x: batch_x_test, y: batch_y_test,
                                                                       dropoutRate: 0.0, is_training:False})

                print("Validation-Accuracy:", accTest,"Val-Loss:", costVal,"\n")
    finally:
        # Fix: the writer was never closed, so buffered events could be lost
        # and the event file handle leaked.
        summary_writer.close()

Epoch: 0001 cost= nan
Train-Accuracy: 0.09 Train-Loss: nan
Validation-Accuracy: 0.0917969 Val-Loss: nan 

Epoch: 0002 cost= nan
Train-Accuracy: 0.14 Train-Loss: nan
Validation-Accuracy: 0.0859375 Val-Loss: nan 

Epoch: 0003 cost= nan
Train-Accuracy: 0.06 Train-Loss: nan
Validation-Accuracy: 0.0898438 Val-Loss: nan 

Epoch: 0004 cost= nan
Train-Accuracy: 0.17 Train-Loss: nan
Validation-Accuracy: 0.111328 Val-Loss: nan 

Epoch: 0005 cost= nan
Train-Accuracy: 0.06 Train-Loss: nan
Validation-Accuracy: 0.09375 Val-Loss: nan 

Epoch: 0006 cost= nan
Train-Accuracy: 0.12 Train-Loss: nan
Validation-Accuracy: 0.09375 Val-Loss: nan 

Epoch: 0007 cost= nan
Train-Accuracy: 0.06 Train-Loss: nan
Validation-Accuracy: 0.119141 Val-Loss: nan 

Epoch: 0008 cost= nan
Train-Accuracy: 0.2 Train-Loss: nan
Validation-Accuracy: 0.0761719 Val-Loss: nan 

Epoch: 0009 cost= nan
Train-Accuracy: 0.08 Train-Loss: nan
Validation-Accuracy: 0.0761719 Val-Loss: nan 

Epoch: 0010 cost= nan
Train-Accuracy: 0.15 Train-Loss