# Batch Normalization:

#### We normalize each layer's outputs before passing them to the activation function, to avoid vanishing/exploding gradient problems during the training phase.<br><br> The outputs are first zero-centered and normalized; then they are scaled and shifted (using two learned parameters per layer: one for scaling, one for shifting).<br><br> In the end the model learns the optimal scale and mean of the outputs of each layer.<br> <br>With this technique we can avoid normalizing the input data ourselves.<br><br> This kind of implementation takes more time per layer than an implementation without batch normalization.

## --- Construction phase: ---

In [2]:
from datetime import datetime
import numpy as np
from functools import partial
import tensorflow as tf

n_inputs = 28 * 28  # MNIST dataset: each image is flattened into 28*28 features
n_hidden1 = 300  # number of neurons in the first hidden layer
n_hidden2 = 100  # number of neurons in the second hidden layer
n_outputs = 10  # one output per class

# Timestamp in YYYYMMDDHHMMSS format (UTC) so every run logs to its own directory.
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}".format(root_logdir, now)  # tf_logs/run-YYYYMMDDHHMMSS

# Input features: one row per instance, n_inputs columns; the batch size is left open.
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")

# Integer class labels, one per instance. The shape must be the 1-tuple `(None,)`:
# `(None)` is just `None`, which would leave the rank of `y` unspecified.
y = tf.placeholder(tf.int64, shape=(None,), name="y")

# Scalar switch for batch normalization. When it is not fed, it defaults to False.
training = tf.placeholder_with_default(False, shape=(), name="training")

  from ._conv import register_converters as _register_converters


### Creating different layer:

In [3]:
# Batch-normalization layer with `training` and `momentum` pre-bound, so that each
# call below only has to pass the tensor to normalize.
#   training: the boolean placeholder defined above. When it is fed True, the layer
#             normalizes with the statistics of the current mini-batch (training mode);
#             otherwise it normalizes with the moving statistics (inference mode).
#   momentum: decay of the moving averages maintained during training. They are
#             updated as: moving <- moving * 0.9 + batch_value * (1 - 0.9).
#             Good values are 0.9, 0.99, 0.999; use a value closer to 1 for larger
#             datasets and smaller mini-batches.
my_batch_norm_layer = partial (tf.layers.batch_normalization, training = training, momentum = 0.9)

# Network: dense -> batch norm -> ELU, twice, followed by dense -> batch norm for the
# logits. The dense layers are created without an activation so that batch
# normalization is applied before the activation function.
with tf.name_scope("dnn_w_bn"):
    hidden1 = tf.layers.dense(X, n_hidden1, name = "hidden1")
    bn1 = my_batch_norm_layer(hidden1)
    bn1_act = tf.nn.elu(bn1)
    hidden2 = tf.layers.dense(bn1_act, n_hidden2, name = "hidden2")
    bn2 = my_batch_norm_layer(hidden2)
    bn2_act = tf.nn.elu(bn2)
    # The output layer is batch-normalized as well; `logits` is the normalized result.
    logits_before_bn = tf.layers.dense(bn2_act, n_outputs, name = "outputs")
    logits = my_batch_norm_layer(logits_before_bn)

### Calculating the Cost function (Cross Entropy)/ Gradient Optimizer / Accuracy:

In [4]:
with tf.name_scope("loss"):
    # Cross entropy computed directly from the logits (i.e. the outputs before the
    # softmax activation). Labels are expected as integer class indices ranging from
    # 0 to the number of classes minus 1. The result is a 1D tensor holding one
    # cross-entropy value per instance.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y,
        logits=logits,
    )
    # Mean cross entropy over all instances.
    loss = tf.reduce_mean(xentropy, name="loss")

learning_rate = 0.01

with tf.name_scope("train"):
    # Plain gradient descent on the mean cross entropy.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    # For each instance, check whether the highest logit corresponds to the target
    # class; this yields a 1D tensor of booleans.
    correct = tf.nn.in_top_k(predictions=logits, targets=y, k=1)
    # Accuracy is the fraction of correct predictions (booleans cast to 0.0 / 1.0).
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

### Initializing / save phase:

In [5]:
# Node that initializes all variables when it is run.
init = tf.global_variables_initializer()
# Saver used to write the trained model parameters to disk.
saver = tf.train.Saver()

# Graph node that evaluates the loss and serializes it into a TensorBoard-compatible
# binary log string (a "summary").
loss_summary = tf.summary.scalar(name='LOSS', tensor=loss)
# Writer that appends summaries to log files inside `logdir`; the second argument is
# the graph we want to visualize.
file_writer = tf.summary.FileWriter(logdir, graph=tf.get_default_graph())

## --- Execution phase: ---

#### Importing data:

In [7]:
from  tensorflow.examples.tutorials.mnist import input_data
# Training hyper-parameters.
n_epochs = 40  # number of epochs we want to run
batch_size = 50  # number of instances per mini-batch

# Load MNIST from /tmp/data/. The returned object gives access to the training
# instances, a validation set and a test set.
mnist = input_data.read_data_sets(train_dir="/tmp/data/")

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


#### Finally we begin the execution phase!!!

In [13]:
# Batch normalization creates a few operations that must be evaluated at each training
# step in order to update the moving averages (used at inference time in place of the
# mini-batch mean and standard deviation). These operations are automatically added to
# the UPDATE_OPS collection, so we fetch them here and run them together with the
# training op at every iteration.
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

n_batches = mnist.train.num_examples // batch_size

with tf.Session() as sess:
    init.run()
    try:
        for epoch in range(n_epochs):
            for iteration in range(n_batches):
                X_batch, y_batch = mnist.train.next_batch(batch_size)
                # Log the loss on this batch before the update step. `training` is
                # not fed here, so it takes its default value (False).
                summary_str = loss_summary.eval(feed_dict={X: X_batch, y: y_batch})
                step = epoch * n_batches + iteration  # global step across epochs
                file_writer.add_summary(summary_str, step)
                sess.run([training_op, extra_update_ops],
                         feed_dict={training: True, X: X_batch, y: y_batch})
            # NOTE: "Train accuracy" is measured on the last mini-batch of the epoch
            # only, not on the whole training set.
            acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
            accuracy_val = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})
            print(epoch, "Train accuracy", acc_train, "Test accuracy:", accuracy_val)
            # Checkpoint after every epoch (the same file is overwritten each time).
            save_patch = saver.save(sess, "/tmp/my_model_final3.ckpt")
    finally:
        # Close the writer once, after training. Closing it inside the epoch loop
        # (as before) shut the event file after the first epoch, so the summaries
        # of all later epochs were lost.
        file_writer.close()

# To run TensorBoard, open the Anaconda Prompt in /Progetto Forex/Python (our working directory) and run: tensorboard --logdir tf_logs

0 Train accuracy 0.94 Test accuracy: 0.924
1 Train accuracy 0.96 Test accuracy: 0.9461
2 Train accuracy 0.96 Test accuracy: 0.9559
3 Train accuracy 1.0 Test accuracy: 0.9617
4 Train accuracy 0.96 Test accuracy: 0.9656
5 Train accuracy 1.0 Test accuracy: 0.9674
6 Train accuracy 0.98 Test accuracy: 0.9686
7 Train accuracy 1.0 Test accuracy: 0.9724
8 Train accuracy 1.0 Test accuracy: 0.9735
9 Train accuracy 1.0 Test accuracy: 0.9728
10 Train accuracy 0.98 Test accuracy: 0.9742
11 Train accuracy 1.0 Test accuracy: 0.9759
12 Train accuracy 1.0 Test accuracy: 0.9756
13 Train accuracy 1.0 Test accuracy: 0.976
14 Train accuracy 1.0 Test accuracy: 0.9776
15 Train accuracy 1.0 Test accuracy: 0.9765
16 Train accuracy 1.0 Test accuracy: 0.9785
17 Train accuracy 1.0 Test accuracy: 0.977
18 Train accuracy 1.0 Test accuracy: 0.9801
19 Train accuracy 1.0 Test accuracy: 0.98
20 Train accuracy 1.0 Test accuracy: 0.9788
21 Train accuracy 1.0 Test accuracy: 0.9795
22 Train accuracy 1.0 Test accuracy: 0.98

#### Printing the list of operations in the graph :

In [14]:
# List the name of every operation in the default graph, one per line.
default_graph = tf.get_default_graph()
for op in default_graph.get_operations():
    print(op.name)

X
y
training/input
training
hidden1/kernel/Initializer/random_uniform/shape
hidden1/kernel/Initializer/random_uniform/min
hidden1/kernel/Initializer/random_uniform/max
hidden1/kernel/Initializer/random_uniform/RandomUniform
hidden1/kernel/Initializer/random_uniform/sub
hidden1/kernel/Initializer/random_uniform/mul
hidden1/kernel/Initializer/random_uniform
hidden1/kernel
hidden1/kernel/Assign
hidden1/kernel/read
hidden1/bias/Initializer/zeros
hidden1/bias
hidden1/bias/Assign
hidden1/bias/read
dnn_w_bn/hidden1/MatMul
dnn_w_bn/hidden1/BiasAdd
batch_normalization/gamma/Initializer/ones
batch_normalization/gamma
batch_normalization/gamma/Assign
batch_normalization/gamma/read
batch_normalization/beta/Initializer/zeros
batch_normalization/beta
batch_normalization/beta/Assign
batch_normalization/beta/read
batch_normalization/moving_mean/Initializer/zeros
batch_normalization/moving_mean
batch_normalization/moving_mean/Assign
batch_normalization/moving_mean/read
batch_normalization/moving_varian