## Imports

In [30]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import argparse
import os.path
import sys
import time
import numpy as np

from six.moves import xrange  # pylint: disable=redefined-builtin
import tensorflow as tf

from tensorflow.examples.tutorials.mnist import input_data
#from tensorflow.examples.tutorials.mnist import mnist

## Constants

In [5]:
# MNIST labels are the ten digits 0-9, i.e. one class per digit.
NUM_CLASSES = 10

# Every MNIST image is a square of IMAGE_SIZE x IMAGE_SIZE pixels;
# IMAGE_PIXELS is the length of one image once flattened.
IMAGE_SIZE = 28
IMAGE_PIXELS = IMAGE_SIZE ** 2

class Flags:
    """Plain attribute bag holding the notebook's hyper-parameters and paths.

    It stands in for command-line flags: every setting is a fixed default
    assigned in ``__init__`` and read elsewhere through the ``FLAGS`` instance.
    """

    def __init__(self):
        # (attribute name, default value) pairs, applied in this order.
        defaults = (
            ('learning_rate', 0.01),   # step size handed to the optimizer
            ('max_steps', 2000),       # number of training iterations
            ('hidden1', 128),          # units in the first hidden layer
            ('hidden2', 32),           # units in the second hidden layer
            ('batch_size', 100),       # examples per training/eval batch
            # Alternative location: '/tmp/tensorflow/mnist/input_data'
            ('input_data_dir', '/notebooks/datasets/mnist'),
            ('log_dir', '/tmp/tensorflow/mnist/logs/fully_connected_feed'),
            ('fake_data', False),      # forwarded to the MNIST data loader
        )
        for attr_name, default_value in defaults:
            setattr(self, attr_name, default_value)


# Single shared settings object used by every cell below.
FLAGS = Flags()




## Model

In [None]:
def _affine(inputs, fan_in, fan_out):
    """Create 'weights'/'biases' variables and return ``inputs @ weights + biases``.

    Weights are drawn from a truncated normal with stddev ``1 / sqrt(fan_in)``;
    biases start at zero. Variable names are relative to the caller's name scope.
    """
    initial_weights = tf.truncated_normal(
        [fan_in, fan_out], stddev=1.0 / math.sqrt(float(fan_in)))
    w = tf.Variable(initial_weights, name='weights')
    b = tf.Variable(tf.zeros([fan_out]), name='biases')
    return tf.matmul(inputs, w) + b


def inference(images, hidden1_units, hidden2_units):
    """Build the forward pass of the MNIST classifier and return its logits.

    The network is two ReLU hidden layers followed by a linear output layer
    with NUM_CLASSES units.

    Args:
      images: input tensor; it is matrix-multiplied with an
        [IMAGE_PIXELS, hidden1_units] weight matrix.
      hidden1_units: width of the first hidden layer.
      hidden2_units: width of the second hidden layer.

    Returns:
      The un-normalised output of the final linear layer.
    """
    # First hidden layer: IMAGE_PIXELS -> hidden1_units, ReLU.
    with tf.name_scope('hidden1'):
        first_activation = tf.nn.relu(_affine(images, IMAGE_PIXELS, hidden1_units))

    # Second hidden layer: hidden1_units -> hidden2_units, ReLU.
    with tf.name_scope('hidden2'):
        second_activation = tf.nn.relu(
            _affine(first_activation, hidden1_units, hidden2_units))

    # Output layer: hidden2_units -> NUM_CLASSES, no non-linearity.
    with tf.name_scope('softmax_linear'):
        return _affine(second_activation, hidden2_units, NUM_CLASSES)


def calculate_loss(logits, labels):
    """Return the batch-mean sparse softmax cross-entropy.

    Args:
      logits: tensor passed as ``logits`` to the cross-entropy op.
      labels: label tensor; it is cast to int64 before use.

    Returns:
      The mean of the per-example cross-entropy, named 'xentropy_mean'.
    """
    per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.to_int64(labels), logits=logits, name='xentropy')
    mean_loss = tf.reduce_mean(per_example_loss, name='xentropy_mean')
    return mean_loss


def training(loss, learning_rate):
    """Create the op that performs one gradient-descent step on ``loss``.

    Also registers a 'loss' scalar summary and a non-trainable 'global_step'
    variable that the returned op increments each time it runs.

    Args:
      loss: tensor to minimise.
      learning_rate: learning rate given to the gradient-descent optimizer.

    Returns:
      The training op produced by ``minimize``.
    """
    # Record the snapshot loss for the summary writer.
    tf.summary.scalar('loss', loss)

    sgd = tf.train.GradientDescentOptimizer(learning_rate)

    # Counter bumped by minimize() on every step; excluded from training.
    step_counter = tf.Variable(0, name='global_step', trainable=False)

    return sgd.minimize(loss, global_step=step_counter)


def evaluation(logits, labels):
    """Return an int32 tensor counting the examples whose label is the top-1 logit.

    Args:
      logits: predictions handed to ``tf.nn.in_top_k``.
      labels: targets handed to ``tf.nn.in_top_k``.
    """
    # Boolean per example: is the true label the single highest-scoring class?
    is_hit = tf.nn.in_top_k(logits, labels, 1)
    hit_count = tf.reduce_sum(tf.cast(is_hit, tf.int32))
    return hit_count

## Train

In [9]:
# Reset TensorFlow's default graph before (re)running this cell.
# NOTE(review): run_training() below builds its model inside its own
# tf.Graph(), so this mainly matters for ops created outside that function.
tf.reset_default_graph()

def placeholder_inputs(batch_size):
    """Create the two feed placeholders used by the model.

    Args:
      batch_size: fixed batch dimension baked into both placeholder shapes.

    Returns:
      A pair ``(images, labels)``: a float32 placeholder of shape
      (batch_size, IMAGE_PIXELS) and an int32 placeholder whose shape is
      given as ``batch_size``.
    """
    image_shape = (batch_size, IMAGE_PIXELS)
    images_ph = tf.placeholder(tf.float32, shape=image_shape)
    # The label shape is passed as the bare integer, exactly as before.
    labels_ph = tf.placeholder(tf.int32, shape=batch_size)
    return images_ph, labels_ph


def fill_feed_dict(data_set, images_pl, labels_pl):
    """Fetch the next batch from ``data_set`` and map it onto the placeholders.

    Args:
      data_set: object providing ``next_batch(batch_size, fake_data)``, which
        must return an (images, labels) pair.
      images_pl: key under which the image batch is stored.
      labels_pl: key under which the label batch is stored.

    Returns:
      A dict ``{images_pl: images, labels_pl: labels}`` suitable for
      ``feed_dict``.
    """
    next_batch = data_set.next_batch(FLAGS.batch_size, FLAGS.fake_data)
    batch_images, batch_labels = next_batch
    return {images_pl: batch_images, labels_pl: batch_labels}

def do_eval(sess,
            eval_correct,
            images_placeholder,
            labels_placeholder,
            data_set):
    """Run one evaluation pass over `data_set` and print precision @ 1.

    Args:
      sess: object whose `run(eval_correct, feed_dict=...)` is called once per
        batch; the returned values are summed.
      eval_correct: fetch handed to `sess.run`; its value is added to the
        running count of correct predictions.
      images_placeholder: forwarded to `fill_feed_dict`.
      labels_placeholder: forwarded to `fill_feed_dict`.
      data_set: object exposing `num_examples`; forwarded to `fill_feed_dict`.

    Returns:
      None. The summary line is printed.
    """
    # Only whole batches are evaluated, so any remainder smaller than
    # FLAGS.batch_size is left out of both the count and the denominator.
    steps_per_epoch = data_set.num_examples // FLAGS.batch_size
    num_examples = steps_per_epoch * FLAGS.batch_size

    true_count = 0  # Number of correct predictions seen so far.
    for _ in xrange(steps_per_epoch):
        feed_dict = fill_feed_dict(data_set, images_placeholder, labels_placeholder)
        true_count += sess.run(eval_correct, feed_dict=feed_dict)

    # Computed once, after the loop. Guarding the empty case keeps the report
    # from crashing when the data set holds fewer examples than one batch.
    precision = float(true_count) / num_examples if num_examples else 0.0
    print('  Num examples: %d  Num correct: %d  Precision @ 1: %0.04f' % (num_examples, true_count, precision))
    

def run_training():
    """Train MNIST for a number of steps.

    Reads the data sets from FLAGS.input_data_dir, builds the model, loss,
    training and evaluation ops in a fresh graph, then runs FLAGS.max_steps
    training steps. Every 100 steps the loss is printed and a summary is
    written to FLAGS.log_dir; every 1000 steps and on the last step a
    checkpoint is saved and the train/validation/test splits are evaluated.

    The session and the summary writer are released when the function exits,
    including when a training step raises.
    """
    data_sets = input_data.read_data_sets(FLAGS.input_data_dir, FLAGS.fake_data)

    with tf.Graph().as_default():
        images_placeholder, labels_placeholder = placeholder_inputs(FLAGS.batch_size)

        logits = inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2)
        loss = calculate_loss(logits, labels_placeholder)
        train_op = training(loss, FLAGS.learning_rate)
        eval_correct = evaluation(logits, labels_placeholder)
        summary = tf.summary.merge_all()
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        # The path does not change between checkpoints; the step number is
        # appended by saver.save() through its global_step argument.
        checkpoint_file = os.path.join(FLAGS.log_dir, 'model.ckpt')

        # Using the session as a context manager closes it on exit, so
        # re-running the notebook cell does not accumulate open sessions.
        with tf.Session() as sess:
            summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)
            try:
                sess.run(init)

                for step in xrange(FLAGS.max_steps):
                    start_time = time.time()
                    feed_dict = fill_feed_dict(data_sets.train,
                                               images_placeholder,
                                               labels_placeholder)
                    _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
                    duration = time.time() - start_time

                    if step % 100 == 0:
                        print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
                        summary_str = sess.run(summary, feed_dict=feed_dict)
                        summary_writer.add_summary(summary_str, step)
                        summary_writer.flush()

                    if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                        saver.save(sess, checkpoint_file, global_step=step)
                        evaluations = (
                            ('Training Data Eval:', data_sets.train),
                            ('Validation Data Eval:', data_sets.validation),
                            ('Test Data Eval:', data_sets.test),
                        )
                        for title, split in evaluations:
                            print(title)
                            do_eval(sess,
                                    eval_correct,
                                    images_placeholder,
                                    labels_placeholder,
                                    split)
            finally:
                # Flush and release the event file even if training failed.
                summary_writer.close()
                
# Start every run from an empty log directory: remove a leftover one first...
if tf.gfile.Exists(FLAGS.log_dir):
    tf.gfile.DeleteRecursively(FLAGS.log_dir)
# ...then create it unconditionally, so it also exists on the very first run
# (previously it was only re-created when it had already existed).
tf.gfile.MakeDirs(FLAGS.log_dir)
run_training()

Extracting /tmp/tensorflow/mnist/input_data/train-images-idx3-ubyte.gz
Extracting /tmp/tensorflow/mnist/input_data/train-labels-idx1-ubyte.gz
Extracting /tmp/tensorflow/mnist/input_data/t10k-images-idx3-ubyte.gz
Extracting /tmp/tensorflow/mnist/input_data/t10k-labels-idx1-ubyte.gz
Step 0: loss = 2.30 (0.142 sec)
Step 100: loss = 2.14 (0.002 sec)
Step 200: loss = 1.85 (0.002 sec)
Step 300: loss = 1.60 (0.002 sec)
Step 400: loss = 1.26 (0.002 sec)
Step 500: loss = 0.82 (0.002 sec)
Step 600: loss = 0.78 (0.002 sec)
Step 700: loss = 0.72 (0.002 sec)
Step 800: loss = 0.55 (0.002 sec)
Step 900: loss = 0.48 (0.002 sec)
Training Data Eval:
  Num examples: 55000  Num correct: 47839  Precision @ 1: 0.8698
Validation Data Eval:
  Num examples: 5000  Num correct: 4393  Precision @ 1: 0.8786
Test Data Eval:
  Num examples: 10000  Num correct: 8775  Precision @ 1: 0.8775
Step 1000: loss = 0.66 (0.003 sec)
Step 1100: loss = 0.50 (0.076 sec)
Step 1200: loss = 0.49 (0.002 sec)
Step 1300: loss = 0.46 (0

## extracting code from functions

In [12]:
def do_eval(sess,
            eval_correct,
            images_placeholder,
            labels_placeholder,
            data_set):
    """Run one evaluation pass over `data_set` and print precision @ 1.

    Args:
      sess: object whose `run(eval_correct, feed_dict=...)` is called once per
        batch; the returned values are summed.
      eval_correct: fetch handed to `sess.run`; its value is added to the
        running count of correct predictions.
      images_placeholder: forwarded to `fill_feed_dict`.
      labels_placeholder: forwarded to `fill_feed_dict`.
      data_set: object exposing `num_examples`; forwarded to `fill_feed_dict`.

    Returns:
      None. The summary line is printed.
    """
    # Only whole batches are evaluated, so any remainder smaller than
    # FLAGS.batch_size is left out of both the count and the denominator.
    steps_per_epoch = data_set.num_examples // FLAGS.batch_size
    num_examples = steps_per_epoch * FLAGS.batch_size

    true_count = 0  # Number of correct predictions seen so far.
    for _ in xrange(steps_per_epoch):
        feed_dict = fill_feed_dict(data_set, images_placeholder, labels_placeholder)
        true_count += sess.run(eval_correct, feed_dict=feed_dict)

    # Computed once, after the loop. Guarding the empty case keeps the report
    # from crashing when the data set holds fewer examples than one batch.
    precision = float(true_count) / num_examples if num_examples else 0.0
    print('  Num examples: %d  Num correct: %d  Precision @ 1: %0.04f' % (num_examples, true_count, precision))


# Reset TensorFlow's default graph before this cell builds its own.
tf.reset_default_graph()

# Start from an empty log directory: remove a leftover one, then create it
# unconditionally so it also exists when there was nothing to delete.
if tf.gfile.Exists(FLAGS.log_dir):
    tf.gfile.DeleteRecursively(FLAGS.log_dir)
tf.gfile.MakeDirs(FLAGS.log_dir)
    
# Inlined training script: graph construction, session setup and the training
# loop are written out statement by statement inside a fresh tf.Graph instead
# of going through helper functions.
# NOTE: this runs at cell top level, so every name assigned below is a
# notebook-wide global. The triple-quoted strings inside the block are bare
# expression statements used as section banners, not docstrings.
with tf.Graph().as_default():
    """Generate placeholder variables to represent the input tensors. """
    # Both placeholders have the batch size fixed to FLAGS.batch_size.
    images_placeholder = tf.placeholder(tf.float32, shape=(FLAGS.batch_size, IMAGE_PIXELS))
    labels_placeholder = tf.placeholder(tf.int32, shape=(FLAGS.batch_size))
    
    """Build the MNIST model up to where it may be used for inference. """
    # Hidden 1: IMAGE_PIXELS -> FLAGS.hidden1, ReLU.
    # Weights use a truncated normal with stddev 1/sqrt(number of inputs).
    with tf.name_scope('hidden1'):
        weights1 = tf.Variable(tf.truncated_normal([IMAGE_PIXELS, FLAGS.hidden1], stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))),
            name='weights')
        biases1 = tf.Variable(tf.zeros([FLAGS.hidden1]),name='biases')
        hidden1 = tf.nn.relu(tf.matmul(images_placeholder, weights1) + biases1)
    # Hidden 2: FLAGS.hidden1 -> FLAGS.hidden2, ReLU.
    with tf.name_scope('hidden2'):
        weights2 = tf.Variable(tf.truncated_normal([FLAGS.hidden1, FLAGS.hidden2],stddev=1.0 / math.sqrt(float(FLAGS.hidden1))), 
            name='weights')
        biases2 = tf.Variable(tf.zeros([FLAGS.hidden2]), name='biases')
        hidden2 = tf.nn.relu(tf.matmul(hidden1, weights2) + biases2)
    # Linear output layer: FLAGS.hidden2 -> NUM_CLASSES, no activation.
    with tf.name_scope('softmax_linear'):
        weights3 = tf.Variable(
            tf.truncated_normal([FLAGS.hidden2, NUM_CLASSES], stddev=1.0 / math.sqrt(float(FLAGS.hidden2))), name='weights')
        biases3 = tf.Variable(tf.zeros([NUM_CLASSES]), name='biases')
        logits = tf.matmul(hidden2, weights3) + biases3
        
    """Calculates the loss from the logits and the labels."""
    # Labels are cast to int64 before being handed to the cross-entropy op;
    # the loss is the mean of the per-example cross-entropy.
    labels = tf.to_int64(labels_placeholder)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits, name='xentropy')
    loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
    
    """Sets up the training Ops. """
    # Register a 'loss' summary, then build a gradient-descent step that also
    # increments the non-trainable global_step variable.
    tf.summary.scalar('loss', loss)
    optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
    global_step = tf.Variable(0, name='global_step', trainable=False)
    train_op = optimizer.minimize(loss, global_step=global_step)
    
    """Evaluate the quality of the logits at predicting the label."""
    # eval_correct is the int32 count of examples whose label is the top-1 logit.
    correct = tf.nn.in_top_k(logits, labels_placeholder, 1)
    eval_correct = tf.reduce_sum(tf.cast(correct, tf.int32))
    
    """Prepare session, dataset and environment"""
    data_sets = input_data.read_data_sets(FLAGS.input_data_dir, FLAGS.fake_data)
    summary = tf.summary.merge_all()
    saver = tf.train.Saver()
    # NOTE(review): neither sess nor summary_writer is closed anywhere in this cell.
    sess = tf.Session()
    summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)
    sess.run(tf.global_variables_initializer())
    
    """Train"""
    for step in xrange(FLAGS.max_steps):
        start_time = time.time()
        # Pull the next training batch and bind it to the two placeholders.
        images_feed, labels_feed = data_sets.train.next_batch(FLAGS.batch_size, FLAGS.fake_data)
        feed_dict = {
          images_placeholder: images_feed,
          labels_placeholder: labels_feed,
        }
        # One optimisation step; the loss is fetched in the same run call.
        _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
        
        # Wall-clock time of the batch fetch plus the training step.
        duration = time.time() - start_time
        
        # Every 100 steps: print progress and write a summary for this batch.
        if step % 100 == 0:
            print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
            summary_str = sess.run(summary, feed_dict=feed_dict)
            summary_writer.add_summary(summary_str, step)
            summary_writer.flush()
            
        # Every 1000 steps and on the final step: checkpoint, then evaluate
        # the training, validation and test splits with do_eval().
        if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
            checkpoint_file = os.path.join(FLAGS.log_dir, 'model.ckpt')
            saver.save(sess, checkpoint_file, global_step=step)
            """I kept the do_eval() function cause I don't see the point in rewriting the same stuff again and gain so..."""
            print('Training Data Eval:')
            do_eval(sess,
                    eval_correct,
                    images_placeholder,
                    labels_placeholder,
                    data_sets.train)
            print('Validation Data Eval:')
            do_eval(sess,
                    eval_correct,
                    images_placeholder,
                    labels_placeholder,
                    data_sets.validation)
            print('Test Data Eval:')
            do_eval(sess,
                    eval_correct,
                    images_placeholder,
                    labels_placeholder,
                    data_sets.test)

Extracting /tmp/tensorflow/mnist/input_data/train-images-idx3-ubyte.gz
Extracting /tmp/tensorflow/mnist/input_data/train-labels-idx1-ubyte.gz
Extracting /tmp/tensorflow/mnist/input_data/t10k-images-idx3-ubyte.gz
Extracting /tmp/tensorflow/mnist/input_data/t10k-labels-idx1-ubyte.gz
Step 0: loss = 2.30 (0.010 sec)
Step 100: loss = 2.17 (0.002 sec)
Step 200: loss = 1.90 (0.002 sec)
Step 300: loss = 1.58 (0.002 sec)
Step 400: loss = 1.31 (0.002 sec)
Step 500: loss = 0.86 (0.002 sec)
Step 600: loss = 0.79 (0.002 sec)
Step 700: loss = 0.61 (0.002 sec)
Step 800: loss = 0.58 (0.002 sec)
Step 900: loss = 0.78 (0.002 sec)
Training Data Eval:
  Num examples: 55000  Num correct: 46705  Precision @ 1: 0.8492
Validation Data Eval:
  Num examples: 5000  Num correct: 4292  Precision @ 1: 0.8584
Test Data Eval:
  Num examples: 10000  Num correct: 8569  Precision @ 1: 0.8569
Step 1000: loss = 0.56 (0.003 sec)
Step 1100: loss = 0.55 (0.078 sec)
Step 1200: loss = 0.44 (0.002 sec)
Step 1300: loss = 0.65 (0

## Gradients analysis

In [39]:
def do_eval(sess,
            eval_correct,
            images_placeholder,
            labels_placeholder,
            data_set):
    """Run one evaluation pass over `data_set` and print precision @ 1.

    Args:
      sess: object whose `run(eval_correct, feed_dict=...)` is called once per
        batch; the returned values are summed.
      eval_correct: fetch handed to `sess.run`; its value is added to the
        running count of correct predictions.
      images_placeholder: forwarded to `fill_feed_dict`.
      labels_placeholder: forwarded to `fill_feed_dict`.
      data_set: object exposing `num_examples`; forwarded to `fill_feed_dict`.

    Returns:
      None. The summary line is printed.
    """
    # Only whole batches are evaluated, so any remainder smaller than
    # FLAGS.batch_size is left out of both the count and the denominator.
    steps_per_epoch = data_set.num_examples // FLAGS.batch_size
    num_examples = steps_per_epoch * FLAGS.batch_size

    true_count = 0  # Number of correct predictions seen so far.
    for _ in xrange(steps_per_epoch):
        feed_dict = fill_feed_dict(data_set, images_placeholder, labels_placeholder)
        true_count += sess.run(eval_correct, feed_dict=feed_dict)

    # Computed once, after the loop. Guarding the empty case keeps the report
    # from crashing when the data set holds fewer examples than one batch.
    precision = float(true_count) / num_examples if num_examples else 0.0
    print('  Num examples: %d  Num correct: %d  Precision @ 1: %0.04f' % (num_examples, true_count, precision))


# Reset TensorFlow's default graph before this cell builds its own.
tf.reset_default_graph()

# Start from an empty log directory: remove a leftover one, then create it
# unconditionally so it also exists when there was nothing to delete.
if tf.gfile.Exists(FLAGS.log_dir):
    tf.gfile.DeleteRecursively(FLAGS.log_dir)
tf.gfile.MakeDirs(FLAGS.log_dir)
    
# Gradients analysis: the same inlined network, with shape printouts added and
# optimizer.minimize() split into compute_gradients() + apply_gradients() so
# the (gradient, variable) pairs can be inspected.
# NOTE: the bare `raise` further down stops the cell right after the
# inspection prints, so nothing below it is executed.
# The triple-quoted strings inside the block are bare expression statements
# used as section banners, not docstrings.
with tf.Graph().as_default():
    """Generate placeholder variables to represent the input tensors. """
    images_placeholder = tf.placeholder(tf.float32, shape=(FLAGS.batch_size, IMAGE_PIXELS))
    labels_placeholder = tf.placeholder(tf.int32, shape=(FLAGS.batch_size))
    
    """Build the MNIST model up to where it may be used for inference. """
    # Hidden 1: IMAGE_PIXELS -> FLAGS.hidden1, ReLU. The static shape of each
    # tensor is printed as soon as it is created.
    with tf.name_scope('hidden1'):
        weights1 = tf.Variable(tf.truncated_normal([IMAGE_PIXELS, FLAGS.hidden1], stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))),
            name='weights')
        print("weights1", weights1.get_shape().as_list())
        biases1 = tf.Variable(tf.zeros([FLAGS.hidden1]),name='biases')
        print("biases1", biases1.get_shape().as_list())
        hidden1 = tf.nn.relu(tf.matmul(images_placeholder, weights1) + biases1)
        print("hidden1", hidden1.get_shape().as_list())
    # Hidden 2: FLAGS.hidden1 -> FLAGS.hidden2, ReLU, with the same printouts.
    with tf.name_scope('hidden2'):
        weights2 = tf.Variable(tf.truncated_normal([FLAGS.hidden1, FLAGS.hidden2],stddev=1.0 / math.sqrt(float(FLAGS.hidden1))), 
            name='weights')
        print("weights2", weights2.get_shape().as_list())
        biases2 = tf.Variable(tf.zeros([FLAGS.hidden2]), name='biases')
        print("biases2", biases2.get_shape().as_list())
        hidden2 = tf.nn.relu(tf.matmul(hidden1, weights2) + biases2)
        print("hidden2", hidden2.get_shape().as_list())
    # Linear output layer: FLAGS.hidden2 -> NUM_CLASSES (no shape printouts here).
    with tf.name_scope('softmax_linear'):
        weights3 = tf.Variable(
            tf.truncated_normal([FLAGS.hidden2, NUM_CLASSES], stddev=1.0 / math.sqrt(float(FLAGS.hidden2))), name='weights')
        biases3 = tf.Variable(tf.zeros([NUM_CLASSES]), name='biases')
        logits = tf.matmul(hidden2, weights3) + biases3
        
    """Calculates the loss from the logits and the labels."""
    labels = tf.to_int64(labels_placeholder)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits, name='xentropy')
    loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
    
    """Sets up the training Ops. """
    tf.summary.scalar('loss', loss)
    optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
    global_step = tf.Variable(0, name='global_step', trainable=False)
    #train_op = optimizer.minimize(loss, global_step=global_step)
    # Two-step replacement for minimize(): each entry of grads_and_vars is
    # used below as (gradient, variable) -- x[0] is the gradient, x[1] the variable.
    grads_and_vars = optimizer.compute_gradients(loss)
    print(len(grads_and_vars))
    print( [ (x[1].name, x[0].get_shape().as_list()) for x in grads_and_vars])
    # NOTE(review): global_step is not passed here (unlike the commented-out
    # minimize() call), so presumably it is never incremented -- confirm.
    train_op = optimizer.apply_gradients(grads_and_vars)
    
    # Deliberate halt: a bare `raise` with no active exception fails on its
    # own (see the TypeError in the cell output), ending the cell here.
    raise
    
    # ---- Everything below is unreachable while the `raise` above is in place. ----
    """Evaluate the quality of the logits at predicting the label."""
    correct = tf.nn.in_top_k(logits, labels_placeholder, 1)
    eval_correct = tf.reduce_sum(tf.cast(correct, tf.int32))
    
    """Prepare session, dataset and environment"""
    data_sets = input_data.read_data_sets(FLAGS.input_data_dir, FLAGS.fake_data)
    summary = tf.summary.merge_all()
    saver = tf.train.Saver()
    sess = tf.Session()
    summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)
    sess.run(tf.global_variables_initializer())
    
    """Train"""
    for step in xrange(FLAGS.max_steps):
        start_time = time.time()
        # Pull the next training batch and bind it to the two placeholders.
        images_feed, labels_feed = data_sets.train.next_batch(FLAGS.batch_size, FLAGS.fake_data)
        feed_dict = {
          images_placeholder: images_feed,
          labels_placeholder: labels_feed,
        }
        _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
        
        duration = time.time() - start_time
        
        # Every 100 steps: print progress and write a summary for this batch.
        if step % 100 == 0:
            print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
            summary_str = sess.run(summary, feed_dict=feed_dict)
            summary_writer.add_summary(summary_str, step)
            summary_writer.flush()
            
        # Every 1000 steps and on the final step: checkpoint, then evaluate
        # the training, validation and test splits with do_eval().
        if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
            checkpoint_file = os.path.join(FLAGS.log_dir, 'model.ckpt')
            saver.save(sess, checkpoint_file, global_step=step)
            """I kept the do_eval() function cause I don't see the point in rewriting the same stuff again and gain so..."""
            print('Training Data Eval:')
            do_eval(sess,
                    eval_correct,
                    images_placeholder,
                    labels_placeholder,
                    data_sets.train)
            print('Validation Data Eval:')
            do_eval(sess,
                    eval_correct,
                    images_placeholder,
                    labels_placeholder,
                    data_sets.validation)
            print('Test Data Eval:')
            do_eval(sess,
                    eval_correct,
                    images_placeholder,
                    labels_placeholder,
                    data_sets.test)

weights1 [784, 128]
biases1 [128]
hidden1 [100, 128]
weights2 [128, 32]
biases2 [32]
hidden2 [100, 32]
6
[(u'hidden1/weights:0', [784, 128]), (u'hidden1/biases:0', [128]), (u'hidden2/weights:0', [128, 32]), (u'hidden2/biases:0', [32]), (u'softmax_linear/weights:0', [32, 10]), (u'softmax_linear/biases:0', [10])]


TypeError: exceptions must be old-style classes or derived from BaseException, not NoneType

## decoupling layers

In [None]:
def do_eval(sess,
            eval_correct,
            images_placeholder,
            labels_placeholder,
            data_set):
    """Run one evaluation pass over `data_set` and print precision @ 1.

    Args:
      sess: object whose `run(eval_correct, feed_dict=...)` is called once per
        batch; the returned values are summed.
      eval_correct: fetch handed to `sess.run`; its value is added to the
        running count of correct predictions.
      images_placeholder: forwarded to `fill_feed_dict`.
      labels_placeholder: forwarded to `fill_feed_dict`.
      data_set: object exposing `num_examples`; forwarded to `fill_feed_dict`.

    Returns:
      None. The summary line is printed.
    """
    # Only whole batches are evaluated, so any remainder smaller than
    # FLAGS.batch_size is left out of both the count and the denominator.
    steps_per_epoch = data_set.num_examples // FLAGS.batch_size
    num_examples = steps_per_epoch * FLAGS.batch_size

    true_count = 0  # Number of correct predictions seen so far.
    for _ in xrange(steps_per_epoch):
        feed_dict = fill_feed_dict(data_set, images_placeholder, labels_placeholder)
        true_count += sess.run(eval_correct, feed_dict=feed_dict)

    # Computed once, after the loop. Guarding the empty case keeps the report
    # from crashing when the data set holds fewer examples than one batch.
    precision = float(true_count) / num_examples if num_examples else 0.0
    print('  Num examples: %d  Num correct: %d  Precision @ 1: %0.04f' % (num_examples, true_count, precision))


# Reset TensorFlow's default graph before this cell builds its own.
tf.reset_default_graph()

# Start from an empty log directory: remove a leftover one, then create it
# unconditionally so it also exists when there was nothing to delete.
if tf.gfile.Exists(FLAGS.log_dir):
    tf.gfile.DeleteRecursively(FLAGS.log_dir)
tf.gfile.MakeDirs(FLAGS.log_dir)
    
# Work-in-progress experiment: hidden2 is cut off from hidden1 by feeding it
# through a separate placeholder, and a "synthetic gradients" model is meant to
# supply hidden1's training signal in place of the real back-propagated one.
# NOTE(review): this cell is unfinished (see the "CONTINUE FROM HERE" marker)
# and cannot run as written; the individual problems are flagged inline.
# The triple-quoted strings inside the block are bare expression statements
# used as section banners, not docstrings.
with tf.Graph().as_default():
    """Generate placeholder variables to represent the input tensors. """
    images_placeholder = tf.placeholder(tf.float32, shape=(FLAGS.batch_size, IMAGE_PIXELS))
    labels_placeholder = tf.placeholder(tf.int32, shape=(FLAGS.batch_size))
    
    """Build the MNIST model up to where it may be used for inference. """
    # Hidden 1: IMAGE_PIXELS -> FLAGS.hidden1, ReLU.
    with tf.name_scope('hidden1'):
        weights1 = tf.Variable(tf.truncated_normal([IMAGE_PIXELS, FLAGS.hidden1], stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))),
            name='weights')
        biases1 = tf.Variable(tf.zeros([FLAGS.hidden1]), name='biases')
        hidden1 = tf.nn.relu(tf.matmul(images_placeholder, weights1) + biases1)
    
        
    """ACTUAL DECOUPLING"""
    # Hidden 2: FLAGS.hidden1 -> FLAGS.hidden2, ReLU, reading from a
    # placeholder instead of directly from hidden1.
    with tf.name_scope('hidden2'):
        weights2 = tf.Variable(tf.truncated_normal([FLAGS.hidden1, FLAGS.hidden2],stddev=1.0 / math.sqrt(float(FLAGS.hidden1))), 
            name='weights')
        biases2 = tf.Variable(tf.zeros([FLAGS.hidden2]), name='biases')
        #hidden2 = tf.nn.relu(tf.matmul(hidden1, weights2) + biases2)
        
        # we set a new placeholder, this will be used to inject the output of hidden1 (message) into hidden2:
        # NOTE(review): layer2_input is not part of the feed_dict built in the
        # training loop below, so anything depending on hidden2 has no input yet.
        hidden1_shape = hidden1.get_shape()#.as_list()
        layer2_input = tf.placeholder(tf.float32, shape=hidden1_shape)
        hidden2 = tf.nn.relu(tf.matmul(layer2_input, weights2) + biases2)
        
    """optimizer"""
    optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
    
    """synthetic gradients model"""
    with tf.name_scope('synthetic_gradients'):
        # synthetic gradients model input
        # All four pieces are flattened to 1-D and concatenated along axis 0,
        # so the model input is a single 1-D vector.
        synthetic_gradients_model_input = tf.concat([
            tf.reshape(hidden1,[-1]), # Hidden 1 output (message)
            tf.reshape(weights2,[-1]), # Hidden 2 weights (layer state)
            tf.reshape(biases2,[-1]), # Hidden 2 biases (layer state)
            tf.reshape(tf.to_float(labels_placeholder), [-1]) # labels (c parameter)
        ],0)

        #synthetic_gradients_model_input_lenght = synthetic_gradients_model_input.get_shape().as_list()[0]
        #synthetic_gradients_model_weights = tf.Variable(tf.truncated_normal([synthetic_gradients_model_input_lenght, 1],
        #    name='weights')
        synthetic_gradients_model_input_shape = synthetic_gradients_model_input.get_shape()#.as_list()[0]
        # NOTE(review): name='weights' is an argument of truncated_normal here,
        # not of tf.Variable as in the layers above -- probably unintended.
        synthetic_gradients_model_weights = tf.Variable(tf.truncated_normal(synthetic_gradients_model_input_shape,name='weights'))
        # NOTE(review): Flags.__init__ never sets synthetic_gradients_model_input_lenght,
        # and the local variable of that name is only assigned in commented-out code.
        synthetic_gradients_model_biases = tf.Variable(tf.zeros([FLAGS.synthetic_gradients_model_input_lenght]), name='biases')
        # NOTE(review): the "+ ..." continuation below sits on its own, more
        # deeply indented line without enclosing parentheses or a backslash, so
        # Python rejects the cell before running it. Both matmul operands are
        # also 1-D here -- confirm the intended shapes when fixing this.
        synthetic_gradients_model_logit = tf.matmul(synthetic_gradients_model_input, synthetic_gradients_model_weights) 
                                                        + synthetic_gradients_model_biases
            
        #layer1 gradients
        # The synthetic model's output is used as the upstream gradient
        # (grad_ys) for hidden1, and the result is applied to layer 1's variables.
        hidden1_model = [weights1, biases1]
        hidden1_gradients = tf.gradients(hidden1, hidden1_model, grad_ys=synthetic_gradients_model_logit)
        optimize_layer1 = optimizer.apply_gradients(zip(hidden1_gradients, hidden1_model))
        
        # CONTINUE FROM HERE
        
        #synthetic_gradients_model gradients
        
    
    #def layer1_forward_and_backward_pass(sess, feed_dict):
    #    synthetic_gradient = sess.run(synthetic_gradients_model_logit, feed_dict)
        
    # Linear output layer: FLAGS.hidden2 -> NUM_CLASSES, no activation.
    with tf.name_scope('softmax_linear'):
        weights3 = tf.Variable(
            tf.truncated_normal([FLAGS.hidden2, NUM_CLASSES], stddev=1.0 / math.sqrt(float(FLAGS.hidden2))), name='weights')
        biases3 = tf.Variable(tf.zeros([NUM_CLASSES]), name='biases')
        logits = tf.matmul(hidden2, weights3) + biases3
        
    """Calculates the loss from the logits and the labels."""
    labels = tf.to_int64(labels_placeholder)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits, name='xentropy')
    loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
    
    
    """Sets up the training Ops. """
    tf.summary.scalar('loss', loss)
    #optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
    global_step = tf.Variable(0, name='global_step', trainable=False)
    #train_op = optimizer.minimize(loss, global_step=global_step)
    
    # NOTE(review): the gradients-analysis cell reads compute_gradients()
    # entries as (gradient, variable), so [0][1] picks the variable slot
    # (layer2_input itself) rather than the gradient -- [0][0] looks intended.
    hidden2_gradients = optimizer.compute_gradients(loss, var_list=[layer2_input])[0][1]
    if hidden2_gradients is None:
        print('WARNING! hidden2_gradients is None')
    
    # NOTE(review): hidden1_gradients is the list returned by tf.gradients
    # above, not a single tensor; also confirm tf.sub exists in the installed
    # TensorFlow (it was renamed tf.subtract in the 1.0 API).
    synthetic_gradients_model_loss = tf.abs(tf.sub(hidden2_gradients, hidden1_gradients))
    
    """Evaluate the quality of the logits at predicting the label."""
    correct = tf.nn.in_top_k(logits, labels_placeholder, 1)
    eval_correct = tf.reduce_sum(tf.cast(correct, tf.int32))
    
    """Prepare session, dataset and environment"""
    data_sets = input_data.read_data_sets(FLAGS.input_data_dir, FLAGS.fake_data)
    summary = tf.summary.merge_all()
    saver = tf.train.Saver()
    sess = tf.Session()
    summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)
    sess.run(tf.global_variables_initializer())
    
    """Train"""
    for step in xrange(FLAGS.max_steps):
        start_time = time.time()
        images_feed, labels_feed = data_sets.train.next_batch(FLAGS.batch_size, FLAGS.fake_data)
        feed_dict = {
          images_placeholder: images_feed,
          labels_placeholder: labels_feed,
        }
        # NOTE(review): train_op is not defined in this cell (its definition is
        # commented out above); if the name resolves at all it comes from an
        # earlier cell and belongs to a different graph.
        _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
        
        duration = time.time() - start_time
        
        # Every 100 steps: print progress and write a summary for this batch.
        if step % 100 == 0:
            print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
            summary_str = sess.run(summary, feed_dict=feed_dict)
            summary_writer.add_summary(summary_str, step)
            summary_writer.flush()
            
        # Every 1000 steps and on the final step: checkpoint, then evaluate
        # the training, validation and test splits with do_eval().
        if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
            checkpoint_file = os.path.join(FLAGS.log_dir, 'model.ckpt')
            saver.save(sess, checkpoint_file, global_step=step)
            """I kept the do_eval() function cause I don't see the point in rewriting the same stuff again and gain so..."""
            print('Training Data Eval:')
            do_eval(sess,
                    eval_correct,
                    images_placeholder,
                    labels_placeholder,
                    data_sets.train)
            print('Validation Data Eval:')
            do_eval(sess,
                    eval_correct,
                    images_placeholder,
                    labels_placeholder,
                    data_sets.validation)
            print('Test Data Eval:')
            do_eval(sess,
                    eval_correct,
                    images_placeholder,
                    labels_placeholder,
                    data_sets.test)

## decoupling layers (step by step)

In [41]:
def do_eval(sess,
            eval_correct,
            images_placeholder,
            labels_placeholder,
            data_set):
    """Run one evaluation pass over a full epoch of `data_set` and print the result.

    Args:
        sess: Session used to run `eval_correct`.
        eval_correct: Tensor whose evaluated value is added to the running
            count of correct predictions for each batch.
        images_placeholder: Images placeholder, forwarded to `fill_feed_dict`.
        labels_placeholder: Labels placeholder, forwarded to `fill_feed_dict`.
        data_set: Data set exposing `num_examples`; forwarded to `fill_feed_dict`.

    Returns:
        None. The example count, correct count and precision are printed.
    """
    # Only whole batches are evaluated, so the reported example count is
    # rounded down to a multiple of FLAGS.batch_size.
    steps_per_epoch = data_set.num_examples // FLAGS.batch_size
    num_examples = steps_per_epoch * FLAGS.batch_size

    true_count = 0  # Running number of correct predictions.
    for _ in xrange(steps_per_epoch):
        feed_dict = fill_feed_dict(data_set, images_placeholder, labels_placeholder)
        true_count += sess.run(eval_correct, feed_dict=feed_dict)

    # Computed once, after the loop. The guard keeps a data set smaller than
    # one batch from failing (previously `precision` was never assigned then).
    precision = float(true_count) / num_examples if num_examples else 0.0
    print('  Num examples: %d  Num correct: %d  Precision @ 1: %0.04f' % (num_examples, true_count, precision))


# Reset the global default graph before the model below is rebuilt.
tf.reset_default_graph()

# Recreate the log directory from scratch. MakeDirs must run unconditionally:
# nested under the `if`, the directory was never created on a first run, when
# there is nothing to delete.
if tf.gfile.Exists(FLAGS.log_dir):
    tf.gfile.DeleteRecursively(FLAGS.log_dir)
tf.gfile.MakeDirs(FLAGS.log_dir)
    
with tf.Graph().as_default():
    """Generate placeholder variables to represent the input tensors. """
    # Both placeholders have a fixed leading dimension of FLAGS.batch_size, so
    # every feed has to supply exactly one full batch.
    images_placeholder = tf.placeholder(tf.float32, shape=(FLAGS.batch_size, IMAGE_PIXELS), name="net_input")
    # Note: `(FLAGS.batch_size)` is a parenthesised int, not a 1-tuple.
    labels_placeholder = tf.placeholder(tf.int32, shape=(FLAGS.batch_size), name="net_labels")
    
    # One plain gradient-descent optimizer instance, shared by every update op
    # built later in this graph.
    optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
    
    # Hidden 1: ReLU(images . weights1 + biases1), fed directly from images_placeholder.
    with tf.name_scope('hidden1'):
        # Truncated-normal init scaled by 1/sqrt(number of inputs); biases start at zero.
        weights1 = tf.Variable(tf.truncated_normal([IMAGE_PIXELS, FLAGS.hidden1], stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))),
            name='weights')
        biases1 = tf.Variable(tf.zeros([FLAGS.hidden1]), name='biases')
        hidden1 = tf.nn.relu(tf.matmul(images_placeholder, weights1) + biases1)
        
    # Hidden 2: same layer form, but deliberately NOT wired to hidden1 (see below).
    with tf.name_scope('hidden2'):
        weights2 = tf.Variable(tf.truncated_normal([FLAGS.hidden1, FLAGS.hidden2],stddev=1.0 / math.sqrt(float(FLAGS.hidden1))), 
            name='weights')
        biases2 = tf.Variable(tf.zeros([FLAGS.hidden2]), name='biases')
        # Original, directly connected version kept for reference:
        #hidden2 = tf.nn.relu(tf.matmul(hidden1, weights2) + biases2)
        
        # we set a new placeholder, this will be used to inject the output of hidden1 (message) into hidden2:
        # Because hidden2 reads from this placeholder rather than from the hidden1
        # tensor, the graph is cut here: anything built on hidden2 has no
        # backprop path into weights1/biases1.
        hidden1_shape = hidden1.get_shape()#.as_list()
        #print(hidden1_shape)
        layer2_input = tf.placeholder(tf.float32, shape=hidden1_shape, name="layer2_input")
        hidden2 = tf.nn.relu(tf.matmul(layer2_input, weights2) + biases2)
        #print(hidden2.get_shape())
        
    # Synthetic gradients model: a single linear layer that predicts the gradient
    # of the loss with respect to hidden1's output, so hidden1 can be updated
    # without backpropagating through hidden2.
    # Shape comments below assume the default FLAGS (batch_size=100, hidden1=128,
    # hidden2=32).
    with tf.name_scope('synthetic_gradients'):
        
        # Debug helper: print the static shape of a tensor as a Python list.
        def print_shape(t):
            print(t.get_shape().as_list())
            
        print_shape(hidden1)
        print_shape(weights2)
        print_shape(biases2)
        #print_shape(labels_placeholder)
        
        # NOTE(review): the depth is the literal 10 rather than NUM_CLASSES.
        labels_one_hot = tf.one_hot(labels_placeholder, 10)
        print_shape(labels_one_hot)
        
        # Per-example features: hidden1 output (message) followed by the one-hot label -> [100, 138].
        input_and_labels = tf.concat([hidden1, labels_one_hot], 1)
        print_shape(input_and_labels)
        
        # Layer-2 parameters flattened into a single vector -> [128*32 + 32] = [4128].
        hidden_2_flattened_model = tf.concat([
            tf.reshape(weights2,[-1]), # Hidden 2 weights (layer state)
            tf.reshape(biases2,[-1]), # Hidden 2 biases (layer state)
        ],0)
        print("hidden_2_flattened_model")
        print_shape(hidden_2_flattened_model)
        
        # Repeat that vector once per batch row so it can be concatenated column-wise -> [100, 4128].
        hidden_2_replicated_model = tf.concat([
            tf.expand_dims(hidden_2_flattened_model,0) for _ in range(input_and_labels.get_shape().as_list()[0])
        ],0)
        print("hidden_2_replicated_model")
        print_shape(hidden_2_replicated_model)
        
        # Final model input: [message | label | layer-2 state] per row -> [100, 4266].
        synthetic_gradients_model_input = tf.concat([
            input_and_labels,
            hidden_2_replicated_model
        ],1)
        print("synthetic_gradients_model_input")
        print_shape(synthetic_gradients_model_input)
        
        
        
        
        
        """
        # synthetic gradients model input
        synthetic_gradients_model_input = tf.concat([
            tf.reshape(hidden1,[-1]), # Hidden 1 output (message)
            tf.reshape(weights2,[-1]), # Hidden 2 weights (layer state)
            tf.reshape(biases2,[-1]), # Hidden 2 biases (layer state)
            tf.reshape(tf.to_float(labels_one_hot), [-1]) # labels (c parameter)
        ],0)
        """
        
        #print(synthetic_gradients_model_input.get_shape())
        
        #weights1_length = 1
        #for l in weights1.get_shape().as_list():
        #    weights1_length *= l
        
        #biases1_length = 1
        #for l in biases1.get_shape().as_list():
        #    biases1_length *= l
        
        #print(weights1.get_shape().as_list())
        #print(weights1_length)
        
        #print(biases1.get_shape().as_list())
        #print(biases1_length)
        
        #hidden1_model_length = weights1_length + biases1_length
        
        # Product of ALL hidden1 dimensions, batch dimension included (100 * 128 = 12800).
        hidden1_shape = hidden1.get_shape().as_list()
        hidden1_output_length = 1
        for n in hidden1_shape:
            hidden1_output_length *= n
        
        # Number of input features per row (4266).
        synthetic_gradients_model_input_lenght = synthetic_gradients_model_input.get_shape().as_list()[1]
        
        
        # Linear model: [100, 4266] x [4266, 12800] + [12800] -> [100, 12800].
        # truncated_normal is called without stddev here, so its default is used.
        synthetic_gradients_model_weights = tf.Variable(
            tf.truncated_normal([synthetic_gradients_model_input_lenght, hidden1_output_length]),name='weights')
        synthetic_gradients_model_biases = tf.Variable(tf.zeros([hidden1_output_length]), name='biases')
        synthetic_gradients_model_matmul = tf.matmul(synthetic_gradients_model_input, synthetic_gradients_model_weights)
        synthetic_gradients_model_logits = synthetic_gradients_model_matmul + synthetic_gradients_model_biases
        
        
        print_shape(synthetic_gradients_model_logits)
        # Every row is a full-batch-sized prediction; the rows are averaged and the
        # result is reshaped back to hidden1's shape -> [100, 128].
        hidden1_synthetic_gradient = tf.reshape(tf.reduce_mean(synthetic_gradients_model_logits,0), hidden1_shape)
        print_shape(hidden1_synthetic_gradient)
        
        """synthetic_gradients_model_optimization"""
        # Training path: the same weights/biases applied to a placeholder instead of
        # the live input tensor, so the model can be trained from a previously
        # fetched input value.
        synthetic_gradients_model_train_input = tf.placeholder(tf.float32,
                                                    shape=synthetic_gradients_model_input.get_shape().as_list(),
                                                    name="train_input")
        synthetic_gradients_model_train_matmul = tf.matmul(synthetic_gradients_model_train_input, 
                                                            synthetic_gradients_model_weights)
        synthetic_gradients_model_train_logits = synthetic_gradients_model_train_matmul + synthetic_gradients_model_biases
        hidden1_synthetic_gradient_train = tf.reshape(tf.reduce_mean(synthetic_gradients_model_train_logits,0), hidden1_shape)
        
        # Regression target, fed at run time; same shape as the prediction.
        true_gradients_placeholder = tf.placeholder(tf.float32, 
                                                    shape=(hidden1_synthetic_gradient_train.get_shape().as_list()), 
                                                    name="hidden2_true_gradients")
        # Loss is the L2 norm of (target - prediction): sqrt of the summed squared differences.
        synthetic_gradients_model_loss = tf.sqrt( tf.reduce_sum(tf.square(tf.subtract(
                                                                                true_gradients_placeholder,
                                                                                hidden1_synthetic_gradient_train))))
        print("synthetic_gradients_model_loss", synthetic_gradients_model_loss)
                                    #reduction_indices=1))
        # No var_list is given; this loss depends only on the two variables created
        # in this scope (plus placeholders), so only they can receive a gradient.
        synthetic_gradients_model_optimize_op = optimizer.minimize(synthetic_gradients_model_loss)
        
        
        """hidden1 optimizer"""
        # Backpropagate through hidden1 using the predicted gradient as the incoming
        # gradient (grad_loss) instead of one derived from the real loss.
        hidden1_grad_vars = optimizer.compute_gradients(hidden1, grad_loss=hidden1_synthetic_gradient)
        hidden1_grad_vars_names = [ (x[1].name, x[0]) for x in hidden1_grad_vars ]
        print(hidden1_grad_vars_names)
        
        # Variables that do not feed hidden1 come back with a None gradient; drop
        # those pairs before applying the update.
        hidden1_appliable_grads = [ t for t in hidden1_grad_vars if t[0] is not None]
        hidden1_optimize = optimizer.apply_gradients(hidden1_appliable_grads)
        
        
        
        
    # Load the MNIST data sets from FLAGS.input_data_dir; the nested test
    # functions defined in this cell draw their batches from `data_sets.train`.
    print("downloading data sets")
    data_sets = input_data.read_data_sets(FLAGS.input_data_dir, FLAGS.fake_data)
    def hidden1_test():
        """Smoke-test hidden1 on one training batch.

        Opens a fresh session, initialises all variables, and runs the hidden1
        output, its synthetic gradient and the hidden1 update op in a single
        call. The output message and the synthetic gradient are then printed.
        """
        fetches = [hidden1, hidden1_synthetic_gradient, hidden1_optimize]
        with tf.Session() as session:
            session.run(tf.global_variables_initializer())

            batch_images, batch_labels = data_sets.train.next_batch(FLAGS.batch_size, FLAGS.fake_data)
            # The update op's return value carries no information; discard it.
            message, synthetic_grad, _ = session.run(
                fetches,
                feed_dict={images_placeholder: batch_images, labels_placeholder: batch_labels})

            for fetched in (message, synthetic_grad):
                print(fetched)
            
    #hidden1_test()
        
        
        
    # Linear output layer: maps hidden2 activations to NUM_CLASSES logits. No
    # activation is applied here.
    with tf.name_scope('softmax_linear'):
        # Same 1/sqrt(number of inputs) truncated-normal init as the hidden layers; zero biases.
        weights3 = tf.Variable(
            tf.truncated_normal([FLAGS.hidden2, NUM_CLASSES], stddev=1.0 / math.sqrt(float(FLAGS.hidden2))), name='weights')
        biases3 = tf.Variable(tf.zeros([NUM_CLASSES]), name='biases')
        logits3 = tf.matmul(hidden2, weights3) + biases3
        
    """Calculates the loss from the logits and the labels."""
    # Mean sparse softmax cross-entropy over the batch; labels are integer class ids cast to int64.
    labels = tf.to_int64(labels_placeholder)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits3, name='xentropy')
    loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
    
    """
    hidden2_grad_vars = optimizer.compute_gradients(loss)
    hidden2_grad_vars_names = [ (x[1].name, x[0]) for x in hidden2_grad_vars ]
    print(hidden2_grad_vars_names)
    """
    
    # True gradient of the loss with respect to the injected layer-2 input. Its
    # value is what gets fed to true_gradients_placeholder when the synthetic
    # gradients model is trained. tf.gradients returns a list, one entry per
    # requested tensor.
    hidden2_true_gradients_list = tf.gradients(loss, layer2_input)
    hidden2_true_gradients = hidden2_true_gradients_list[0]
    print("hidden2_true_gradients", hidden2_true_gradients)
        
    # The loss is built on layer2_input (a placeholder), so this op can only reach
    # the hidden2 and softmax_linear variables; global_step is incremented by it.
    global_step = tf.Variable(0, name='global_step', trainable=False)
    train_op = optimizer.minimize(loss, global_step=global_step)
    
    """Evaluate the quality of the logits at predicting the label."""
    # Number of examples in the batch whose label is the top-1 prediction.
    correct = tf.nn.in_top_k(logits3, labels_placeholder, 1)
    eval_correct = tf.reduce_sum(tf.cast(correct, tf.int32))
    
    
    # Debug sub-graph: element-wise comparison of two fed arrays shaped like the
    # synthetic gradient. It is used to check whether the "live" and "train"
    # paths of the synthetic gradients model produce the same values.
    with tf.name_scope('coherence_safety_assertion'):
        coherence_safety_assertion_input1 = tf.placeholder(tf.float32, 
                                                           shape=hidden1_synthetic_gradient.get_shape().as_list(),
                                                          name="input1")
        coherence_safety_assertion_input2 = tf.placeholder(tf.float32, 
                                                           shape=hidden1_synthetic_gradient_train.get_shape().as_list(),
                                                          name="input2")
        # tf.equal is an exact comparison (no tolerance) and yields a boolean tensor.
        coherence_safety_assertion = tf.equal(coherence_safety_assertion_input1, coherence_safety_assertion_input2)
    
    def hidden2_test():
        """Smoke-test the decoupled hidden1 -> hidden2 hand-off on one batch.

        First runs hidden1 together with its synthetic-gradient update and
        prints the message shape, the message and the synthetic gradient. The
        fetched message is then fed into `layer2_input` for one `train_op`
        step, and the resulting loss is printed.
        """
        with tf.Session() as session:
            session.run(tf.global_variables_initializer())

            batch_images, batch_labels = data_sets.train.next_batch(FLAGS.batch_size, FLAGS.fake_data)

            # Stage 1: forward pass through hidden1 plus its synthetic-gradient update.
            message, synthetic_grad, _ = session.run(
                [hidden1, hidden1_synthetic_gradient, hidden1_optimize],
                feed_dict={images_placeholder: batch_images, labels_placeholder: batch_labels})
            print(message.shape)
            print(message)
            print(synthetic_grad)

            # Stage 2: train the rest of the network on the message fetched above.
            _, batch_loss = session.run(
                [train_op, loss],
                feed_dict={layer2_input: message, labels_placeholder: batch_labels})
            print(batch_loss)
            
    #hidden2_test()
    
    def synthetic_gradients_model_test():
        """Run one decoupled training step of every sub-model and print diagnostics.

        Using a single training batch, the stages are:
          1. hidden1 forward pass, updated with its synthetic gradient;
          2. hidden2 + output layer trained on the hidden1 message, also
             fetching the true gradient with respect to that message;
          3. the synthetic gradients model trained against that true gradient;
          4. two coherence checks comparing synthetic-gradient values.
        """
        with tf.Session() as session:
            session.run(tf.global_variables_initializer())

            def report_coherence(tag, left, right):
                # Element-wise equality of two gradient arrays; print the raw
                # result, then whether every element matched.
                outcome = session.run(
                    [coherence_safety_assertion],
                    feed_dict={coherence_safety_assertion_input1: left,
                               coherence_safety_assertion_input2: right})
                print(tag, outcome)
                print(tag, np.asarray(outcome).all())

            batch_images, batch_labels = data_sets.train.next_batch(FLAGS.batch_size, FLAGS.fake_data)

            # Stage 1 -- hidden1: fetch the message, the synthetic-model input and
            # the synthetic gradient in the same run that applies the update.
            message, model_input, synthetic_grad, _ = session.run(
                [hidden1, synthetic_gradients_model_input, hidden1_synthetic_gradient, hidden1_optimize],
                feed_dict={images_placeholder: batch_images, labels_placeholder: batch_labels})
            print(message.shape)
            print(message)

            # Stage 2 -- hidden2 and output layer: feed the message through the
            # placeholder, take one optimizer step, and fetch the true gradient.
            _, net_loss, true_grad = session.run(
                [train_op, loss, hidden2_true_gradients],
                feed_dict={layer2_input: message, labels_placeholder: batch_labels})
            print("net loss:", net_loss)

            # Stage 3 -- synthetic gradients model: regress its prediction (from
            # the stage-1 input) onto the true gradient from stage 2.
            _, model_loss, synthetic_grad_train = session.run(
                [synthetic_gradients_model_optimize_op,
                 synthetic_gradients_model_loss,
                 hidden1_synthetic_gradient_train],
                feed_dict={synthetic_gradients_model_train_input: model_input,
                           true_gradients_placeholder: true_grad})
            print("synthetic_gradients_model loss:", model_loss)

            # Check 1: live-path prediction (stage 1) vs. train-path prediction (stage 3).
            report_coherence("coherence_asserion:", synthetic_grad, synthetic_grad_train)

            # Check 2: two back-to-back train-path evaluations of the same input.
            first_replay = session.run(
                hidden1_synthetic_gradient_train,
                feed_dict={synthetic_gradients_model_train_input: model_input})
            second_replay = session.run(
                hidden1_synthetic_gradient_train,
                feed_dict={synthetic_gradients_model_train_input: model_input})
            report_coherence("coherence_asserion2:", first_replay, second_replay)
            
            
            
    # Run the full three-stage experiment once. The calls to hidden1_test() and
    # hidden2_test() in this cell are commented out.
    synthetic_gradients_model_test()
        
        
        
    

[100, 128]
[128, 32]
[32]
[100, 10]
[100, 138]
hidden_2_flattened_model
[4128]
hidden_2_replicated_model
[100, 4128]
synthetic_gradients_model_input
[100, 4266]
[100, 12800]
[100, 128]
synthetic_gradients_model_loss Tensor("synthetic_gradients/Sqrt:0", shape=(), dtype=float32)
[(u'hidden1/weights:0', <tf.Tensor 'synthetic_gradients/gradients_1/hidden1/MatMul_grad/tuple/control_dependency_1:0' shape=(784, 128) dtype=float32>), (u'hidden1/biases:0', <tf.Tensor 'synthetic_gradients/gradients_1/hidden1/add_grad/tuple/control_dependency_1:0' shape=(128,) dtype=float32>), (u'hidden2/weights:0', None), (u'hidden2/biases:0', None), (u'synthetic_gradients/weights:0', None), (u'synthetic_gradients/biases:0', None)]
downloading data sets
Extracting /notebooks/datasets/mnist/train-images-idx3-ubyte.gz
Extracting /notebooks/datasets/mnist/train-labels-idx1-ubyte.gz
Extracting /notebooks/datasets/mnist/t10k-images-idx3-ubyte.gz
Extracting /notebooks/datasets/mnist/t10k-labels-idx1-ubyte.gz
hidden2_t

In [None]:
#synthetic_gradients_model_output = tf.reshape(synthetic_gradients_model_logits, hidden1_shape)
            
        #weights1_gradients = tf.reshape(tf.slice(synthetic_gradients_model_logit, [0], [weights1_length]), weights1.get_shape())
        #biases1_gradients = tf.reshape(tf.slice(
        #    synthetic_gradients_model_logit, [weights1_length], [weights1_length+biases1_length]), biases1.get_shape())
        
        
            
        #print(tf.reshape(weights1,[-1]).get_shape())
        #layer1_lenght = tf.concat(tf.reshape(weights1,[-1]), tf.reshape(biases1[-1]))

        """
        #synthetic_gradients_model_input_lenght = synthetic_gradients_model_input.get_shape().as_list()[0]
        #synthetic_gradients_model_weights = tf.Variable(tf.truncated_normal([synthetic_gradients_model_input_lenght, 1],
        #    name='weights')
        synthetic_gradients_model_input_shape = synthetic_gradients_model_input.get_shape()#.as_list()[0]
        synthetic_gradients_model_weights = tf.Variable(tf.truncated_normal(synthetic_gradients_model_input_shape,name='weights'))
        synthetic_gradients_model_biases = tf.Variable(tf.zeros([FLAGS.synthetic_gradients_model_input_lenght]), name='biases')
        synthetic_gradients_model_logit = tf.matmul(synthetic_gradients_model_input, synthetic_gradients_model_weights) 
                                                        + synthetic_gradients_model_biases
            
        #layer1 gradients
        hidden1_model = [weights1, biases1]
        hidden1_gradients = tf.gradients(hidden1, hidden1_model, grad_ys=synthetic_gradients_model_logit)
        optimize_layer1 = optimizer.apply_gradients(zip(hidden1_gradients, hidden1_model))
        """