# Toy Network
### implementation of the fully-connected feed-forward network as specified in the paper (page 7, section 3.1) trained on the toy dataset of 4096 configurations of 12 bits

In [206]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import sys
import random
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

# learning rate determines the step size in SGD
learning_rate = 0.001
# fix the seed in order to get the same training set each time
np.random.seed(111)

# define a function for weight and bias initialization (random)
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

##### ----- Data Directories ---- #####
data_folder = "network_data/"
dataset_file = ["configs.npy","labels.npy","labels64.npy"]
model_file = "toy_network_model.ckpt"


##### ----- Import data ---- #####
configs = np.load(data_folder+dataset_file[0])
num_configs = len(configs)
labels = np.empty([num_configs,2])
labels[:,0] = np.load(data_folder+dataset_file[1])

# convert labels into 1-hot representation
# y=0 --> (1,0)
# y=1 --> (0,1)
ind0 = (labels[:,0]==0)
ind1 = (labels[:,0]==1)
labels[ind0,:]=np.array([1,0])
labels[ind1,:]=np.array([0,1])
    
##### ----- Select training and test set ---- #####
# we choose the training set to be about 85% of the entire data set
num_train_configs = 3480
train_indices = random.sample(range(num_configs),num_train_configs)
test_indices = [ x for x in range(num_configs) if x not in train_indices]
    
configs_train = np.array( [configs[i] for i in train_indices] )
configs_test = np.array( [configs[i] for i in test_indices] )
labels_train = np.array( [labels[i] for i in train_indices] )
labels_test = np.array( [labels[i] for i in test_indices] )


##### ----- Create the model ---- #####
    
# input layer: 12 units
x = tf.placeholder(tf.float32, [None, 12])
    
# 1st hidden layer: 10
W1 = weight_variable([12, 10])
b1 = weight_variable([10])
y1 = tf.tanh(tf.matmul(x, W1) + b1)

# 2nd hidden layer: 7
W2 = weight_variable([10, 7])
b2 = weight_variable([7])
y2 = tf.tanh(tf.matmul(y1, W2) + b2)
    
# 3rd hidden layer: 5
W3 = weight_variable([7, 5])
b3 = weight_variable([5])
y3 = tf.tanh(tf.matmul(y2, W3) + b3)
    
# 4th hidden layer: 4
W4 = weight_variable([5, 4])
b4 = weight_variable([4])
y4 = tf.tanh(tf.matmul(y3, W4) + b4)
    
# 5th hidden layer: 3
W5 = weight_variable([4, 3])
b5 = weight_variable([3])
y5 = tf.tanh(tf.matmul(y4, W5) + b5)
    
# output layer: 2
W6 = weight_variable([3, 2])
b6 = weight_variable([2])
y = tf.matmul(y5, W6) + b6

# define placeholder for true labels
y_ = tf.placeholder(tf.float32, [None, 2])

# define loss function and training procedure
cross_entropy = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=y_, logits=y))
#train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy)

global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 0.2
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                          10000, 0.96, staircase=True)
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy,global_step=global_step)


# start the session
sess = tf.InteractiveSession()
# initialize all variables defined above
tf.global_variables_initializer().run()

# define functions for testing of the trained model
correct_prediction = tf.equal(tf.argmax(tf.sigmoid(y), 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


# add ops to save and restore all the variables.
saver = tf.train.Saver()

##### ----- Train model ---- #####
minibatch_size = 20
num_epochs = 1
# define arrays to collect accuracy and error data during training
err_list  = np.zeros(num_epochs)
train_acc = np.zeros(num_epochs)
test_acc = []
# create a permutation of indices to shuffle the data set in each epoch
permut = np.arange(num_train_configs)

for epoch in range(num_epochs):
    np.random.shuffle(permut)
    train_configs = configs_train[permut,:]
    train_labels = labels_train[permut,:]
    for k in range(0, num_train_configs, minibatch_size):
        batch_xs = train_configs[k:k+minibatch_size,:]
        batch_ys = train_labels[k:k+minibatch_size,:]
        sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
        # for each epoch record one value of training accuracy and cross entropy
        if k==minibatch_size:
            train_acc[epoch] = accuracy.eval(feed_dict={x: batch_xs, y_: batch_ys})   
            err_list[epoch] = sess.run(cross_entropy, feed_dict={x: batch_xs, y_: batch_ys})
    # for every 10th epoch record the test accuracy
    if epoch%10==0:
        test_acc += [sess.run(accuracy, feed_dict={x: configs_test, y_: labels_test})]

# save the variables to disk
save_path = saver.save(sess,data_folder+model_file)
print("Model saved in file: %s" % data_folder+model_file)

# save arrays with accuracy and error data
np.savez_compressed(data_folder+'{}epochs_train_acc'.format(num_epochs), a=train_acc)
np.savez_compressed(data_folder+'{}epochs_test_acc'.format(num_epochs), a=test_acc)
np.savez_compressed(data_folder+'{}epochs_err_list'.format(num_epochs), a=err_list)

Model saved in file: network_data/toy_network_model.ckpt


In [207]:
#saver = tf.train.Saver()
# Restore variables from disk.
saver.restore(sess, data_folder+model_file)
print("Model restored.")


# Get sample of layer output data for MI calculation

t4 = y4.eval(feed_dict={x: configs})

INFO:tensorflow:Restoring parameters from network_data/toy_network_model.ckpt
Model restored.


In [287]:
# Calculate Mutual Information for Neural Network Input X, Output Y, Layers T

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf


#######  datafolder+ dataset_file[2] = /path_to/"labels64.npy"  ###############

class MUT_INFO(object):
    def __init__(self, n_bins=1000, dataset_file=data_folder+dataset_file[2]):
        
        # Initialize Mutual Information Class for given dataset
        # mut_info method will be called with layer output array:
        #     t = (Nx by n_t) array, where Nx is the number of datasets, and n_t is the number of neurons in the layer
        
        
        # Define bin size
        self.n_bins = n_bins
        
        
        # Obtain constant p_xy distribution from dataset and calculate: p(x) and size of probability space: NX
        self.p_xy = self.prob_joint_X_binaryY(dataset_file)
        
        return
        
    def mut_info(self,t):
        # Estimate Mutual Information of between Random Variables (X,Y,T):    I(X,T) and I(Y,T)
        
        # Probability of p(t(x)) and delta(t(x),t(x'))
        p_tx,delta_tx = self.prob_t_x(t,n_bins=self.n_bins)      
        
        # Calculate Mutual Information of I(T,Y)
        # p_xy: (Ny(=2 for binary) by Nx) array,    p_tx = (Nx by 1) array,   delta_tx = (Nx by Nx) array,  p_x = (Nx by 1) array
        
        
#         print('pxy: ',self.p_xy)
#         print('delta: ',np.shape(delta_tx))
#         print('ptx: ',np.shape(p_tx))
#         print('dot(p_xy,delta_tx): ' ,np.dot(self.p_xy,delta_tx))
#         print('py: ',np.sum(self.p_xy,1))
#         print(np.dot(self.p_xy,delta_tx)/np.sum(self.p_xy,1)[:,np.newaxis])
#         print(p_tx)
#         print(self.p_xy*(np.log2(np.dot(self.p_xy,delta_tx)/np.sum(self.p_xy,1)[:,np.newaxis]/p_tx)))
#         print(True in np.isnan(self.p_xy*(np.log2(np.dot(self.p_xy,delta_tx)/np.sum(self.p_xy,1)[:,np.newaxis]/p_tx))))
    
    
        ###### GET nan in log2 part of I_TY: due to p_xy values rounded to 0's and 1's for unknown reason ###########
        I_TY = np.sum(self.p_xy*(np.log2(np.dot(self.p_xy,delta_tx)/np.sum(self.p_xy,1)[:,np.newaxis]/p_tx)))
    
    
    
        I_TX = -np.dot(self.p_x,np.log2(p_tx))


        return [I_TX, I_TY]
        
        
    def prob_joint_X_binaryY(self,dataset_file):
    
        def py_x(u,gamma=30.5,theta=34):
            return 1.0/(1.0 + np.exp(-gamma*(u-theta)))

        # Import Original X Data and calculate size of Probability Space NX
        X = np.load(dataset_file).astype(np.float)
        self.NX = np.size(X)
        
        # Calculate p(x)
        self.p_x = np.ones(self.NX)*1/self.NX
        
        
        ################ when printed, shows py_x values rounded to 0's and 1's for unknown reason ###########
        pyx = py_x(X) 
        #print('pyx sig: ',pyx)
        
        return np.array([(1-py_x(X))*self.px, py_x(X)*self.px])
    
    
    

    def prob_t_x(self,t, n_bins): # Thanks Lauren!
        # """Takes the layer's output t(x) and a number of bins
        #  Returns a probability p(t(x)) as a vector and a matrix for KroneckerDelta(t(x), t(x'))"""

        # Define bins
        bins = np.linspace(-1, 1, n_bins)
        
        # Count number of appearance of each vector
        _, indices, counts= np.unique(np.digitize(t, bins), 
                                return_inverse=True, return_counts=True, axis=0)
        # Create delta matrix from indices
        delta = (np.array([indices,] * len(indices)).T == indices).astype(np.int)
        
        # Return p(t_x), delta
        return counts[indices]/self.NX, delta



m = MUT_INFO()
print(m.mut_info(t4))

pyx sig:  [0. 1. 1. ... 1. 1. 1.]
pxy:  [[0.00024414 0.         0.         ... 0.         0.         0.        ]
 [0.         0.00024414 0.00024414 ... 0.00024414 0.00024414 0.00024414]]
delta:  (4096, 4096)
ptx:  (4096,)
dot(p_xy,delta_tx):  [[0.01586914 0.05480957 0.0012207  ... 0.01379395 0.02612305 0.01379395]
 [0.01391602 0.05285645 0.0012207  ... 0.01745605 0.02270508 0.01745605]]
py:  [0.4987793 0.5012207]
[[0.03181596 0.10988742 0.00244738 ... 0.02765541 0.05237396 0.02765541]
 [0.02776425 0.10545543 0.00243546 ... 0.03482708 0.04529956 0.03482708]]
[0.02978516 0.10766602 0.00244141 ... 0.03125    0.04882812 0.03125   ]
[[ 2.32316862e-05  0.00000000e+00  0.00000000e+00 ... -0.00000000e+00
   0.00000000e+00 -0.00000000e+00]
 [-0.00000000e+00 -7.30701732e-06 -8.58865261e-07 ...  3.81722228e-05
  -2.64197487e-05  3.81722228e-05]]
True
nan
