In [5]:
# library
import tensorflow as tf
import numpy as np,sys
import matplotlib.pyplot as plt
tf.set_random_seed(6789)
np.random.seed(678)

In [8]:
# create the normal distribution
def det(array_or_scalar):
    if array_or_scalar.size > 1:
        return np.linalg.det(array_or_scalar)
    else:
        return array_or_scalar
def get_h_mvn(x):

    """
    Computes the entropy of a multivariate Gaussian distribution:
    H(X) = (1/2) * log((2 * pi * e)^d * det(cov(X)))
    Arguments:
    ----------
    x: (n, d) ndarray
        n samples from a d-dimensional multivariate normal distribution
    Returns:
    --------
    h: float
        entropy H(X)
    """

    d = x.shape[1]
    h  = 0.5 * np.log((2 * np.pi * np.e)**d * det(np.cov(x.T)))
    return h
def get_mi_mvn(x, y):
    """
    Computes the mutual information I between two multivariate normal random
    variables, X and Y:
    I(X, Y) = H(X) + H(Y) - H(X, Y)
    Arguments:
    ----------
    x, y: (n, d) ndarrays
        n samples from d-dimensional multivariate normal distributions
    Returns:
    --------
    mi: float
        mutual information I(X, Y)
    """

    d = x.shape[1]

    # hx  = 0.5 * log((2 * np.pi * np.e)**d     * det(np.cov(x.T)))
    # hy  = 0.5 * log((2 * np.pi * np.e)**d     * det(np.cov(y.T)))
    # hxy = 0.5 * log((2 * np.pi * np.e)**(2*d) * det(np.cov(x.T, y=y.T)))
    # mi = hx + hy - hxy

    # hx  = 0.5 * log(det(2*np.pi*np.e*np.cov(x.T)))
    # hy  = 0.5 * log(det(2*np.pi*np.e*np.cov(y.T)))
    # hxy = 0.5 * log(det(2*np.pi*np.e*np.cov(np.c_[x,y].T)))
    hx  = get_h_mvn(x)
    hy  = get_h_mvn(y)
    hxy = get_h_mvn(np.c_[x,y])
    mi = hx + hy - hxy

    # mi = 0.5 * (log(det(np.cov(x.T))) + log(det(np.cov(y.T))) - log(det(np.cov(np.c_[x,y].T))))

    return mi

N=900000
dimension = 140
mean  = np.zeros(dimension)
sigma = np.ones((dimension,dimension)) * 0.9
np.fill_diagonal(sigma,1.0)

temp  = np.random.multivariate_normal(mean,sigma,10000)
x_sample = temp[:,:dimension//2]
y_sample = temp[:,dimension//2:]
mi = get_mi_mvn(x_sample,y_sample)
print(temp.shape)
print(mi)

(10000, 140)
3.122075475590634


In [34]:
# layers
def tf_relu(x):   return tf.nn.relu(x)
def d_tf_relu(x): return tf.cast(tf.greater(x,0),tf.float32)

# Func: Fully Connected Layer
class FNN():

    def __init__(self,inc,outc,act=tf_relu,d_act=d_tf_relu,special_init=False,which_reg=0.0):
        if special_init:
            interval = np.sqrt(6.0 / (inc + outc + 1.0))
            self.w = tf.Variable(tf.random_uniform(shape=(inc, outc),minval=-interval,maxval=interval,dtype=tf.float32,seed=2))
            self.b = tf.Variable(tf.random_uniform(shape=(outc),minval=-interval,maxval=interval,dtype=tf.float32,seed=2))
        else:
            self.w = tf.Variable(tf.random_normal([inc,outc], stddev=0.05,seed=2,dtype=tf.float32))
            self.b = tf.Variable(tf.random_normal([outc], stddev=0.05,seed=2,dtype=tf.float32))

        self.m,self.v = tf.Variable(tf.zeros_like(self.w)),tf.Variable(tf.zeros_like(self.w))
        self.m_b,self.v_b = tf.Variable(tf.zeros_like(self.b)),tf.Variable(tf.zeros_like(self.b))
        self.act,self.d_act = act,d_act
        self.which_reg = which_reg

    def getw(self): return self.w

    def feedforward(self,input=None):
        self.input = input
        self.layer = tf.matmul(input,self.w) + self.b
        self.layerA = self.act(self.layer)
        return self.layerA

    def backprop(self,gradient=None,which_reg=0):
        grad_part_1 = gradient
        grad_part_2 = self.d_act(self.layer)
        grad_part_3 = self.input

        grad_middle = grad_part_1 * grad_part_2
        grad  = tf.matmul(tf.transpose(grad_part_3),grad_middle)/batch_size
        grad_pass = tf.matmul(grad_middle,tf.transpose(self.w))

        update_w = []

        # Update the Weight First
        update_w.append(tf.assign( self.m,self.m*beta1 + (1-beta1) * (grad)   ))
        update_w.append(tf.assign( self.v,self.v*beta2 + (1-beta2) * (grad ** 2)   ))
        m_hat = self.m / (1-beta1)
        v_hat = self.v / (1-beta2)
        adam_middle = m_hat *  learning_rate/(tf.sqrt(v_hat) + adam_e)
        update_w.append(tf.assign(self.w,tf.subtract(self.w,adam_middle )))

        return grad_pass,update_w

In [20]:
# define layers
# sess = tf.InteractiveSession()
n_hidden = 10 
l1 = FNN(dimension//2,10)
l2 = FNN(10,1)

In [51]:
# create graph
x  = tf.placeholder(tf.float32, [None,dimension//2])
y  = tf.placeholder(tf.float32, [None,dimension//2])
y_ = tf.placeholder(tf.float32, [None,dimension//2])

Wx = tf.Variable(tf.random_normal(stddev=0.1,shape=[dimension//2,n_hidden]))
Wy = tf.Variable(tf.random_normal(stddev=0.1,shape=[dimension//2,n_hidden]))
Wout = tf.Variable(tf.random_normal(stddev=0.1,shape=[n_hidden,1]))

hidden_joint  = tf.matmul(x,Wx)+tf.matmul(y,Wy)
hidden_marg   = tf.matmul(x,Wx)+tf.matmul(y_,Wy)
hidden_jointa = tf_relu(hidden_joint)
hidden_marga  = tf_relu(hidden_marg)
out_joint = tf.matmul(hidden_joint,Wout)
out_marg  = tf.matmul(hidden_marg,Wout)
 
lower_bound=-(tf.reduce_mean(out_joint)-tf.log(tf.reduce_mean(tf.exp(out_marg))))

dwout         = tf.reduce_mean(-((1/N)*tf.transpose(hidden_joint)+(-N)*tf.transpose(hidden_marg)),1,True)
dhidden_joint = -(1/N) * tf.transpose(Wout) * d_tf_relu(hidden_joint)
dhidden_marg  = (-N)*tf.transpose(Wout)     * d_tf_relu(hidden_marg)
dWx = tf.transpose(x) @ dhidden_joint + tf.transpose(x) @ dhidden_marg 
dYx = tf.transpose(y) @ dhidden_joint + tf.transpose(y_) @ dhidden_marg 

updatewX = Wx.assign(Wx-0.000001 * dWx)
updatewY = Wy.assign(Wy-0.000001 * dYx)
updatewO = Wout.assign(Wout-0.000001 * dwout)
update_w = [updatewX,updatewY,updatewO]

In [None]:
values = []
# sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
for i in range(1000):
    #x_sample  = np.random.normal(0.,sig1,[N,1])
    #y_sample  = np.random.normal(0.,sig2,[N,1])
    y_shuffle = np.random.permutation(y_sample)
    number,_  = sess.run([lower_bound,update_w], feed_dict={x:x_sample,y:y_sample,y_:y_shuffle})
    sys.stdout.write("\n" + str(number))
    sys.stdout.flush()
    values.append(number)
    
plt.plot(values)
plt.plot(np.ones_like(values)*mi)
plt.show()

In [117]:
perm = np.arange(ground_truth_array.shape[0])
np.random.shuffle(perm)

In [144]:
ground_truth_array = np.random.randn(10,2)
ground_truth_array2= ground_truth_array[::-1]
idx  = np.arange(len(ground_truth_array))
np.random.shuffle(idx)
perm = np.arange(ground_truth_array.shape[0])
np.random.shuffle(perm)

print(ground_truth_array)
print(perm)
print(ground_truth_array[idx])
print(ground_truth_array[idx][idx])



[ 6  7  8  9 10 11 12 13 14 15 16 17 18]
[11  6  2 10  0  3  5  4  9  7  1  8 12]
[11 14 16 13  8 17 10  6  9 15 12  7 18]
[17  9 12  6 16  7  8 11 13 15 10 14 18]


In [151]:
ground_truth_array = np.around(np.random.randn(10,2),2)
perm = np.arange(ground_truth_array.shape[0])
np.random.shuffle(perm)

print(ground_truth_array)
print(perm)
print('------------')
print(ground_truth_array[perm])
print('------------')
print(ground_truth_array[perm][perm])

[[-1.08  0.1 ]
 [ 2.04  0.48]
 [ 0.24  0.89]
 [ 0.5   0.47]
 [ 1.23  0.51]
 [ 0.32 -0.35]
 [-0.33 -1.22]
 [ 1.37  0.26]
 [-0.3   0.72]
 [ 0.8  -0.59]]
[6 7 2 4 0 8 1 9 3 5]
------------
[[-0.33 -1.22]
 [ 1.37  0.26]
 [ 0.24  0.89]
 [ 1.23  0.51]
 [-1.08  0.1 ]
 [-0.3   0.72]
 [ 2.04  0.48]
 [ 0.8  -0.59]
 [ 0.5   0.47]
 [ 0.32 -0.35]]
------------
[[ 2.04  0.48]
 [ 0.8  -0.59]
 [ 0.24  0.89]
 [-1.08  0.1 ]
 [-0.33 -1.22]
 [ 0.5   0.47]
 [ 1.37  0.26]
 [ 0.32 -0.35]
 [ 1.23  0.51]
 [-0.3   0.72]]


In [152]:
ground_truth_array = np.arange(10,2)
idx  = np.arange(len(ground_truth_array))
np.random.shuffle(idx)
print(ground_truth_array)
print(idx)
print('-----------')
print(ground_truth_array)
print('-----------')
print(ground_truth_array[idx])
print('-----------')
print(ground_truth_array[idx][idx])
print('-----------')

[]
[]
-----------
[]
-----------
[]
-----------
[]
-----------


In [160]:
temp = np.random.randn(10,2)
idx  = np.arange(temp.shape[0])
np.random.shuffle(idx)
print(temp)
print(idx)
print('-----------')
print(temp[idx])
print('-----------')
print(temp[idx][idx])
print('-----------')

[[ 0.9081402  -1.03184633]
 [ 1.55161912 -1.56226472]
 [ 0.42969857  0.02593002]
 [ 1.14916804  0.7264392 ]
 [ 0.29103924 -0.26273769]
 [-0.18705406 -2.14406652]
 [-0.26405156 -0.510692  ]
 [ 0.14944704 -1.77838833]
 [-1.77157046 -0.06663116]
 [ 0.05713969 -0.65485001]]
[6 0 8 5 2 4 7 9 1 3]
-----------
[[-0.26405156 -0.510692  ]
 [ 0.9081402  -1.03184633]
 [-1.77157046 -0.06663116]
 [-0.18705406 -2.14406652]
 [ 0.42969857  0.02593002]
 [ 0.29103924 -0.26273769]
 [ 0.14944704 -1.77838833]
 [ 0.05713969 -0.65485001]
 [ 1.55161912 -1.56226472]
 [ 1.14916804  0.7264392 ]]
-----------
[[ 0.14944704 -1.77838833]
 [-0.26405156 -0.510692  ]
 [ 1.55161912 -1.56226472]
 [ 0.29103924 -0.26273769]
 [-1.77157046 -0.06663116]
 [ 0.42969857  0.02593002]
 [ 0.05713969 -0.65485001]
 [ 1.14916804  0.7264392 ]
 [ 0.9081402  -1.03184633]
 [-0.18705406 -2.14406652]]
-----------


In [150]:
temp    = tf.Variable(tf.random_normal((10,2,2)))
numbers = tf.Variable(np.array([3,2,1,0,4,5,6,7,8,9]))
ttemp   = tf.gather(temp,numbers)
tttemp  = tf.gather(ttemp,numbers)

sess.run(tf.global_variables_initializer())
print(temp.eval())
print('----------')
print(ttemp.eval())
print('----------')
print(tttemp.eval())
print('----------')
print(tttemp.eval()==temp.eval())

[[[-0.08145102  0.00568196]
  [ 1.1921625  -1.1483877 ]]

 [[-1.6468874   1.5500492 ]
  [ 0.37730742  1.9223727 ]]

 [[ 2.197381   -0.0582034 ]
  [-1.2526188   0.01525748]]

 [[ 0.42265046  0.10300288]
  [-0.9203869   0.7117603 ]]

 [[-2.4361715  -1.2111809 ]
  [-0.49766204  1.9758099 ]]

 [[ 0.9748405  -0.6046009 ]
  [-0.32467616 -0.6652155 ]]

 [[-0.40343404  2.5258133 ]
  [ 2.8018606  -1.0444173 ]]

 [[ 1.7095753  -1.1578435 ]
  [-0.18773709 -0.36860877]]

 [[-0.85149693 -0.5137977 ]
  [-0.46807152 -1.6043476 ]]

 [[ 0.4909838  -0.0936959 ]
  [ 1.400663    0.52848256]]]
----------
[[[ 0.42265046  0.10300288]
  [-0.9203869   0.7117603 ]]

 [[ 2.197381   -0.0582034 ]
  [-1.2526188   0.01525748]]

 [[-1.6468874   1.5500492 ]
  [ 0.37730742  1.9223727 ]]

 [[-0.08145102  0.00568196]
  [ 1.1921625  -1.1483877 ]]

 [[-2.4361715  -1.2111809 ]
  [-0.49766204  1.9758099 ]]

 [[ 0.9748405  -0.6046009 ]
  [-0.32467616 -0.6652155 ]]

 [[-0.40343404  2.5258133 ]
  [ 2.8018606  -1.0444173 ]]

 [[

In [161]:
! git all-go

[master 29282a1] commit
 2 files changed, 704 insertions(+), 18 deletions(-)
Counting objects: 4, done.
Delta compression using up to 4 threads.
Compressing objects: 100% (4/4), done.
Writing objects: 100% (4/4), 1.95 KiB | 1.96 MiB/s, done.
Total 4 (delta 3), reused 0 (delta 0)
remote: Resolving deltas: 100% (3/3), completed with 3 local objects.[K
To https://github.com/JaeDukSeo/Mututal-Information.git
   9064bcf..29282a1  master -> master
