In [36]:
# seeds for reproducibility
# NumPy's global random generator is seeded with the fixed value 1337.
from numpy.random import seed
seed(1337)
# TensorFlow is seeded with the same fixed value.
# NOTE(review): `tensorflow.set_random_seed` is the TensorFlow 1.x name; under
# TensorFlow 2.x this import presumably fails (`tf.random.set_seed` is the
# replacement) — confirm against the pinned TensorFlow version.
from tensorflow import set_random_seed
set_random_seed(1337)

import numpy as np
import math
import binning
import npeet
from EDGE.EDGE_4_3_1 import EDGE
import KDE
import data_selection

### This notebook calculates the entropy of the input and output and the mutual information between them for the "Opening the Blackbox" (Tishby) and MNIST datasets using different estimators.

# Tishby Data

##### loading dataset

In [38]:
# Load the "tishby" dataset through the project's data_selection helper.
# shuffle=False is passed explicitly — presumably to keep the sample order
# fixed between runs; confirm in data_selection.select_data.
set_name = "tishby"

X_train, X_test, y_train, y_test = data_selection.select_data(set_name, shuffle=False)

Loading tishby Data...


##### Since all samples are different, the entropy of the input has to equal the base-2 logarithm of the number of samples

In [39]:
# Reference value for the H(X) estimates: the number of training samples
# and its base-2 logarithm.
print("Length of input data", len(X_train))
print("Logarithm of amount of input data",math.log(len(X_train),2))

Length of input data 3276
Logarithm of amount of input data 11.67771964164101


##### Binning estimator

In [3]:
# Binning estimator: first extract the inverse indices and probabilities of
# inputs and labels, then feed them to the mutual-information routine.
# NOTE(review): the meaning of the trailing positional `False` is defined in
# binning.calc_information_between_in_out and is not visible from this cell.
unique_inverse_x, unique_inverse_y, px, py = binning.extract_inout_probs(X_train,
                                                                           y_train)
IX_Y, HX = binning.calc_information_between_in_out(px, py, X_train, y_train, unique_inverse_x,
                                                   unique_inverse_y, False)
print("Bin MI(X,Y)", IX_Y)
print("Bin H(X)", HX)

Bin MI(X,Y) 0.9976734295143714
Bin H(X) 11.677719641641012


##### KSG estimator

In [4]:
# Estimates computed with the local `npeet` module on list-converted data.
# NOTE(review): in upstream NPEET, `midd` and `entropyd` are the discrete
# (plug-in) estimators, while the KSG k-nearest-neighbour estimator is `mi`.
# If the local module matches upstream, the "KSG" label below is misleading
# and would explain why these values coincide with the binning results —
# confirm which estimator is intended.
print("KSG MI(X,Y)",npeet.midd(X_train.tolist(), y_train.tolist()))
print("KSG H(X)",npeet.entropyd(X_train.tolist()))

KSG MI(X,Y) 0.9976734295143643
KSG H(X) 11.67771964164101


##### EDGE estimator

In [42]:
# EDGE estimator applied to inputs and labels with its default settings
# (no optional arguments are passed).
print("EDGE MI(X,Y)", EDGE(X_train, y_train))

EDGE MI(X,Y) 0.19461987597998315


##### KDE estimator


In [6]:
def KDEt(KDE_estimator_func, name, X=None, y=None, noise_var=None):
    """Print KDE-based estimates of H(X), H(X|Y) and MI(X,Y) in bits.

    H(X|Y) is the class-probability-weighted sum of the entropy estimates of
    the samples belonging to each class; MI(X,Y) is H(X) - H(X|Y).

    Args:
        KDE_estimator_func: callable ``f(data, variance)`` whose result is
            indexed with ``[0]`` to obtain an entropy estimate in nats.
        name: prefix for the three printed lines (e.g. "Upper", "Lower").
        X: sample array indexed by sample along the first axis. Defaults to
            the notebook global ``X_train`` (looked up at call time).
        y: one-hot label matrix of shape (n_samples, n_classes). Defaults to
            the notebook global ``y_train`` (looked up at call time).
        noise_var: variance handed to ``KDE_estimator_func``. Defaults to the
            notebook global ``noise_variance`` (looked up at call time).

    Returns:
        None. The results are only printed.
    """
    X = X_train if X is None else X
    y = y_train if y is None else y
    if noise_var is None:
        noise_var = noise_variance
    # Computed locally so the function does not depend on a global that is
    # defined after it; the estimators report nats, the notebook reports bits.
    nats_to_bits = 1.0 / np.log(2)

    # Empirical class probabilities: column means of the one-hot labels.
    class_probs = np.mean(y, axis=0)
    # Convert the one-hot rows back to integer class ids to select samples
    # (see https://github.com/keras-team/keras/issues/4981).
    class_ids = np.argmax(y, axis=1)

    entropy_X = KDE_estimator_func(X, noise_var)[0]
    entropy_X_giv_Y = 0.
    for i in range(y.shape[1]):
        members = class_ids == i
        if not members.any():
            # A class without samples contributes nothing to H(X|Y). Running
            # the estimator on zero samples is undefined and a NaN result
            # would otherwise propagate into H(X|Y) and MI(X,Y).
            continue
        entropy_cond = KDE_estimator_func(X[members, :], noise_var)[0]
        entropy_X_giv_Y += class_probs[i] * entropy_cond

    print(name+" H(X)", nats_to_bits * entropy_X)
    print(name+" H(X|Y)", nats_to_bits * entropy_X_giv_Y)
    print(name+" MI(X,Y)", nats_to_bits * (entropy_X - entropy_X_giv_Y))

# Variance handed to the KDE entropy estimators.
noise_variance = 1e-3
# Conversion factor from nats to bits (1 / ln 2).
nats2bits = 1.0/np.log(2)

# The KL-based estimator is reported under the label "Upper", the
# bd-based estimator under the label "Lower".
KDEt(KDE.entropy_estimator_kl, "Upper")
KDEt(KDE.entropy_estimator_bd, "Lower")

Upper H(X) -23.55183904416382
Upper H(X|Y) -24.549512473678192
Upper MI(X,Y) 0.99767342951437
Lower H(X) -23.551839044163817
Lower H(X|Y) -24.54951247367818
Lower MI(X,Y) 0.9976734295143649


In [41]:
# Check entropy function results for KDE on the two 1-D points [1, 2].
# With noise_variance much smaller than the squared distance between the
# points, the KDE is a mixture of two essentially non-overlapping Gaussians,
# so the entropy in bits is
#     1 + 0.5 * log2(2 * pi * e * noise_variance)
# which is about -1.936 for noise_variance = 1e-3 (differential entropy may
# be negative). Both estimators should reproduce this value.
test_points = np.array([[1], [2]])
expected_bits = 1.0 + 0.5 * np.log2(2 * np.pi * np.e * noise_variance)
print("Expected entropy of [1, 2]", expected_bits)
TestEntropyH = KDE.entropy_estimator_kl(test_points, noise_variance)[0]
print("Test entropy of [1, 2] (upper)", nats2bits * TestEntropyH)
TestEntropyL = KDE.entropy_estimator_bd(test_points, noise_variance)[0]
print("Test entropy of [1, 2] (lower)", nats2bits * TestEntropyL)
# Fail loudly if either estimator drifts from the analytic value.
np.testing.assert_allclose(nats2bits * TestEntropyH, expected_bits, rtol=1e-6)
np.testing.assert_allclose(nats2bits * TestEntropyL, expected_bits, rtol=1e-6)

Test entropy of [1, 1] (upper) -1.935796557150402
Test entropy of [1, 1] (lower) -1.9357965571504028


# MNIST

##### Loading dataset

In [43]:
# Load an MNIST subset through the project's data_selection helper:
# the digit classes listed in `nrs`, with `samples` passed as
# samples_per_class. shuffle=False is passed explicitly.
set_name = "mnist"

nrs = [3,8,1]
samples = 1000
X_train, X_test, y_train, y_test = data_selection.select_data(set_name, shuffle=False,
                                                              samples_per_class = samples,
                                                              list_of_nrs=nrs)

Loading mnist Data...
X_train shape  (3000, 28, 28)
y_train shape  (3000,)


In [44]:
# Reference value for the H(X) estimates: the number of training samples
# and its base-2 logarithm.
print("Length of input data", len(X_train))
print("Logarithm of amount of input data",math.log(len(X_train),2))

Length of input data 3000
Logarithm of amount of input data 11.550746785383243


##### Binning estimator

In [45]:
# Binning estimator on the MNIST subset: extract the inverse indices and
# probabilities of inputs and labels, then compute MI(X,Y) and H(X).
# NOTE(review): the meaning of the trailing positional `False` is defined in
# binning.calc_information_between_in_out and is not visible from this cell.
unique_inverse_x, unique_inverse_y, px, py = binning.extract_inout_probs(X_train,
                                                                           y_train)
IX_Y, HX = binning.calc_information_between_in_out(px, py, X_train, y_train, unique_inverse_x,
                                                   unique_inverse_y, False)
print("Bin MI(X,Y)", IX_Y)
print("Bin H(X)", HX)

Bin MI(X,Y) 1.5849625007211543
Bin H(X) 11.550746785383241


##### KSG estimator

In [46]:
# Estimates computed with the local `npeet` module on list-converted data.
# NOTE(review): in upstream NPEET, `midd` and `entropyd` are the discrete
# (plug-in) estimators, while the KSG k-nearest-neighbour estimator is `mi`.
# If the local module matches upstream, the "KSG" label below is misleading
# and would explain why these values coincide with the binning results —
# confirm which estimator is intended.
print("KSG MI(X,Y)",npeet.midd(X_train.tolist(), y_train.tolist()))
print("KSG H(X)",npeet.entropyd(X_train.tolist()))

KSG MI(X,Y) 1.584962500721156
KSG H(X) 11.55074678538324


##### EDGE estimator

In [47]:
# EDGE mutual-information estimate between inputs and labels, computed with
# the estimator's default settings (no optional arguments are passed).
print("EDGE", EDGE(X_train, y_train))

EDGE 0.2936402363927715


##### KDE estimator

In [48]:
# KDEt, noise_variance and nats2bits are defined once in the Tishby section
# above; the verbatim re-definition that used to live in this cell was
# redundant. KDEt looks up X_train / y_train when it is called, so these
# calls evaluate the MNIST data loaded in this section.
KDEt(KDE.entropy_estimator_kl, "Upper")
KDEt(KDE.entropy_estimator_bd, "Lower")

Upper H(X) -2290.107902704249
Upper H(X|Y) -2291.692610891892
Upper MI(X,Y) 1.5847081876426452
Lower H(X) -2290.1119841060085
Lower H(X|Y) -2291.6970445142524
Lower MI(X,Y) 1.5850604082438087
