# Information Theory Measures with RBIG

In [1]:
import sys

# Make a local RBIG checkout importable. Every insert goes to position 0, so
# the path inserted LAST is the one searched FIRST.
# NOTE(review): these are machine-specific absolute paths; installing the
# package into the environment (e.g. an editable install) would remove the
# need for them and make the notebook portable.
# Local workstation checkouts (the original label here was "MacOS").
sys.path.insert(0, '/Users/eman/Documents/code_projects/rbig/')
sys.path.insert(0, '/home/emmanuel/code/py_packages/py_rbig/src')

# ERC server checkout
sys.path.insert(0, '/home/emmanuel/code/rbig/')


import numpy as np
import warnings
# NOTE(review): `time` and `train_test_split` are not used in the cells
# visible here — confirm they are needed further down before keeping them.
from time import time
from rbig.rbig import RBIGKLD
from sklearn.model_selection import train_test_split
from sklearn.utils import check_random_state
import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline

# Silences ALL warnings for the rest of the session, including ones that may
# point at real numerical problems — remove while debugging.
warnings.filterwarnings('ignore') # get rid of annoying warnings

# Re-import edited modules automatically so changes to the local rbig source
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Kullback-Leibler Divergence (KLD)

In [18]:
# Experiment configuration
seed = 123             # fixes the random stream so the run is repeatable
n_samples = 10000      # number of points drawn from each distribution
d_dimensions = 10      # dimensionality of every sample
mu = 0.4               # mean offset between the two distributions (larger = more different)

# Shared random number generator used by all sampling below
rng = check_random_state(seed)

### Sample Data

In [19]:
# Draw a random square matrix.
# NOTE: this call advances `rng`, so it must stay ahead of the sampling below
# for X and Y to remain reproducible for a given seed.
A = rng.rand(d_dimensions, d_dimensions)

# Symmetric positive semi-definite matrix built from A
cov = A @ A.T

# Normalize the covariance matrix by its largest entry.
# (Fix: the original divided `A` by `A.max()`, which discarded `A @ A.T` and
# left a non-symmetric matrix in `cov`.)
cov = cov / cov.max()

# Covariance matrices for x and y.
# NOTE: `cov` above is NOT used here — both distributions get the identity
# covariance, so X and Y differ only through their means.
cov_x = np.eye(d_dimensions)
cov_y = cov_x.copy()

# X is shifted by `mu` in every dimension; Y is centred at the origin
mu_x = np.zeros(d_dimensions) + mu
mu_y = np.zeros(d_dimensions)

# Draw `n_samples` points from each multivariate Gaussian
X = rng.multivariate_normal(mu_x, cov_x, n_samples)
Y = rng.multivariate_normal(mu_y, cov_y, n_samples)


### KLD Using the Closed-Form Formula

In [20]:
# Closed-form KL divergence KL(N_x || N_y) between two multivariate Gaussians,
# in nats (np.log is the natural logarithm).
mean_diff = mu_y - mu_x
cov_y_inv = np.linalg.inv(cov_y)

# Squared distance between the means, measured with the inverse of cov_y
mahalanobis_term = mean_diff @ cov_y_inv @ mean_diff.T
# Mismatch between the two covariance matrices
trace_term = np.trace(cov_y_inv @ cov_x)
# Log-ratio of the covariance determinants
log_det_term = np.log(np.linalg.det(cov_x) / np.linalg.det(cov_y))

kld_original = 0.5 * (mahalanobis_term + trace_term - log_det_term - d_dimensions)

print(f'KLD: {kld_original:.4f}')

KLD: 0.8000


### KLD Using RBIG

In [21]:
%%time

n_layers = 100000
rotation_type = 'PCA'
random_state = 0
zero_tolerance = 60
tolerance = None
pdf_extension = 10
pdf_resolution = None
verbose = 0

# Initialize RBIG class
rbig_model = RBIGKLD(n_layers=n_layers, 
                  rotation_type=rotation_type, 
                  random_state=random_state, 
                  zero_tolerance=zero_tolerance,
                  tolerance=tolerance,
                     pdf_resolution=pdf_resolution,
                    pdf_extension=pdf_extension,
                    verbose=verbose)

# fit model to the data
rbig_model.fit(X, Y);

# Save KLD value to data structure
kld_rbig= rbig_model.kld*np.log(2)

print(f'KLD (RBIG): {kld_rbig:.4f}')

KLD (RBIG): 0.8319
CPU times: user 4min 31s, sys: 1.97 ms, total: 4min 31s
Wall time: 17 s
