In [1]:
import numpy as np

# Project-local modules: the cells below use bdcs.BDCS, bdcs.empirical_covariance_partition,
# isde.ISDE and isde.EmpCovariance from them.
import includes.bdcs as bdcs
import includes.isde as isde

from sklearn.covariance import empirical_covariance
import pandas as pd

# NOTE(review): this silences every warning for the whole notebook, including
# numerical warnings (e.g. log of a non-positive value) and deprecation warnings.
import warnings
warnings.filterwarnings('ignore')

# NOTE(review): pickle and os are not used in the cells visible here.
from time import time
import pickle
import os

# Seed NumPy's legacy global RNG; the np.random.multivariate_normal draws in the
# experiment cells consume this stream, so results depend on cell execution order.
np.random.seed(42)

In [2]:
import pandas as pd

def show_tables(dict_output, factor, prec, score, label="", caption="", resizebox=False):
    '''Print a LaTeX table of "mean +/- std" scores, one cell per (experiment, structure).

    dict_output (dict): nested mapping ``dict_output[exp][struct][what]`` whose leaves are
        sequences of values. Rows of the table are the first-level keys, columns are the
        second-level keys of the FIRST experiment (every experiment must provide them).
    factor (real): the mean is multiplied by ``factor`` and the std by ``abs(factor)``
    prec (integer): number of decimals kept
    score (str): third-level key of ``dict_output`` holding the score values
    label, caption, resizebox: forwarded unchanged to ``dfmeanstd_to_latex``

    Returns None; the table is printed by ``dfmeanstd_to_latex``. Running times stored
    under 'exec_time' are not part of the rendered table.
    '''
    exps = list(dict_output)
    Ss = list(dict_output[exps[0]])

    df_means = pd.DataFrame(0.0, index=exps, columns=Ss)
    df_stds = pd.DataFrame(0.0, index=exps, columns=Ss)

    for exp in exps:
        for struct in Ss:
            values = dict_output[exp][struct][score]
            # .at writes into the frame itself; chained ``df[col][row] = ...`` may write
            # to a temporary copy and leave the frame unchanged (pandas Copy-on-Write).
            df_means.at[exp, struct] = round(np.mean(values) * factor, prec)
            df_stds.at[exp, struct] = round(np.std(values) * np.abs(factor), prec)

    dfmeanstd_to_latex(df_means=df_means, df_std=df_stds, prec=prec, label=label, caption=caption, resizebox=resizebox)

In [3]:
# Function to simulate data
def Sigma(struct, sigma_value):
    '''Build a block-diagonal matrix with unit diagonal.

    struct: sequence of block sizes; the matrix has shape (sum(struct), sum(struct))
    sigma_value: value of every off-diagonal entry inside a block

    Entries outside the diagonal blocks are 0 and every diagonal entry is 1.
    Returns the matrix as a numpy array.
    '''
    block_ends = np.cumsum(struct)
    d = np.sum(struct)
    M = np.zeros(shape=(d, d))

    # Each block spans [start, end) on both axes; a block starts where the previous ends.
    block_starts = np.concatenate(([0], block_ends[:-1]))
    for start, end in zip(block_starts, block_ends):
        M[start:end, start:end] = sigma_value

    np.fill_diagonal(M, 1)
    return M


# Compute KL loss
def KL(Sigma1, Sigma2):
    '''Kullback-Leibler loss between two covariance matrices of the same size.

    With v the eigenvalues of B = Sigma2^{-1} Sigma1 - I, returns
    sum(v - log(1 + v)) / 2, i.e.
    (trace(Sigma2^{-1} Sigma1) - d - log det(Sigma2^{-1} Sigma1)) / 2.

    Sigma1, Sigma2: square array-likes of identical shape, assumed symmetric
        positive definite (Sigma2 must be invertible, otherwise numpy raises
        LinAlgError).

    Returns a real float; it is nan when an eigenvalue of Sigma2^{-1} Sigma1 is
    not positive.
    '''
    Sigma1 = np.asarray(Sigma1, dtype=float)
    Sigma2 = np.asarray(Sigma2, dtype=float)

    # Solve instead of inverting both matrices: inv(Sigma1) @ Sigma1 is just I.
    B = np.linalg.solve(Sigma2, Sigma1) - np.eye(Sigma1.shape[0])

    # B is not symmetric, so the general eigen-solver may return a complex dtype
    # even though the spectrum is real for SPD inputs; keep the real part so the
    # result stays a plain float. Eigenvectors are not needed.
    v = np.linalg.eigvals(B).real

    # log1p is accurate for small v (estimates close to the truth).
    return np.sum(v - np.log1p(v)) / 2

In [4]:
def dfmeanstd_to_latex(df_means, df_std, prec=5, label=None, caption="", resizebox=False, colindex='', rowindex=''):
    '''Print a LaTeX ``table`` whose cells read "$mean \\pm std$".

    df_means, df_std (DataFrame): same index and columns; one table row per index
        label and one table column per column label
    prec (integer): number of decimals kept for both mean and std
    label (str or None): emitted as \\label{...} only when it is a string
    caption (str): content of \\caption{...}
    resizebox: when truthy, the tabular is wrapped in \\resizebox{<resizebox>pt}{!}{...}
    colindex, rowindex (str): when at least one is non-empty, the top-left cell is
        \\diagbox{rowindex}{colindex}; otherwise it is left blank

    Returns None; the LaTeX source is written to stdout.
    '''
    output = '\\begin{table}[ht]\n\\centering\n'
    if resizebox:
        output += '\\resizebox{' + str(resizebox) + 'pt}{!}{%\n'
    output += '\\begin{tabular}{' + '|l|' + len(df_means.columns) * 'c|' + '}\n\\hline\n'

    # Header row: the corner cell carries the axis names when either one is given.
    if colindex != '' or rowindex != '':
        output += '\\diagbox{' + rowindex + '}{' + colindex + '}'
    else:
        output += ' '
    for i in df_means.columns:
        output += " & " + str(i)
    output += "\\\\\n\\hline\n"

    # Body: one line per index label.
    for j in df_means.index:
        output += str(j)
        for i in df_means.columns:
            mean_txt = str(round(df_means.loc[j, i], prec))
            std_txt = str(round(df_std.loc[j, i], prec))
            output += " & " + "$" + mean_txt + " \\pm " + std_txt + "$ "
        output += "\\\\\n\\hline\n"

    output += "\\end{tabular}"
    if resizebox:
        # Closes the \resizebox group opened above.
        output += '%\n}\n'
    else:
        output += '\n'
    output += "\\caption{" + caption + "}\n"
    if isinstance(label, str):
        output += '\\label{' + label + '}\n'
    output += "\\end{table}"
    print(output)

In [5]:
def table_wrt_sigma(sigma):
    '''Run the Gaussian benchmark for one within-block covariance value and print a LaTeX table.

    sigma (real): off-diagonal value inside each block of the true covariance (see Sigma)

    For every block structure, draws ``reps`` samples of size N from N(0, Sigma(struct, sigma)),
    estimates the covariance with the empirical covariance, BDCS and ISDE, and records the
    KL loss to the true covariance, the wall-clock time and (for BDCS/ISDE) the partition.
    Only the KL scores are rendered, via show_tables. Returns None.
    '''
    Ss = [[2, 2], [4, 4, 1], [4, 3, 2, 3], [4, 4, 3, 3, 2]]
    N = 6000
    # m and n are passed to isde.ISDE; presumably the two sample-split sizes -- TODO confirm.
    m = N // 2
    n = N // 2

    reps = 10

    output_ggm = {i : {} for i in ['ISDE', 'BDCS', 'Empirical Covariance']}

    for struct in Ss:
        key = str(struct)
        d = np.sum(struct)
        # The true covariance depends only on (struct, sigma): build it once per structure.
        cov = Sigma(struct=struct, sigma_value=sigma)

        output_ggm['ISDE'][key] = {'KL' : [], 'exec_time' : [], 'partition' : []}
        output_ggm['BDCS'][key] = {'KL' : [], 'exec_time' : [], 'partition' : []}
        output_ggm['Empirical Covariance'][key] = {'KL' : [], 'exec_time' : []}

        for _ in range(reps):
            X = np.random.multivariate_normal(mean=np.zeros(d), cov=cov, size=N)

            #Empirical Covariance
            start = time()
            cov_empcov = empirical_covariance(X)
            end = time()
            output_ggm['Empirical Covariance'][key]['KL'].append(KL(cov, cov_empcov))
            output_ggm['Empirical Covariance'][key]['exec_time'].append(end - start)

            #BDCS
            start = time()
            A = bdcs.BDCS()
            cov_bdcs, partition_bdcs = A.fit(X)
            end = time()
            output_ggm['BDCS'][key]['KL'].append(KL(cov, cov_bdcs))
            output_ggm['BDCS'][key]['exec_time'].append(end - start)
            output_ggm['BDCS'][key]['partition'].append(str(partition_bdcs))

            #ISDE
            # Only the ISDE call is timed; the covariance on the returned partition is
            # computed afterwards, outside the timed region.
            start = time()
            partition_isde, _ = isde.ISDE(X, m, n, isde.EmpCovariance)
            end = time()
            cov_isde = bdcs.empirical_covariance_partition(X, partition_isde)
            output_ggm['ISDE'][key]['KL'].append(KL(cov, cov_isde))
            output_ggm['ISDE'][key]['exec_time'].append(end - start)
            output_ggm['ISDE'][key]['partition'].append(str(partition_isde))

    label = "GaussianSigma" + str(sigma)
    # Backslash is escaped explicitly: "\s" is an invalid escape sequence in a normal string.
    caption = "Gaussian: KL loss (.$10^3$) - $N=" + str(N) + "$, $\\sigma = " + str(sigma) + "$"

    show_tables(dict_output=output_ggm, factor=1e3, prec=5, score='KL', label=label, caption=caption, resizebox=330)

# Print one LaTeX table per within-block covariance value.
# The draws inside table_wrt_sigma use the global NumPy RNG, so the order matters.
sigmas =  [0.1, 0.3, 0.5, 0.7, 0.9]
for sigma in sigmas:
    table_wrt_sigma(sigma)

\begin{table}[ht]
\centering
\resizebox{330pt}{!}{%
\begin{tabular}{|l|c|c|c|c|}
\hline
  & [2, 2] & [4, 4, 1] & [4, 3, 2, 3] & [4, 4, 3, 3, 2]\\
\hline
ISDE & $0.90586 \pm 1.45386$  & $1.84765 \pm 0.7583$  & $3.6083 \pm 2.38983$  & $3.42761 \pm 1.15221$ \\
\hline
BDCS & $0.33574 \pm 0.21546$  & $1.78015 \pm 0.58581$  & $2.87573 \pm 1.20051$  & $4.4576 \pm 1.7183$ \\
\hline
Empirical Covariance & $0.64975 \pm 0.27391$  & $3.75359 \pm 0.75287$  & $6.8254 \pm 1.2289$  & $11.60758 \pm 1.74779$ \\
\hline
\end{tabular}%
}
\caption{Gaussian: KL loss (.$10^3$) - $N=6000$, $\sigma = 0.1$}
\label{GaussianSigma0.1}
\end{table}
\begin{table}[ht]
\centering
\resizebox{330pt}{!}{%
\begin{tabular}{|l|c|c|c|c|}
\hline
  & [2, 2] & [4, 4, 1] & [4, 3, 2, 3] & [4, 4, 3, 3, 2]\\
\hline
ISDE & $0.6496 \pm 0.36463$  & $2.10295 \pm 0.49494$  & $2.42813 \pm 0.93357$  & $3.67274 \pm 1.09725$ \\
\hline
BDCS & $0.53416 \pm 0.28393$  & $2.13123 \pm 0.52902$  & $2.70863 \pm 1.21694$  & $3.24106 \pm 0.53982$ \\
\h

In [6]:
def table_wrt_N(N):
    '''Run the Gaussian benchmark for one sample size and print a LaTeX table.

    N (integer): number of samples drawn per repetition

    For every block structure, draws ``reps`` samples of size N from N(0, Sigma(struct, 0.6)),
    estimates the covariance with the empirical covariance, BDCS and ISDE, and records the
    KL loss to the true covariance, the wall-clock time and (for BDCS/ISDE) the partition.
    Only the KL scores are rendered, via show_tables. Returns None.
    '''
    Ss = [[2, 2], [4, 4, 1], [4, 3, 2, 3], [4, 4, 3, 3, 2]]
    sigma = 0.6
    # m and n are passed to isde.ISDE; presumably the two sample-split sizes -- TODO confirm.
    m = N // 2
    n = N // 2

    reps = 10

    output_ggm = {i : {} for i in ['ISDE', 'BDCS', 'Empirical Covariance']}

    for struct in Ss:
        key = str(struct)
        d = np.sum(struct)
        # The true covariance depends only on (struct, sigma): build it once per structure.
        cov = Sigma(struct=struct, sigma_value=sigma)

        output_ggm['ISDE'][key] = {'KL' : [], 'exec_time' : [], 'partition' : []}
        output_ggm['BDCS'][key] = {'KL' : [], 'exec_time' : [], 'partition' : []}
        output_ggm['Empirical Covariance'][key] = {'KL' : [], 'exec_time' : []}

        for _ in range(reps):
            X = np.random.multivariate_normal(mean=np.zeros(d), cov=cov, size=N)

            #Empirical Covariance
            start = time()
            cov_empcov = empirical_covariance(X)
            end = time()
            output_ggm['Empirical Covariance'][key]['KL'].append(KL(cov, cov_empcov))
            output_ggm['Empirical Covariance'][key]['exec_time'].append(end - start)

            #BDCS
            start = time()
            A = bdcs.BDCS()
            cov_bdcs, partition_bdcs = A.fit(X)
            end = time()
            output_ggm['BDCS'][key]['KL'].append(KL(cov, cov_bdcs))
            output_ggm['BDCS'][key]['exec_time'].append(end - start)
            output_ggm['BDCS'][key]['partition'].append(str(partition_bdcs))

            #ISDE
            # Only the ISDE call is timed; the covariance on the returned partition is
            # computed afterwards, outside the timed region.
            start = time()
            partition_isde, _ = isde.ISDE(X, m, n, isde.EmpCovariance)
            end = time()
            cov_isde = bdcs.empirical_covariance_partition(X, partition_isde)
            output_ggm['ISDE'][key]['KL'].append(KL(cov, cov_isde))
            output_ggm['ISDE'][key]['exec_time'].append(end - start)
            output_ggm['ISDE'][key]['partition'].append(str(partition_isde))

    label = "GaussianN" + str(N)
    # Backslash is escaped explicitly: "\s" is an invalid escape sequence in a normal string.
    caption = "Gaussian: KL loss (.$10^3$) - $N=" + str(N) + "$, $\\sigma = " + str(sigma) + "$"

    show_tables(dict_output=output_ggm, factor=1e3, prec=5, score='KL', label=label, caption=caption, resizebox=330)

# Print one LaTeX table per sample size.
# The draws inside table_wrt_N use the global NumPy RNG, so the order matters.
Ns =  [200, 500, 1000, 2000, 8000]
for N in Ns:
    table_wrt_N(N)

\begin{table}[ht]
\centering
\resizebox{330pt}{!}{%
\begin{tabular}{|l|c|c|c|c|}
\hline
  & [2, 2] & [4, 4, 1] & [4, 3, 2, 3] & [4, 4, 3, 3, 2]\\
\hline
ISDE & $22.32724 \pm 23.06876$  & $60.58244 \pm 25.66192$  & $70.66376 \pm 11.11799$  & $106.88255 \pm 39.7745$ \\
\hline
BDCS & $13.40399 \pm 8.77627$  & $64.38877 \pm 28.91938$  & $80.27932 \pm 26.80539$  & $94.30464 \pm 20.77999$ \\
\hline
Empirical Covariance & $27.64968 \pm 20.79988$  & $123.24056 \pm 34.04314$  & $218.34033 \pm 47.68867$  & $384.82419 \pm 61.92759$ \\
\hline
\end{tabular}%
}
\caption{Gaussian: KL loss (.$10^3$) - $N=200$, $\sigma = 0.6$}
\label{GaussianN200}
\end{table}
\begin{table}[ht]
\centering
\resizebox{330pt}{!}{%
\begin{tabular}{|l|c|c|c|c|}
\hline
  & [2, 2] & [4, 4, 1] & [4, 3, 2, 3] & [4, 4, 3, 3, 2]\\
\hline
ISDE & $6.70797 \pm 4.35035$  & $26.92186 \pm 11.318$  & $33.62495 \pm 9.88967$  & $47.28646 \pm 14.996$ \\
\hline
BDCS & $5.93573 \pm 3.09991$  & $26.81976 \pm 10.81855$  & $38.61735 \pm 6.82126$