In [1]:
import itertools
from time import perf_counter, time

import more_itertools as mit
import numpy as np
import pandas as pd
from scipy.special import comb

import includes.isde as isde

np.random.seed(42)

In [2]:
def running_time_partition_optimization_pulp(d):
    """Time ``isde.find_optimal_partition`` on random per-subset scores.

    Every non-empty subset of ``range(d)`` (as produced by
    ``itertools.combinations``, i.e. a sorted tuple) is given a random
    ``'log_likelihood'`` drawn with ``np.random.rand()``. Only the call to
    ``isde.find_optimal_partition`` is timed; building the dictionary is not.

    Parameters
    ----------
    d : int
        Number of features; the dictionary holds one entry per non-empty
        subset of ``range(d)``.

    Returns
    -------
    float
        Elapsed time in seconds for the partition search.
    """
    # We generate a random dictionary of log-likelihood per subset
    by_subsets = {}
    for size in range(1, d + 1):
        for S in itertools.combinations(range(d), size):
            by_subsets[S] = {'log_likelihood': np.random.rand()}

    # We select best partition using linear programming formulation.
    # perf_counter is a monotonic, high-resolution clock intended for
    # measuring durations, unlike the adjustable wall clock time.time().
    start = perf_counter()
    isde.find_optimal_partition(by_subsets, d)
    end = perf_counter()

    return end - start

def running_time_partition_optimization_naive(d):
    """Time an exhaustive search over all set partitions of ``range(d)``.

    Every non-empty subset of ``range(d)`` is given a random
    ``'log_likelihood'``. The timed region enumerates every partition yielded
    by ``more_itertools.set_partitions``, scores it as the sum of the
    log-likelihoods of its blocks, and keeps the best one. The best partition
    itself is not returned; only the elapsed time is.

    Parameters
    ----------
    d : int
        Number of features to partition.

    Returns
    -------
    float
        Elapsed time in seconds for the exhaustive search.
    """
    # We generate a random dictionary of log-likelihood per subset
    by_subsets = {}
    for size in range(1, d + 1):
        for S in itertools.combinations(range(d), size):
            by_subsets[S] = {'log_likelihood': np.random.rand()}

    best_score = -np.inf
    best_part = []

    # We select best partition using naive formulation.
    # perf_counter is a monotonic, high-resolution clock intended for
    # measuring durations, unlike the adjustable wall clock time.time().
    # The timed statements are deliberately left unchanged: they are the
    # workload being benchmarked.
    start = perf_counter()
    ls = list(range(d))
    for p in mit.set_partitions(ls):
        # Each block S of the partition is looked up by its tuple form.
        score_p = np.sum([by_subsets[tuple(S)]['log_likelihood'] for S in p])
        if score_p > best_score:
            best_score = score_p
            best_part = p
    end = perf_counter()

    return end - start

def df_to_latex(df, prec=5, label=None, caption="", resizebox=False, colindex='', rowindex=''):
    """Print ``df`` as a LaTeX ``table`` float containing a ruled ``tabular``.

    The first tabular column holds the row labels, followed by one centred
    column per DataFrame column. The top-left cell is a ``diagbox`` holding
    ``rowindex`` and ``colindex``. Every cell value is rounded to ``prec``
    decimals before being converted with ``str``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to render; cells must support ``round``.
    prec : int
        Number of decimals passed to ``round`` for each cell.
    label : str or None
        If a string, a LaTeX label with that name is emitted after the caption.
    caption : str
        Caption text, inserted verbatim (no escaping is performed).
    resizebox : number or False
        If truthy, the tabular is wrapped in a ``resizebox`` of that width in pt.
    colindex, rowindex : str
        Texts placed in the ``diagbox`` corner cell.

    Returns
    -------
    None
        The LaTeX source is written to stdout with ``print``.
    """
    # All backslashes are written as '\\': sequences such as '\h' or '\d' are
    # invalid escapes that trigger a SyntaxWarning on recent Python versions.
    row_end = "\\\\\n\\hline\n"
    header_cells = ''.join(" & " + str(col) for col in df.columns)

    parts = ['\\begin{table}[H]\n\\centering\n']
    if resizebox:
        parts.append('\\resizebox{' + str(resizebox) + 'pt}{!}{%\n')
    parts.append('\\begin{tabular}{' + '|l|' + len(df.columns) * 'c|' + '}\n\\hline\n')
    parts.append('\\diagbox{' + rowindex + '}{' + colindex + '}' + header_cells + row_end)

    for row in df.index:
        # .loc is an explicit label-based lookup (no chained indexing).
        cells = ''.join(
            " & " + str(round(df.loc[row, col], prec)) for col in df.columns
        )
        parts.append(str(row) + cells + row_end)

    parts.append("\\end{tabular}")
    # Close the resizebox group when one was opened.
    parts.append('%\n}\n' if resizebox else '\n')
    parts.append("\\caption{" + caption + "}\n")
    if isinstance(label, str):
        parts.append('\\label{' + label + '}\n')
    parts.append("\\end{table}")
    print(''.join(parts))
    
def running_time_estimate_f(m, d, estimator, estimation=True, **params_estimator):
    """Extrapolate the time needed to fit ``estimator`` on every feature subset.

    A random ``(m, d)`` sample is drawn. For each subset size ``i`` in
    ``1..d`` the estimator is fitted once on the first ``i`` columns, and the
    measured time is multiplied by ``comb(d, i)``, the number of subsets of
    that size. The weighted times are summed.

    Parameters
    ----------
    m : int
        Number of rows of the random sample passed to the estimator.
    d : int
        Number of features.
    estimator : callable
        Called as ``estimator(W_sub, params_estimator)``, where the second
        argument is the dict of extra keyword arguments.
    estimation : bool
        When False nothing is measured and ``None`` is returned; only the
        extrapolated mode is implemented.
    **params_estimator
        Collected into a dict and forwarded positionally to ``estimator``.

    Returns
    -------
    float or None
        Extrapolated total time in seconds, or ``None`` if ``estimation`` is False.
    """
    # Drawn before the flag check so the global RNG stream advances the same
    # way whichever branch is taken.
    W = np.random.rand(m, d)

    if not estimation:
        return None

    total = 0
    for i in range(1, d + 1):
        # perf_counter is a monotonic, high-resolution clock intended for
        # measuring durations, unlike the adjustable wall clock time.time().
        start = perf_counter()
        estimator(W[:, 0:i], params_estimator)
        elapsed = perf_counter() - start
        # One timing per subset size stands in for all comb(d, i) subsets.
        total += elapsed * comb(d, i)

    return total
    
def running_time_loglikelihoods_kde(m, n, d, estimation=True):
    """Extrapolate the time to evaluate KDE log-likelihoods on every feature subset.

    Random samples ``W`` of shape ``(m, d)`` and ``Z`` of shape ``(n, d)`` are
    drawn. For each subset size ``i`` in ``1..d``, an ``isde.GaussianKDE`` with
    ``bandwidth=1`` is built and the mean of its ``score_samples`` output is
    computed with ``W[:, :i]`` as grid points and ``Z[:, :i]`` as evaluation
    points. The measured time is multiplied by ``comb(d, i)``, the number of
    subsets of that size, and the weighted times are summed.

    Parameters
    ----------
    m : int
        Number of rows of the grid sample ``W``.
    n : int
        Number of rows of the evaluation sample ``Z``.
    d : int
        Number of features.
    estimation : bool
        When False nothing is measured and ``None`` is returned; only the
        extrapolated mode is implemented.

    Returns
    -------
    float or None
        Extrapolated total time in seconds, or ``None`` if ``estimation`` is False.
    """
    # Drawn before the flag check so the global RNG stream advances the same
    # way whichever branch is taken.
    W = np.random.rand(m, d)
    Z = np.random.rand(n, d)

    if not estimation:
        return None

    total = 0
    for i in range(1, d + 1):
        # perf_counter is a monotonic, high-resolution clock intended for
        # measuring durations, unlike the adjustable wall clock time.time().
        start = perf_counter()
        f = isde.GaussianKDE(bandwidth=1)
        # The mean is discarded; it is computed only so its cost is timed.
        np.mean(f.score_samples(grid_points=W[:, 0:i], eval_points=Z[:, 0:i]))
        elapsed = perf_counter() - start
        # One timing per subset size stands in for all comb(d, i) subsets.
        total += elapsed * comb(d, i)

    return total
    
def running_time_ISDE(m, n, d, estimator, estimation=True, **params_estimator):
    """Extrapolate the time to fit and score ``estimator`` on every feature subset.

    Random samples ``W`` of shape ``(m, d)`` and ``Z`` of shape ``(n, d)`` are
    drawn. For each subset size ``i`` in ``1..d``, the estimator is fitted on
    ``W[:, :i]`` and the mean of the fitted object's ``score_samples`` output
    is computed with ``W[:, :i]`` as grid points and ``Z[:, :i]`` as
    evaluation points. The measured time (fit plus scoring) is multiplied by
    ``comb(d, i)``, the number of subsets of that size, and the weighted times
    are summed.

    Parameters
    ----------
    m : int
        Number of rows of the fitting sample ``W``.
    n : int
        Number of rows of the evaluation sample ``Z``.
    d : int
        Number of features.
    estimator : callable
        Called as ``estimator(W_sub, params_estimator)``; must return a pair
        whose first element exposes
        ``score_samples(grid_points=..., eval_points=...)``.
    estimation : bool
        When False nothing is measured and ``None`` is returned; only the
        extrapolated mode is implemented.
    **params_estimator
        Collected into a dict and forwarded positionally to ``estimator``.

    Returns
    -------
    float or None
        Extrapolated total time in seconds, or ``None`` if ``estimation`` is False.
    """
    # Drawn before the flag check so the global RNG stream advances the same
    # way whichever branch is taken.
    W = np.random.rand(m, d)
    Z = np.random.rand(n, d)

    if not estimation:
        return None

    total = 0
    for i in range(1, d + 1):
        # perf_counter is a monotonic, high-resolution clock intended for
        # measuring durations, unlike the adjustable wall clock time.time().
        start = perf_counter()
        f, _ = estimator(W[:, 0:i], params_estimator)
        # The mean is discarded; it is computed only so its cost is timed.
        np.mean(f.score_samples(grid_points=W[:, 0:i], eval_points=Z[:, 0:i]))
        elapsed = perf_counter() - start
        # One timing per subset size stands in for all comb(d, i) subsets.
        total += elapsed * comb(d, i)

    return total

In [3]:
ms = [500 * i for i in range(1, 11)]
ns = [500, 2500, 5000, 10000]


# ISDE fixed h: one LaTeX table of estimated running times per dimension d,
# with columns indexed by m and rows indexed by n.
for d in range(2, 16):

    df = pd.DataFrame(0.0, columns=ms, index=ns)
    # The estimator-fitting time does not depend on n, so it is measured once per m.
    running_time_estimates = { m : running_time_estimate_f(m=m, d=d, estimator=isde.KDE_fixed_h, estimation=True, h=1) for m in ms }

    for m in df.columns:
        for n in df.index:
            # Single .loc assignment: the chained form df[m][n] = ... writes to a
            # temporary and leaves df unchanged under pandas Copy-on-Write.
            df.loc[n, m] = running_time_estimates[m] + running_time_loglikelihoods_kde(m=m, n=n, d=d, estimation=True)

    caption = "Estimated Running Times (seconds) : ISDE\\_Fixed\\_h, d={}".format(d)
    label = "runningtimefixed" + str(d)
    df_to_latex(df, prec=1, label=label, caption=caption, resizebox=330, rowindex="n", colindex="m" )
    print("\n\n")

\begin{table}[H]
\centering
\resizebox{330pt}{!}{%
\begin{tabular}{|l|c|c|c|c|c|c|c|c|c|c|}
\hline
\diagbox{n}{m} & 500 & 1000 & 1500 & 2000 & 2500 & 3000 & 3500 & 4000 & 4500 & 5000\\
\hline
500 & 0.2 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.1 & 0.1 & 0.1\\
\hline
2500 & 0.0 & 0.0 & 0.1 & 0.1 & 0.1 & 0.1 & 0.1 & 0.1 & 0.2 & 0.2\\
\hline
5000 & 0.0 & 0.1 & 0.1 & 0.1 & 0.2 & 0.2 & 0.2 & 0.3 & 0.3 & 0.3\\
\hline
10000 & 0.1 & 0.1 & 0.2 & 0.3 & 0.3 & 0.4 & 0.5 & 0.6 & 0.6 & 0.7\\
\hline
\end{tabular}%
}
\caption{Estimated Running Times (seconds) : ISDE\_Fixed\_h, d=2}
\label{runningtimefixed2}
\end{table}



\begin{table}[H]
\centering
\resizebox{330pt}{!}{%
\begin{tabular}{|l|c|c|c|c|c|c|c|c|c|c|}
\hline
\diagbox{n}{m} & 500 & 1000 & 1500 & 2000 & 2500 & 3000 & 3500 & 4000 & 4500 & 5000\\
\hline
500 & 0.0 & 0.0 & 0.0 & 0.1 & 0.1 & 0.1 & 0.1 & 0.1 & 0.1 & 0.2\\
\hline
2500 & 0.0 & 0.1 & 0.1 & 0.2 & 0.2 & 0.3 & 0.3 & 0.3 & 0.4 & 0.4\\
\hline
5000 & 0.1 & 0.2 & 0.2 & 0.3 & 0.4 & 0.5 & 0.6 & 

\begin{table}[H]
\centering
\resizebox{330pt}{!}{%
\begin{tabular}{|l|c|c|c|c|c|c|c|c|c|c|}
\hline
\diagbox{n}{m} & 500 & 1000 & 1500 & 2000 & 2500 & 3000 & 3500 & 4000 & 4500 & 5000\\
\hline
500 & 150.1 & 267.6 & 382.9 & 508.3 & 634.8 & 745.1 & 877.7 & 997.7 & 1107.2 & 1228.9\\
\hline
2500 & 390.8 & 749.9 & 1104.7 & 1461.3 & 1826.4 & 2173.9 & 2529.7 & 2890.7 & 3246.2 & 3603.2\\
\hline
5000 & 718.4 & 1377.1 & 2144.5 & 2869.8 & 3660.0 & 4265.7 & 5002.2 & 5796.6 & 6425.2 & 7196.5\\
\hline
10000 & 1322.7 & 2632.9 & 4030.6 & 5211.7 & 6450.3 & 7768.7 & 8966.9 & 10480.7 & 11823.0 & 12989.6\\
\hline
\end{tabular}%
}
\caption{Estimated Running Times (seconds) : ISDE\_Fixed\_h, d=15}
\label{runningtimefixed15}
\end{table}





In [4]:
ms = [500 * i for i in range(1, 11)]
ns = [500, 2500, 5000, 10000]


# ISDE CVKDE: one LaTeX table of estimated running times per dimension d,
# with columns indexed by m and rows indexed by n.
for d in range(2, 16):

    df = pd.DataFrame(0.0, columns=ms, index=ns)
    # The estimator-fitting time does not depend on n, so it is measured once per m.
    # NOTE(review): the factor 10 is not explained here; presumably it scales the
    # single measured fit to a number of repetitions -- confirm.
    running_time_estimates = { m : 10 * running_time_estimate_f(m=m, d=d, estimator=isde.CVKDE, estimation=True, hs=np.logspace(0,1,5)) for m in ms }

    for m in df.columns:
        for n in df.index:
            # Single .loc assignment: the chained form df[m][n] = ... writes to a
            # temporary and leaves df unchanged under pandas Copy-on-Write.
            df.loc[n, m] = running_time_estimates[m] + running_time_loglikelihoods_kde(m=m, n=n, d=d, estimation=True)

    caption = "Estimated Running Times (seconds) : ISDE\\_CVKDE, d={}".format(d)
    label = "runningtimecvkde" + str(d)
    df_to_latex(df, prec=1, label=label, caption=caption, resizebox=330, rowindex="n", colindex="m" )
    print("\n\n")

    

\begin{table}[H]
\centering
\resizebox{330pt}{!}{%
\begin{tabular}{|l|c|c|c|c|c|c|c|c|c|c|}
\hline
\diagbox{n}{m} & 500 & 1000 & 1500 & 2000 & 2500 & 3000 & 3500 & 4000 & 4500 & 5000\\
\hline
500 & 1.5 & 3.2 & 4.4 & 6.0 & 7.3 & 9.0 & 10.7 & 12.8 & 19.6 & 22.1\\
\hline
2500 & 1.5 & 3.2 & 4.4 & 6.0 & 7.4 & 9.1 & 10.8 & 12.9 & 19.7 & 22.2\\
\hline
5000 & 1.5 & 3.2 & 4.4 & 6.1 & 7.4 & 9.2 & 10.9 & 13.0 & 19.8 & 22.4\\
\hline
10000 & 1.6 & 3.3 & 4.5 & 6.2 & 7.6 & 9.3 & 11.1 & 13.2 & 20.1 & 22.7\\
\hline
\end{tabular}%
}
\caption{Estimated Running Times (seconds) : ISDE\_CVKDE, d=2}
\label{runningtimecvkde2}
\end{table}



\begin{table}[H]
\centering
\resizebox{330pt}{!}{%
\begin{tabular}{|l|c|c|c|c|c|c|c|c|c|c|}
\hline
\diagbox{n}{m} & 500 & 1000 & 1500 & 2000 & 2500 & 3000 & 3500 & 4000 & 4500 & 5000\\
\hline
500 & 3.3 & 7.2 & 10.7 & 14.1 & 17.9 & 22.2 & 26.1 & 30.4 & 46.9 & 53.0\\
\hline
2500 & 3.4 & 7.3 & 10.7 & 14.2 & 18.1 & 22.4 & 26.3 & 30.6 & 47.2 & 53.3\\
\hline
5000 & 3.4 & 7.3 & 1

\begin{table}[H]
\centering
\resizebox{330pt}{!}{%
\begin{tabular}{|l|c|c|c|c|c|c|c|c|c|c|}
\hline
\diagbox{n}{m} & 500 & 1000 & 1500 & 2000 & 2500 & 3000 & 3500 & 4000 & 4500 & 5000\\
\hline
500 & 10376.0 & 26807.2 & 39597.1 & 52911.4 & 66656.8 & 80807.5 & 95475.6 & 110589.9 & 176169.4 & 197859.1\\
\hline
2500 & 10491.3 & 27036.3 & 39934.9 & 53366.2 & 67226.6 & 81490.3 & 96270.9 & 111498.6 & 177190.2 & 198993.3\\
\hline
5000 & 10644.1 & 27345.4 & 40432.5 & 54030.0 & 68078.8 & 82520.1 & 97429.8 & 112820.9 & 178706.5 & 200733.6\\
\hline
10000 & 10955.9 & 27928.9 & 41271.2 & 55258.6 & 69473.9 & 84135.4 & 99366.6 & 115036.3 & 181195.7 & 203611.3\\
\hline
\end{tabular}%
}
\caption{Estimated Running Times (seconds) : ISDE\_CVKDE, d=14}
\label{runningtimecvkde14}
\end{table}



\begin{table}[H]
\centering
\resizebox{330pt}{!}{%
\begin{tabular}{|l|c|c|c|c|c|c|c|c|c|c|}
\hline
\diagbox{n}{m} & 500 & 1000 & 1500 & 2000 & 2500 & 3000 & 3500 & 4000 & 4500 & 5000\\
\hline
500 & 21298.0 & 55544.8 &

In [5]:
ms = [500 * i for i in range(1, 11)]
ns = [500, 2500, 5000, 10000]


# ISDE Empirical Covariance: one LaTeX table of estimated running times per
# dimension d, with columns indexed by m and rows indexed by n.
for d in range(2, 21):

    df = pd.DataFrame(0.0, columns=ms, index=ns)

    for m in df.columns:
        for n in df.index:
            # Single .loc assignment: the chained form df[m][n] = ... writes to a
            # temporary and leaves df unchanged under pandas Copy-on-Write.
            df.loc[n, m] = running_time_ISDE(m=m, n=n, d=d, estimator=isde.EmpCovariance)

    caption = "Estimated Running Times (seconds) : ISDE Empirical Covariance, d={}".format(d)
    label = "runningtimecov" + str(d)
    df_to_latex(df, prec=1, label=label, caption=caption, resizebox=330, rowindex="n", colindex="m" )
    print("\n\n")

\begin{table}[H]
\centering
\resizebox{330pt}{!}{%
\begin{tabular}{|l|c|c|c|c|c|c|c|c|c|c|}
\hline
\diagbox{n}{m} & 500 & 1000 & 1500 & 2000 & 2500 & 3000 & 3500 & 4000 & 4500 & 5000\\
\hline
500 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0\\
\hline
2500 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0\\
\hline
5000 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0\\
\hline
10000 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0\\
\hline
\end{tabular}%
}
\caption{Estimated Running Times (seconds) : ISDE Empirical Covariance, d=2}
\label{runningtimecov2}
\end{table}



\begin{table}[H]
\centering
\resizebox{330pt}{!}{%
\begin{tabular}{|l|c|c|c|c|c|c|c|c|c|c|}
\hline
\diagbox{n}{m} & 500 & 1000 & 1500 & 2000 & 2500 & 3000 & 3500 & 4000 & 4500 & 5000\\
\hline
500 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0\\
\hline
2500 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0\\
\hline
5000 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0

\begin{table}[H]
\centering
\resizebox{330pt}{!}{%
\begin{tabular}{|l|c|c|c|c|c|c|c|c|c|c|}
\hline
\diagbox{n}{m} & 500 & 1000 & 1500 & 2000 & 2500 & 3000 & 3500 & 4000 & 4500 & 5000\\
\hline
500 & 28.3 & 29.8 & 31.3 & 32.8 & 34.2 & 43.8 & 37.6 & 42.5 & 40.9 & 42.5\\
\hline
2500 & 40.2 & 42.0 & 44.0 & 44.9 & 46.3 & 49.0 & 49.9 & 54.8 & 53.1 & 55.1\\
\hline
5000 & 52.8 & 54.9 & 55.9 & 75.7 & 59.0 & 66.1 & 64.6 & 64.3 & 65.7 & 67.2\\
\hline
10000 & 76.6 & 78.4 & 80.9 & 81.6 & 83.0 & 85.1 & 86.6 & 88.2 & 90.1 & 91.3\\
\hline
\end{tabular}%
}
\caption{Estimated Running Times (seconds) : ISDE Empirical Covariance, d=16}
\label{runningtimecov16}
\end{table}



\begin{table}[H]
\centering
\resizebox{330pt}{!}{%
\begin{tabular}{|l|c|c|c|c|c|c|c|c|c|c|}
\hline
\diagbox{n}{m} & 500 & 1000 & 1500 & 2000 & 2500 & 3000 & 3500 & 4000 & 4500 & 5000\\
\hline
500 & 57.4 & 60.2 & 63.3 & 67.1 & 71.0 & 73.2 & 76.6 & 83.2 & 90.4 & 91.2\\
\hline
2500 & 82.1 & 85.7 & 88.5 & 206.3 & 94.4 & 98.4 & 101.2 & 104.

In [6]:
# Running time for partition selection: exhaustive search ('Naive') versus
# isde.find_optimal_partition ('LP'), for each number of features d.

df = pd.DataFrame(0.0, columns=[9, 10, 11, 12, 13, 14, 15], index=['Naive', 'LP'])

for d in df.columns:
    # Single .loc assignment: the chained form df[d]['Naive'] = ... writes to a
    # temporary and leaves df unchanged under pandas Copy-on-Write.
    df.loc['Naive', d] = running_time_partition_optimization_naive(d)
    df.loc['LP', d] = running_time_partition_optimization_pulp(d)

print("Running times (seconds) :\n\n{}".format(df))

Running times (seconds) :

             9         10        11         12          13           14  \
Naive  0.195260  0.835616  4.823390  30.043808  185.895725  1241.706594   
LP     0.095982  0.168136  0.378687   0.759250    1.712147     3.353611   

                15  
Naive  8978.706673  
LP        7.164827  
