In [14]:
import numpy as np
from sklearn.neighbors import KernelDensity
import pandas as pd
from time import time

import includes.isde as isde



In [15]:
# Running time study for KDE

ns = [100, 500, 2000, 5000, 10000, 20000]
running_time = pd.DataFrame(0.0, columns=ns, index=['pyKeOps', 'sklearn'])

for n in ns:
    X_1 = np.random.rand(n, 3)
    X_2 = np.random.rand(n, 3)
    isde.GaussianKDE(bandwidth=0.5).score_samples(grid_points=X_1, eval_points=X_2)

    #Running time for pyKeOps implementation 
    start = time()
    isde.GaussianKDE(bandwidth=0.5).score_samples(grid_points=X_1, eval_points=X_2)
    end = time()
    running_time[n]['pyKeOps'] = end - start

    start = time()
    logdensity_sklearn = KernelDensity(bandwidth=0.5).fit(X_1).score_samples(X_2)
    end = time()
    running_time[n]['sklearn'] = end - start


running_time

Unnamed: 0,100,500,2000,5000,10000,20000
pyKeOps,0.000557,0.001974,0.007307,0.017578,0.068412,0.116371
sklearn,0.00084,0.012578,0.195232,1.156373,4.915128,21.36305


In [19]:
import itertools
import pandas as pd
from time import time

def rnd_dict(d, k):
    #Compute a dictionnary with random entries to measur partition selection running time
    by_subsets = {}
    
    for i in range(1, k+1):
        for S in itertools.combinations(range(d), i):
            by_subsets[S] = {'log_likelihood': np.random.rand()}

    return by_subsets


ds = [5, 10, 20, 30, 40, 50]
ks = [2, 3, 4, 5]

running_time_partition_selection = pd.DataFrame(0.0, columns=ds, index=ks)

M = 1
for d in ds:
    for k in ks:

        tmp = rnd_dict(d, k)
        start = time()
        isde.find_optimal_partition(scores_by_subsets=tmp, max_size=k, min_size=1)
        end = time()
        running_time_partition_selection[d][k] += (end-start) / M


running_time_partition_selection

Unnamed: 0,5,10,20,30,40,50
2,0.017672,0.028136,0.084969,0.197647,0.469076,0.842412
3,0.018084,0.045602,0.412544,1.927046,6.200934,14.764756
4,0.018743,0.08693,1.975521,14.810436,61.628655,189.560663
5,0.01894,0.125942,7.289061,84.684407,481.581217,2045.178078


In [22]:
from scipy.special import comb

def estimated_time_ll_computations(d, k, m, n, cv, n_h):

    X_1 = np.random.rand(n, 3)
    X_2 = np.random.rand(n, 3)
    isde.GaussianKDE(bandwidth=0.5).score_samples(grid_points=X_1, eval_points=X_2)

    
    #Bandwidth selection
    time_per_KDE_bd_selection = {}
    for i in range(1, k+1):

        X_1 = np.random.rand(int(m / cv), i)
        X_2 = np.random.rand(m - int(m / cv), i)
 
        start = time()
        isde.GaussianKDE(bandwidth=0.5).score_samples(grid_points=X_1, eval_points=X_2)
        end = time()
        time_per_KDE_bd_selection[i] = end - start

    time_bd_selection = n_h * np.sum([ time_per_KDE_bd_selection[i] *  comb(i, d) for i in range(1, k+1)])
    print(time_per_KDE_bd_selection)
    print(time_bd_selection)

    #Likelihood computation
    time_per_KDE_ll_computation = {}
    for i in range(1, k+1):

        X_1 = np.random.rand(m, i)
        X_2 = np.random.rand(n, i)
 
        start = time()
        isde.GaussianKDE(bandwidth=0.5).score_samples(grid_points=X_1, eval_points=X_2)
        end = time()
        time_per_KDE_ll_computation[i] = end - start

    time_ll_computation = np.sum([ time_per_KDE_ll_computation[i] *  comb(i, d) for i in range(1, k+1)])

    return time_bd_selection + time_ll_computation


estimated_time_ll_computations(d=32, k=2, m=1000, n=1000, cv=5, n_h=30)

            

    

{1: 0.0009734630584716797, 2: 0.0008485317230224609}


0.0