In [14]:
%matplotlib inline
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d import proj3d
import pandas as pd
import numpy as np
import seaborn as sns
sns.set(color_codes=True)
from xgboost import XGBRegressor
from sklearn.neural_network import MLPRegressor
from matplotlib import patches
from sklearn import metrics
import os
import sys
import itertools
from pathlib import Path
import pickle
import logging
import time
import shutil
from codebase.prim_dens import PRIMdens
from Optimization_Methods.GlowWorm import GlowWorm,GlowWormDensity

DEBUG:matplotlib.pyplot:Loaded backend module://matplotlib_inline.backend_inline version unknown.


In [3]:
# Route log records to stdout (so they appear in the cell output) and set the
# root logger to DEBUG verbosity.
logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
# Logger named after the current module.
logger = logging.getLogger(name=__name__)

In [4]:
# Dimensionalities to benchmark: 1 through 5 inclusive.
dimensions = range(1, 5 + 1)
# Dataset sizes (number of rows): 10^5, 10^6 and 10^7.
size = np.power(10, np.arange(5, 8))

<h3>PRIM</h3>

In [5]:
# Collects one [dimension, size, mean_ms, std_ms] row per PRIM benchmark configuration.
prim_times = list()

In [6]:
# Benchmark PRIMdens.fit() on uniform random data for every (dimension, size) pair.
# The result list is (re)initialised in this cell so that re-running it does not
# append duplicate rows to results left over from a previous run.
prim_times = []
dd = None
for d in dimensions:
    for s in size:
        dd = np.random.uniform(size=(s,d))
        print(dd.shape)
        times = []
        for i in range(5):
            # The estimator is constructed before the timer starts, so only fit() is measured.
            prim = PRIMdens(dd)
            # perf_counter() is a monotonic, high-resolution clock meant for measuring
            # short durations; time.time() is wall-clock and may jump or be coarse.
            start = time.perf_counter()
            prim.fit()
            elapsed_ms = (time.perf_counter() - start)*1000 #convert to ms
            times.append(elapsed_ms)
            print(elapsed_ms)
        # One row per configuration: [dimension, size, mean ms, std ms] over the 5 repeats.
        prim_times.append([d,s,np.mean(times), np.std(times)])

(100000, 1)
11.992216110229492
12.00246810913086
10.002374649047852
10.002613067626953
10.00213623046875
(1000000, 1)
129.0287971496582
136.0304355621338
128.02910804748535
133.02993774414062
127.0287036895752
(10000000, 1)
1561.495304107666
1526.343822479248
1529.3447971343994
1606.3621044158936
1537.346363067627
(100000, 2)
35.00795364379883
34.00778770446777
34.008026123046875
33.006906509399414
34.00778770446777
(1000000, 2)
386.0912322998047
392.58432388305664
381.08325004577637
375.08463859558105
378.0853748321533
(10000000, 2)
4059.443473815918
3984.9159717559814
3973.266363143921
4126.297473907471
3936.893939971924
(100000, 3)
47.010183334350586
41.00942611694336
40.0090217590332
41.00918769836426
42.009830474853516
(1000000, 3)
474.1089344024658
472.1064567565918
493.11137199401855
464.10441398620605
460.10398864746094
(10000000, 3)
5392.474174499512
5670.421361923218
5453.244209289551
5402.260065078735
5337.709188461304
(100000, 4)
55.01270294189453
49.0107536315918
48.010826

In [7]:
# Mean and standard-deviation columns of the PRIM timings, converted from ms to seconds.
np.asarray(prim_times)[:, 2:] / 1000

array([[1.08003616e-02, 9.77335855e-04],
       [1.30629396e-01, 3.38296403e-03],
       [1.55217848e+00, 2.97706791e-02],
       [3.40076923e-02, 6.32786707e-04],
       [3.82585764e-01, 6.18122061e-03],
       [4.01616344e+00, 6.79748245e-02],
       [4.22095299e-02, 2.48231585e-03],
       [4.72707033e-01, 1.14149450e-02],
       [5.45122180e+00, 1.15573514e-01],
       [5.02113342e-02, 2.48262324e-03],
       [5.78731012e-01, 1.74577116e-02],
       [6.64456153e+00, 5.87015346e-02],
       [6.18147850e-02, 2.03985423e-03],
       [7.57172918e-01, 1.45902897e-02],
       [8.78723464e+00, 4.88596051e-01]])

In [8]:
# Standard deviation (last column) of the PRIM timings, converted from ms to seconds.
np.asarray(prim_times)[:, -1] / 1000

array([0.00097734, 0.00338296, 0.02977068, 0.00063279, 0.00618122,
       0.06797482, 0.00248232, 0.01141495, 0.11557351, 0.00248262,
       0.01745771, 0.05870153, 0.00203985, 0.01459029, 0.48859605])

In [9]:
# Persist the PRIM timing rows as comma-separated text.
# np.savetxt does not create missing directories, so make sure the target exists first.
os.makedirs('output', exist_ok=True)
np.savetxt('output/prim_times',np.array(prim_times),delimiter=',')

<h3>Glowworm using approximation</h3>

In [15]:
def _load_pickled_model(path):
    """Unpickle and return the object stored at ``path``.

    SECURITY: ``pickle.load`` can execute arbitrary code while deserialising,
    so only use this on model files that come from a trusted source.
    """
    with open(path, 'rb') as file:
        return pickle.load(file)


# Pre-trained models m1..m5; the number after "queries-uniform-" in each file
# name matches the variable suffix.
m1 = _load_pickled_model('models/queries-uniform-1-multi_False-density-XGB-RMSE=54.76-R2=0.98.pkl')
m2 = _load_pickled_model('models/queries-uniform-2-multi_False-density-XGB-RMSE=64.76-R2=0.98.pkl')
m3 = _load_pickled_model('models/queries-uniform-3-multi_False-density-XGB-RMSE=47.83-R2=0.98.pkl')
m4 = _load_pickled_model('models/queries-uniform-4-multi_False-density-XGB-RMSE=28.76-R2=0.99.pkl')
m5 = _load_pickled_model('models/queries-uniform-5-multi_False-density-XGB-RMSE=17.73-R2=0.99.pkl')
                    



In [18]:
def _make_objective_density(model, offset=1500, size_penalty=2.9):
    """Build the objective function handed to GlowWorm for ``model``.

    The returned callable takes a 2-D array ``X`` and computes, per row,
    ``log(model.predict(X) - offset) - size_penalty * sum(log(1 + X[:, k:]))``
    with ``k = X.shape[1] // 2`` (the penalty uses the second half of the columns).
    NaN results (e.g. the log of a negative number) are replaced by ``-inf``.

    NOTE(review): the meaning of ``offset`` and ``size_penalty`` is not documented
    in this notebook; the defaults are the constants previously hard-coded here.
    """
    def objective_density(X):
        res = np.log(model.predict(X) - offset) - size_penalty*np.sum(np.log(1+X[:,X.shape[1]//2:]),axis=1)
        res[np.isnan(res)] = -np.inf
        return res
    return objective_density


# Pre-trained model for each dimensionality; any other dimension falls back to m5.
models_by_dimension = {1: m1, 2: m2, 3: m3, 4: m4}

dd = None
glowworm_approx_details = []
for d in dimensions:
    # The model and objective depend only on the dimension, so build them once per d.
    m = models_by_dimension.get(d, m5)
    objective_density = _make_objective_density(m)
    for s in size:
        # NOTE(review): dd is only used for its shape in this cell; the timed
        # optimisation below never reads it. TODO confirm nothing else relies on
        # this global before dropping the (large) allocation.
        dd = np.random.uniform(size=(s,d))
        print(dd.shape)
        # Time 5 independent optimisation runs (construction + optimize()).
        times = []
        for i in range(5):
            # perf_counter() is a monotonic, high-resolution clock meant for measuring
            # short durations; time.time() is wall-clock and may jump or be coarse.
            start = time.perf_counter()
            gw = GlowWorm(objective_density, dimensions=2*d, nt=5, iter_max=100, glowworms=100)
            gw.optimize()
            elapsed_ms = (time.perf_counter() - start)*1000 #convert to ms
            times.append(elapsed_ms)
            print(elapsed_ms)
        # One row per configuration: [dimension, size, mean ms, std ms] over the 5 repeats.
        glowworm_approx_details.append([d,s,np.mean(times), np.std(times)])

(100000, 1)




2115.476369857788
2449.552536010742
2496.5624809265137
2895.1828479766846
2621.5898990631104
(1000000, 1)
2451.552629470825
2664.599657058716
2538.5725498199463
2763.622283935547
2541.0983562469482
(10000000, 1)
2826.160669326782
2478.064775466919
2758.6214542388916
2430.054187774658
2820.6355571746826
(100000, 2)
2239.5052909851074
2237.504005432129
2307.5196743011475
2250.507116317749
2372.5345134735107
(1000000, 2)
2313.5218620300293
2255.5081844329834
2426.257371902466
2278.5134315490723
2267.510414123535
(10000000, 2)
2267.5113677978516
2324.5229721069336
2400.540828704834
2279.022216796875
2369.5335388183594
(100000, 3)
2756.1287879943848
2795.6299781799316
2766.6232585906982
2546.5738773345947
2557.081699371338
(1000000, 3)
2577.580451965332
2636.646032333374
2335.52622795105
2506.5646171569824
2504.5652389526367
(10000000, 3)
2493.561267852783
2470.5564975738525
2471.5569019317627
2534.0898036956787
2183.4917068481445
(100000, 4)
2936.6610050201416
2793.6296463012695
2910.65597

In [19]:
# Show the collected rows: [dimension, size, mean time (ms), std of time (ms)].
glowworm_approx_details

[[1, 100000, 2515.672826766968, 253.03777769055608],
 [1, 1000000, 2591.8890953063965, 109.44934052529082],
 [1, 10000000, 2662.7073287963867, 172.6720567063211],
 [2, 100000, 2281.5141201019287, 52.19974532392546],
 [2, 1000000, 2308.262252807617, 62.095423457549316],
 [2, 10000000, 2328.2261848449707, 51.1013472898475],
 [3, 100000, 2684.4075202941895, 109.07225202360121],
 [3, 1000000, 2512.176513671875, 101.05947214458855],
 [3, 10000000, 2430.6512355804443, 125.70659939065821],
 [4, 100000, 2849.8419284820557, 97.59908369381807],
 [4, 1000000, 2951.475954055786, 41.89768356909365],
 [4, 10000000, 2805.431890487671, 178.82125001256293],
 [5, 100000, 2836.2388610839844, 141.50634217253014],
 [5, 1000000, 2658.200693130493, 134.76045494256545],
 [5, 10000000, 2845.8412170410156, 144.74185030340718]]

In [20]:
# Mean and standard-deviation columns of the glowworm timings, converted from ms to seconds.
print(np.asarray(glowworm_approx_details)[:, 2:] / 1000)

[[2.51567283 0.25303778]
 [2.5918891  0.10944934]
 [2.66270733 0.17267206]
 [2.28151412 0.05219975]
 [2.30826225 0.06209542]
 [2.32822618 0.05110135]
 [2.68440752 0.10907225]
 [2.51217651 0.10105947]
 [2.43065124 0.1257066 ]
 [2.84984193 0.09759908]
 [2.95147595 0.04189768]
 [2.80543189 0.17882125]
 [2.83623886 0.14150634]
 [2.65820069 0.13476045]
 [2.84584122 0.14474185]]


In [22]:
# Persist the glowworm timing rows as comma-separated text.
# np.savetxt does not create missing directories, so make sure the target exists first.
os.makedirs('output', exist_ok=True)
np.savetxt('output/glowworm_approx_details',np.array(glowworm_approx_details),delimiter=',')

In [23]:
# Render all figure text through LaTeX with a serif font.
plt.rc('text', usetex=True)
# Not referenced by the settings below.
font_size=22
params = {'backend': 'ps',
          # Must be a single string: Matplotlib 3.3 deprecated passing a list of
          # strings for this rcParam and newer releases reject it.
          'text.latex.preamble': '\\usepackage{gensymb}',
          'text.usetex': True,
          'font.family': 'serif'
}

plt.rcParams.update(params)

In [25]:
# Reload the persisted comma-separated timing tables from disk.
prim = np.loadtxt(fname='output/prim_times', delimiter=',')
# naive = np.loadtxt('output/performance/naive_numpy_details', delimiter=',')
# naive_glow = np.loadtxt('output/performance/naive_glowworm_details', delimiter=',')
glow_approx = np.loadtxt(fname='output/glowworm_approx_details', delimiter=',')

In [29]:
# Inspect the PRIM timing table that was just reloaded from 'output/prim_times'.
prim

array([[1.00000000e+00, 1.00000000e+05, 1.08003616e+01, 9.77335855e-01],
       [1.00000000e+00, 1.00000000e+06, 1.30629396e+02, 3.38296403e+00],
       [1.00000000e+00, 1.00000000e+07, 1.55217848e+03, 2.97706791e+01],
       [2.00000000e+00, 1.00000000e+05, 3.40076923e+01, 6.32786707e-01],
       [2.00000000e+00, 1.00000000e+06, 3.82585764e+02, 6.18122061e+00],
       [2.00000000e+00, 1.00000000e+07, 4.01616344e+03, 6.79748245e+01],
       [3.00000000e+00, 1.00000000e+05, 4.22095299e+01, 2.48231585e+00],
       [3.00000000e+00, 1.00000000e+06, 4.72707033e+02, 1.14149450e+01],
       [3.00000000e+00, 1.00000000e+07, 5.45122180e+03, 1.15573514e+02],
       [4.00000000e+00, 1.00000000e+05, 5.02113342e+01, 2.48262324e+00],
       [4.00000000e+00, 1.00000000e+06, 5.78731012e+02, 1.74577116e+01],
       [4.00000000e+00, 1.00000000e+07, 6.64456153e+03, 5.87015346e+01],
       [5.00000000e+00, 1.00000000e+05, 6.18147850e+01, 2.03985423e+00],
       [5.00000000e+00, 1.00000000e+06, 7.57172918e

In [40]:
# Label every timing row with the method that produced it.
source_names = ['prim'] * prim.shape[0] + ['glow_approx'] * glow_approx.shape[0]
# Stack the first three columns of both tables and prepend the label column.
# np.vstack is used because np.row_stack is a deprecated alias for it (NumPy 2.0).
# NOTE: combining strings and floats in one array makes NumPy store every value
# as a string, so numeric columns must be converted back before doing arithmetic.
data = np.column_stack((source_names, np.vstack((prim[:,:3], glow_approx[:,:3]))))

In [41]:
# Tabulate the stacked rows under named columns.
eval_df = pd.DataFrame(
    data=data,
    columns=['Source', 'Dimensions', 'Size', 'Time(ms)'],
)

In [42]:
# Display the combined timing table (columns: Source, Dimensions, Size, Time(ms)).
eval_df

Unnamed: 0,Source,Dimensions,Size,Time(ms)
0,prim,1.0,100000.0,10.80036163330078
1,prim,1.0,1000000.0,130.62939643859863
2,prim,1.0,10000000.0,1552.1784782409668
3,prim,2.0,100000.0,34.00769233703613
4,prim,2.0,1000000.0,382.5857639312744
5,prim,2.0,10000000.0,4016.163444519043
6,prim,3.0,100000.0,42.20952987670898
7,prim,3.0,1000000.0,472.70703315734863
8,prim,3.0,10000000.0,5451.221799850464
9,prim,4.0,100000.0,50.211334228515625


In [48]:
# Build a LaTeX table of run times in seconds: one row per (Source, Dimensions)
# and one column per dataset size.

# Derive a new frame instead of overwriting eval_df['Time(ms)'] in place.
# Overwriting would divide by 1000 again on every re-run of this cell and would
# leave seconds stored under a column labelled "ms".
timing_df = eval_df.copy()
timing_df['Time(s)'] = (pd.to_numeric(eval_df['Time(ms)'], errors='coerce') / 1000).round(2)

# Pivot: index = (Source, Dimensions), columns = the distinct Size values.
grouped_df = timing_df.groupby(['Source', 'Dimensions', 'Size'])['Time(s)'].mean().unstack(fill_value='')

# Build the size headers from the sizes actually present (in pivot order) rather
# than assuming a fixed set of three.
size_labels = ['Size={}'.format(int(float(size_value))) for size_value in grouped_df.columns]

# Turn the (Source, Dimensions) index back into ordinary columns and apply the headers.
grouped_df = grouped_df.reset_index()
grouped_df.columns = ['Source', 'Dimensions'] + size_labels

# Render the grouped DataFrame as a LaTeX table
latex_table = grouped_df.to_latex(index=False, escape=False)

# Print the LaTeX table
print(latex_table)

\begin{tabular}{llrrr}
\toprule
      Source & Dimensions &  Size=100000 &  Size=1000000 &  Size=10000000 \\
\midrule
 glow_approx &        1.0 &         2.52 &          2.59 &           2.66 \\
 glow_approx &        2.0 &         2.28 &          2.31 &           2.33 \\
 glow_approx &        3.0 &         2.68 &          2.51 &           2.43 \\
 glow_approx &        4.0 &         2.85 &          2.95 &           2.81 \\
 glow_approx &        5.0 &         2.84 &          2.66 &           2.85 \\
        prim &        1.0 &         0.01 &          0.13 &           1.55 \\
        prim &        2.0 &         0.03 &          0.38 &           4.02 \\
        prim &        3.0 &         0.04 &          0.47 &           5.45 \\
        prim &        4.0 &         0.05 &          0.58 &           6.64 \\
        prim &        5.0 &         0.06 &          0.76 &           8.79 \\
\bottomrule
\end{tabular}

