In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d import proj3d
import pandas as pd
import numpy as np
import seaborn as sns
sns.set(color_codes=True)
from xgboost import XGBRegressor
from sklearn.neural_network import MLPRegressor
from matplotlib import patches
from sklearn import metrics
import os
import sys
import itertools
from pathlib import Path
import pickle
import re
import logging
import time
import shutil
from prim_dens import PRIMdens
from Optimization_Methods.GlowWorm import GlowWorm,GlowWormDensity

In [None]:
# Route log records of level DEBUG and above to stdout so they show up in the cell output.
logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
# Logger named after the current module; used for this notebook's own messages.
logger = logging.getLogger(__name__)

In [None]:
# Benchmark grid: dimensionalities 1..5 and sizes 10^5, 10^6, 10^7.
dimensions = range(1, 6)
size = np.power(10, np.arange(5, 8))

<h3>PRIM</h3>

In [None]:
# Benchmark PRIMdens.fit() on uniform random data for every (dimension, size) pair.
# Each row appended to prim_times is [d, s, mean fit time (ms), std of fit time (ms)],
# computed over 5 repetitions on the same data set.
prim_times = []

dd = None
for d in dimensions:
    for s in size:
        dd = np.random.uniform(size=(s,d))
        print(dd.shape)
        times = []
        for i in range(5):
            # The estimator is constructed before the timer starts, so only fit() is measured.
            prim = PRIMdens(dd)
            # perf_counter() is monotonic and high-resolution; time.time() can jump
            # with system clock adjustments and is unsuitable for benchmarking.
            start = time.perf_counter()
            prim.fit()
            elapsed_ms = (time.perf_counter() - start)*1000 #convert to ms
            times.append(elapsed_ms)
            print(elapsed_ms)
        prim_times.append([d,s,np.mean(times), np.std(times)])

In [None]:
# Persist the PRIM benchmark rows ([d, s, mean ms, std ms]) as comma-separated text.
prim_results = np.array(prim_times)
# Show mean and std converted from ms to seconds. A bare expression is only rendered
# when it is the last statement of a cell, so the values are printed explicitly.
print(prim_results[:,2:]/1000)
# Make sure the target directory exists; otherwise savetxt raises and the
# (expensive) benchmark results above would be lost.
os.makedirs('output', exist_ok=True)
np.savetxt('output/prim_times',prim_results,delimiter=',')

<h3>Glowworm using approximation</h3>

In [None]:
# Load the pickled density models from 'models/' and expose them as module-level
# variables m<number>, where <number> is the integer captured from the file name.
#
# os.listdir() returns bare file names (no directory prefix), so the pattern must
# describe the file name only; with a leading 'models/' it could never match and
# no model would be loaded.
pattern = re.compile(r'queries-uniform-(\d+)-multi_False-density-XGB-RMSE=.*-R2=.*\.pkl')

# Sorted so that, if several files share the same number, the one that wins is
# deterministic instead of depending on directory listing order.
for filename in sorted(os.listdir('models')):
    match = pattern.match(filename)
    if not match:
        continue

    # Number embedded in the file name; it becomes the suffix of the variable name.
    number = int(match.group(1))

    # NOTE(security): pickle.load can execute arbitrary code; only load model
    # files that come from a trusted source.
    with open(os.path.join('models', filename), 'rb') as file:
        globals()[f'm{number}'] = pickle.load(file)


In [None]:
# Benchmark GlowWorm optimisation of a model-based objective for every
# (dimension, size) pair. Each row appended to glowworm_approx_details is
# [d, s, mean time (ms), std of time (ms)] over 5 repetitions; the timed region
# covers both constructing the optimiser and running optimize().
def objective_density(X):
    """Score candidate solutions X (one per row) using the current model `m`.

    Computes log(m.predict(X) - 1500) - 2.9 * sum(log(1 + X[:, k:]), axis=1),
    where k = X.shape[1] // 2, and replaces NaN scores (e.g. from the log of a
    negative number) with -inf.

    `m` is read from module scope at call time, so it must be set before the
    optimiser invokes this function.
    NOTE(review): the meaning of the constants 1500 and 2.9, and of the two
    halves of X, is not visible in this notebook - confirm against the model
    training code.
    """
    res = np.log(m.predict(X) - 1500) - 2.9*np.sum(np.log(1+X[:,X.shape[1]//2:]),axis=1)
    res[np.isnan(res)] = -np.inf
    return res

dd = None
glowworm_approx_details = []
for d in dimensions:
    for s in size:
        # NOTE(review): dd is not passed to GlowWorm or to the objective in this
        # cell, so the measured time does not depend on s - confirm whether
        # generating it is still needed.
        dd = np.random.uniform(size=(s,d))
        print(dd.shape)
        # Models are stored as module-level variables m1..m5; dimensions 1-4 map to
        # their own model and every other value falls back to m5.
        m = globals()[f'm{d if 1 <= d <= 4 else 5}']
        #Generate queries
        times = []
        for i in range(5):
            # perf_counter() is monotonic and high-resolution; time.time() can jump
            # with system clock adjustments and is unsuitable for benchmarking.
            start = time.perf_counter()
            gw = GlowWorm(objective_density, dimensions=2*d, nt=5, iter_max=100, glowworms=100)
            gw.optimize()
            elapsed_ms = (time.perf_counter() - start)*1000 #convert to ms
            times.append(elapsed_ms)
            print(elapsed_ms)
        glowworm_approx_details.append([d,s,np.mean(times), np.std(times)])

In [None]:
# Persist the GlowWorm benchmark rows ([d, s, mean ms, std ms]) as comma-separated text.
glowworm_results = np.array(glowworm_approx_details)
# Mean and std converted from ms to seconds.
print(glowworm_results[:,2:]/1000)
# Make sure the target directory exists; otherwise savetxt raises and the
# (expensive) benchmark results above would be lost.
os.makedirs('output', exist_ok=True)
np.savetxt('output/glowworm_approx_details',glowworm_results,delimiter=',')

<h3>Results post-processing</h3>

In [None]:
# Combine the saved PRIM and GlowWorm timing tables into one tidy DataFrame with
# columns Source, Dimensions, Size and Time(ms). Only the first three columns of
# each saved table are used.
# ndmin=2 keeps a single-row file two-dimensional so the column slicing still works.
prim = np.loadtxt('output/prim_times', delimiter=',', ndmin=2)
glow_approx = np.loadtxt('output/glowworm_approx_details', delimiter=',', ndmin=2)
source_names = ['prim'] * prim.shape[0] + ['glow_approx'] * glow_approx.shape[0]
# Stack only the numeric columns (np.vstack replaces the deprecated np.row_stack).
# The source labels are added as a separate column afterwards: stacking them into
# the same array would coerce every number to a string.
data = np.vstack((prim[:,:3], glow_approx[:,:3]))
eval_df = pd.DataFrame(data, columns=['Dimensions', 'Size', 'Time(ms)'])
# Dimensions and sizes are whole numbers; store them as integers so they sort and
# print as such.
eval_df = eval_df.astype({'Dimensions': int, 'Size': int})
eval_df.insert(0, 'Source', source_names)
eval_df

In [None]:
# Build a LaTeX table of run times in seconds: one row per (Source, Dimensions),
# one column per data set size.
#
# The conversion is done on a derived frame rather than by overwriting
# eval_df['Time(ms)'], so eval_df keeps its millisecond values and re-running this
# cell always yields the same table (dividing in place would shrink the values by
# another factor of 1000 on every run).
timing_df = pd.DataFrame({
    'Source': eval_df['Source'],
    # Stored as whole numbers so that grouping sorts numerically, not as text.
    'Dimensions': pd.to_numeric(eval_df['Dimensions']).astype(int),
    'Size': pd.to_numeric(eval_df['Size']).astype(int),
    # Milliseconds -> seconds, rounded to 2 decimal places.
    'Time(s)': (pd.to_numeric(eval_df['Time(ms)'], errors='coerce') / 1000).round(2),
})

# Group by 'Source', 'Dimensions' and 'Size', then spread the sizes into columns
grouped_df = (
    timing_df
    .groupby(['Source', 'Dimensions', 'Size'])['Time(s)']
    .mean()
    .unstack(fill_value='')
)

# Name the size columns after the sizes actually present in the data instead of
# relying on a hard-coded list matching the column order.
grouped_df.columns = [f'Size={size_value}' for size_value in grouped_df.columns]

# Turn 'Source' and 'Dimensions' back into regular columns
grouped_df = grouped_df.reset_index()

# Render the grouped DataFrame as a LaTeX table
latex_table = grouped_df.to_latex(index=False, escape=False)

# Print the LaTeX table
print(latex_table)