In [32]:
%matplotlib inline
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d import proj3d
import pandas as pd
import numpy as np
import seaborn as sns
sns.set(color_codes=True)
from xgboost import XGBRegressor
from sklearn.neural_network import MLPRegressor
from matplotlib import patches
from sklearn import metrics
import os
import sys
import itertools
from pathlib import Path
import pickle
import logging
import time
import prim
import shutil

DEBUG:matplotlib.pyplot:Loaded backend module://ipykernel.pylab.backend_inline version unknown.


In [18]:
# Send log records to stdout so they appear in the cell output, at DEBUG verbosity.
logging.basicConfig(
    level=logging.DEBUG,
    stream=sys.stdout,
)
# Logger used by the benchmark cells for progress messages.
logger = logging.getLogger(__name__)

In [None]:
# Benchmark grid: dimensionalities 1..5 crossed with 10^5, 10^6 and 10^7 rows.
dimensions = range(1, 6)
size = np.power(10, np.arange(5, 8))

<h3>PRIM</h3>

In [None]:
# Accumulates one [d, s, mean_ms, std_ms] row per PRIM benchmark configuration.
prim_times = []

In [None]:
# PRIM benchmark: for every (dimension, size) pair, draw uniform random data
# with d feature columns plus one response column, then time `find_all()`
# over five repetitions. Only the `find_all()` call is inside the timed region;
# constructing the Prim object is not.
dd = None
for d, s in itertools.product(dimensions, size):
    dd = np.random.uniform(size=(s, d + 1))
    print(dd.shape)
    features = dd[:, :-1]
    response = dd[:, -1]
    times = []
    for run_idx in range(5):
        p = prim.Prim(features, response, threshold=0.7, threshold_type=">")
        start = time.time()
        p.find_all()
        elapsed_ms = (time.time() - start) * 1000
        times.append(elapsed_ms)
    # One result row: dimension, size, mean and standard deviation in ms.
    prim_times.append([d, s, np.mean(times), np.std(times)])
    # Prints the duration of the last repetition only.
    print(elapsed_ms)

In [None]:
# Mean and standard deviation columns of the PRIM timings, converted from ms to seconds.
np.array(prim_times)[:,2:]/1000

In [None]:
# Last column (standard deviation) of the PRIM timings, converted from ms to seconds.
np.array(prim_times)[:,-1]/1000

In [None]:
# Persist the PRIM timings as CSV rows of [d, s, mean_ms, std_ms].
# The file goes under output/performance/ because that is where the
# evaluation cell loads 'prim_times' from; the directory is created if
# missing so a fresh checkout does not fail on the write.
prim_times_path = Path('output/performance/prim_times')
prim_times_path.parent.mkdir(parents=True, exist_ok=True)
np.savetxt(str(prim_times_path), np.array(prim_times), delimiter=',')

<h3>Naive</h3>

In [3]:
# Six evenly spaced grid values on [0, 1]; naive queries are built from these.
x = np.linspace(0, 1, num=6)
# One [d, s, elapsed_ms, total_queries, executed_queries] row per configuration.
naive_numpy_details = list()

In [None]:
# Naive baseline: exhaustively evaluate every range query on the grid `x`
# against uniform random data and record how long it takes.
#
# A query is a vector of 2*d grid values: the first d entries are the lower
# corner of a box and the last d entries are the side lengths, so a row of
# `dd` matches when lower < row < lower + length in every dimension.
#
# The queries are enumerated lazily. Materialising all len(x)**(2*d) of them
# up front (about 60 million tuples for d=5) costs many gigabytes before the
# time budget below has any chance to cut the run short.
dd = None
threshold = 3000  # wall-clock budget per (d, s) configuration, in seconds
for d in dimensions:
    for s in size:
        dd = np.random.uniform(size=(s,d))
        print(dd.shape)
        grid = x.tolist()
        # Size of the full cartesian product, computed without building it.
        total_queries = len(grid)**(2*d)
        logger.debug("Total queries {0}".format(total_queries))
        executed = 0
        start = time.time()
        for q in itertools.product(grid, repeat=2*d):
            # Convert the tuple so that `+` below is element-wise addition
            # rather than tuple concatenation.
            q = np.asarray(q)
            # Number of matching rows; the value itself is discarded because
            # only the cost of computing it is being measured.
            res = dd[np.all((dd>q[:d]) & (dd<q[:d]+q[d:]),axis=1)].shape[0]
            executed+=1
            if time.time()-start>threshold:
                logger.debug("Exceeded {0} seconds threshold".format(threshold))
                break
        end = (time.time() - start)*1000 #convert to ms
        # `executed` < `total_queries` means the run hit the time budget.
        naive_numpy_details.append([d,s,end,total_queries,executed])

In [None]:
# Show the collected rows: [d, s, elapsed_ms, total_queries, executed_queries].
naive_numpy_details

In [None]:
# Persist the naive-baseline results as CSV. The file goes under
# output/performance/ because that is where the evaluation cell loads
# 'naive_numpy_details' from; the directory is created if missing.
naive_details_path = Path('output/performance/naive_numpy_details')
naive_details_path.parent.mkdir(parents=True, exist_ok=True)
np.savetxt(str(naive_details_path), np.array(naive_numpy_details), delimiter=',')

<h3>Glowworm using real data</h3>

In [None]:
# Optimiser classes used by the glowworm benchmarks below. They are imported
# here rather than in the import cell at the top of the notebook;
# GlowWormDensity is not referenced in the cells visible here.
from Optimization_Methods.GlowWorm import GlowWorm,GlowWormDensity
# Accumulates one [d, s, elapsed_ms] row per benchmark configuration.
naive_glowworm_details = []

In [None]:
# Glowworm benchmark where the objective is evaluated against the real data:
# for each (dimension, size) pair, time a single GlowWorm optimisation run.
# `global` at module (cell) level has no effect; `dd` is already a global here.
global dd
dd = None
for d in dimensions:
    for s in size:
        dd = np.random.uniform(size=(s,d))
        print(dd.shape)
        def objective_aggr_over_real(X):
            """Score candidate queries by counting matching rows of the global `dd`.

            X must be a non-empty 2-D array with 2*d columns. The count uses
            the flattened X: its first d values are the lower bounds and the
            remaining values are added to them to form the upper bounds.
            NOTE(review): that broadcast appears to be meaningful only when X
            holds a single row; confirm how GlowWorm calls the objective.

            Returns log(50000 - count) minus 3 * sum(log(1 + second half of
            each row)), as an array with one entry per row of X.
            NOTE(review): the log is NaN when count exceeds 50000, which looks
            reachable with 10^5+ rows; confirm this is intended.
            """
            assert X.shape[1]==2*d and X.shape[0]!=0
#             X = X.flatten()
            res = dd[np.all((dd>X.flatten()[:d]) & (dd<X.flatten()[:d]+X.flatten()[d:]),axis=1)].shape[0]
            ans = np.log(50000 - res) - 3*np.sum(np.log(1+X[:,X.shape[1]//2:]),axis=1)
            return ans
        # Time one optimisation run (optimiser construction included).
        start = time.time()
        gw = GlowWorm(objective_aggr_over_real, dimensions=2*d, nt=5, iter_max=100, glowworms=100)
        gw.optimize()
        end = (time.time() - start)*1000 #convert to ms
        # One result row: dimension, size, elapsed milliseconds.
        naive_glowworm_details.append([d,s,end])

In [None]:
# Show the collected rows: [d, s, elapsed_ms].
naive_glowworm_details

In [None]:
# Write the glowworm-on-real-data timings to disk as comma-separated rows.
naive_glowworm_array = np.asarray(naive_glowworm_details)
np.savetxt(
    'output/performance/naive_glowworm_details',
    naive_glowworm_array,
    delimiter=',',
)

<h3>Glowworm using approximation</h3>

In [None]:
# Load the pickled surrogate models m1..m5; the digit after "uniform-" in each
# file name matches the model number.
# NOTE(security): pickle.load can execute arbitrary code; only load model
# files that come from a trusted source.
def _load_pickled_model(path):
    """Open `path` in binary mode and return the unpickled object."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)

m1 = _load_pickled_model('models/queries-uniform-1-multi_False-aggr-XGB-RMSE=0.26-R2=0.97.pkl')
m2 = _load_pickled_model('models/queries-uniform-2-multi_False-aggr-XGB-RMSE=0.16-R2=0.96.pkl')
m3 = _load_pickled_model('models/queries-uniform-3-multi_False-aggr-XGB-RMSE=0.09-R2=0.90.pkl')
m4 = _load_pickled_model('models/queries-uniform-4-multi_False-aggr-XGB-RMSE=0.14-R2=0.45.pkl')
m5 = _load_pickled_model('models/queries-uniform-5-multi_False-aggr-XGB-RMSE=0.15-R2=0.39.pkl')
                    

In [None]:
# Glowworm benchmark where the objective queries a pre-trained surrogate model
# instead of the data: five timed optimisation runs per (dimension, size) pair.
global dd
dd = None
glowworm_approx_details = []
# Surrogate model per dimensionality; any other dimension falls back to m5.
surrogate_by_dim = {1: m1, 2: m2, 3: m3, 4: m4}
for d, s in itertools.product(dimensions, size):
    # The data is still generated for each configuration, although the
    # objective below only reads the model `m`.
    dd = np.random.uniform(size=(s, d))
    print(dd.shape)
    m = surrogate_by_dim.get(d, m5)

    def objective_density(X):
        """Score each row of X from the surrogate prediction; NaN scores become -inf."""
        res = np.log(m.predict(X) - 1500) - 2.9*np.sum(np.log(1+X[:,X.shape[1]//2:]),axis=1)
        res[np.isnan(res)] = -np.inf
        return res

    times = []
    for run_idx in range(5):
        start = time.time()
        gw = GlowWorm(objective_density, dimensions=2*d, nt=5, iter_max=100, glowworms=100)
        gw.optimize()
        elapsed_ms = (time.time() - start) * 1000
        times.append(elapsed_ms)
    # One result row: dimension, size, mean and standard deviation in ms.
    glowworm_approx_details.append([d, s, np.mean(times), np.std(times)])

In [None]:
# Show the collected rows: [d, s, mean_ms, std_ms].
glowworm_approx_details

In [None]:
# Mean and standard deviation columns, converted from ms to seconds.
print(np.array(glowworm_approx_details)[:,2:]/1000)

In [None]:
# Write the glowworm-on-surrogate timings to disk as comma-separated rows.
glowworm_approx_array = np.asarray(glowworm_approx_details)
np.savetxt(
    'output/performance/glowworm_approx_details',
    glowworm_approx_array,
    delimiter=',',
)

In [None]:
# Matplotlib configuration for LaTeX-rendered, serif-font figures.
plt.rc('text', usetex=True)
font_size=22
# 'text.latex.preamble' is given as a single string: newer Matplotlib releases
# reject a list of strings here, while older releases accept a plain string.
# NOTE(review): 'backend': 'ps' requests the PostScript backend even though the
# notebook starts with %matplotlib inline; confirm this is intended.
params = {'backend': 'ps',
          'text.latex.preamble': '\\usepackage{gensymb}',
          'text.usetex': True,
          'font.family': 'serif'
}

plt.rcParams.update(params)

In [None]:
# Read the four saved benchmark tables back from disk (comma-separated).
# NOTE: binding the name `prim` to an array hides the `prim` module imported
# at the top of the notebook for every cell run after this one.
prim, naive, naive_glow, glow_approx = (
    np.loadtxt(results_path, delimiter=',')
    for results_path in (
        'output/performance/prim_times',
        'output/performance/naive_numpy_details',
        'output/performance/naive_glowworm_details',
        'output/performance/glowworm_approx_details',
    )
)

In [None]:
# Shape of the loaded PRIM results array (here `prim` is the array, not the module).
prim.shape

In [None]:
# Stack the four result tables into one frame, in the order prim, naive,
# naive_glow, glow_approx. The tables do not all have the same width (the PRIM
# and surrogate-glowworm rows carry a standard-deviation column, the naive rows
# carry query counts), so every table is cut to its first three columns
# [dimension, size, time in ms] before stacking. np.vstack replaces
# np.row_stack, which is a deprecated alias of it.
eval_df = pd.DataFrame(
    np.vstack((prim[:, :3], naive[:, :3], naive_glow[:, :3], glow_approx[:, :3])),
    columns=['Dimensions', 'Size', 'Time(ms)'],
)

In [None]:
# Number of non-null entries per column of the combined results frame.
eval_df.count()

In [None]:
# Share of queries actually executed, one entry per row of the 60-row results
# frame. Defaults to 1; positions 15..29 (the naive rows in the stacking order)
# get executed / total from columns 4 and 3 of the naive table.
fraction = np.ones(60)
fraction[15:30] = np.divide(naive[:, 4], naive[:, 3])

In [None]:
# Attach the executed-query fraction as a column; requires one value per row of eval_df.
eval_df['fraction'] = fraction

In [None]:
# Method name for each results row: 15 consecutive entries per method.
labels = [
    method
    for method in ('prim', 'naive', 'naive_glow', 'glow_approx')
    for _repeat in range(15)
]

In [None]:
# Attach the method name as a column; requires one label per row of eval_df.
eval_df['labels'] = labels

In [None]:
# Cast the dimension column to int (in place on eval_df).
eval_df['Dimensions']=eval_df['Dimensions'].astype(int)

In [None]:
def _ms_to_rounded_seconds(ms):
    """Divide a millisecond value by 1000 and round to two decimals."""
    return round(ms / 1000, 2)

# NOTE: after this cell the 'Time(ms)' column holds seconds, not milliseconds,
# and running the cell a second time divides by 1000 again.
eval_df['Time(ms)'] = eval_df['Time(ms)'].apply(_ms_to_rounded_seconds)

In [None]:
# Build the table cell text for each results row: the time alone when every
# query was executed (fraction == 1), otherwise "time fraction" with the
# fraction rounded to three decimals.
# The loop variable is named `row` rather than `x` so that it does not
# overwrite the notebook-level grid `x` used to generate the naive queries.
ser = []
for _idx, row in eval_df.iterrows():
    if row['fraction'] == 1:
        ser.append(row['Time(ms)'])
    else:
        ser.append(' '.join([str(row['Time(ms)']), str(round(row['fraction'], 3))]))

In [None]:
# Show the per-row cell text built above (numbers, or "time fraction" strings).
ser

In [None]:
# Attach the formatted cell text as a column; requires one value per row of eval_df.
eval_df['Time_str'] = ser

In [None]:
# Light sea-green colormap; not referenced by any other cell visible here.
cm = sns.light_palette("seagreen", as_cmap=True)

In [None]:
# Print a LaTeX table of Time_str with (labels, Dimensions) as rows and Size as columns.
# The identity aggfunc passes each group through unchanged.
# NOTE(review): assumes exactly one row per (labels, Dimensions, Size) combination; confirm.
print(eval_df.pivot_table(index=['labels', 'Dimensions'],columns=['Size'], values='Time_str', aggfunc=lambda x: x).to_latex())

In [None]:
# Point plot of the 'Time(ms)' column against dimensionality, one series per method.
# NOTE(review): an earlier cell divides 'Time(ms)' by 1000, so the plotted values
# look like seconds despite the column name; confirm the axis label.
sns.pointplot(x='Dimensions',y='Time(ms)',hue='labels',data=eval_df)