In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d import proj3d
import pandas as pd
import numpy as np
import seaborn as sns
sns.set(color_codes=True)
from xgboost import XGBRegressor
from sklearn.svm import SVR
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from matplotlib import patches
from sklearn import metrics
import os
import sys
import itertools
from pathlib import Path
import pickle
from codebase.utils import compute_boxes,compute_iou,min_dist
from codebase.Accuracy import AccuracyRunnerApprox
from Optimization_Methods.GlowWorm import GlowWorm

# Logging and warning configuration for the whole notebook.
import logging
import warnings

# Log records go to stdout; only ERROR and above are emitted, so the
# logger.debug(...) calls in later cells stay silent unless this level is lowered.
logging.basicConfig(level=logging.ERROR, stream=sys.stdout)
logger = logging.getLogger(__name__)

# Suppress every Python warning from here on.
warnings.filterwarnings(action='ignore')

In [None]:
# Evaluate every pickled model in the 'models' directory and collect one row
# per model: [dims, multi, aggr, model_name, rmse, iou_metric, avg_min_dist].
result_details = []
#Load models and run experiments
models_dir = 'models'
# os.listdir returns entries in arbitrary order; sorting keeps the order of the
# collected rows reproducible between runs.
for filename in sorted(os.listdir(models_dir)):
    if not filename.startswith('queries'):
        continue
    logger.debug(filename)
    # The file name is '-'-separated. Fields read below:
    #   [2] number of dimensions
    #   [3] '<prefix>_<True|False>' multi-region flag
    #   [4] aggregate type
    #   [5] model name
    #   [6] '<key>=<rmse>'
    parts = filename.split('-')
    dims = int(parts[2])
    multi = parts[3].split('_')[1] == "True"
    aggr = parts[4]
    model_name = parts[5]
    rmse = float(parts[6].split('=')[1])

    # NOTE(review): pickle.load can execute arbitrary code contained in the
    # file; only load model files that come from a trusted source.
    with open(os.path.join(models_dir, filename), 'rb') as model_file:
        m = pickle.load(model_file)

    ac = AccuracyRunnerApprox(dims, multi, aggr, m)
    proposed = ac.run_test()
    iou_metric, avg_min_dist = ac.check_accuracy(proposed)
    result_details.append([dims, multi, aggr, model_name, rmse, iou_metric, avg_min_dist])

In [None]:
# One row per evaluated model, built from the rows collected in result_details.
eval_df = pd.DataFrame(
    data=result_details,
    columns=[
        'dimensions',
        'multiple_regions',
        'type of aggr',
        'model',
        'rmse',
        'iou',
        'avg_min_dist',
    ],
)
eval_df

In [None]:
# Multi-modal case: the box regions are [0,0.2]^d, [0.3,0.5]^d, [0.6,0.8]^d
# Single-region case: the box region is [0.6,0.9]^d
# Which case applies is determined by the `multi` flag.

# Evaluation metrics are F-Score and IoU.
# IoU = Intersection over Union, i.e. the overlap between the proposed region and the actual regions, divided by the total region covered by both.
# F-Score: the defined region covers a number of points; the F-Score is computed over them, with the true positives being the points covered by the defined region.

# Alternative methods are:
# 1) PRIM
# 2) Naive (exhaustive) without an objective function: take the region with the highest value, then merge the surrounding regions until there is no improvement or the coverage is exceeded.
# 3) Naive (exhaustive) with an objective function: random walk.

<h2>Using PRIM</h2>

In [None]:
def load_csv_to_2d_array(filename):
    """Load a comma-separated numeric file and always return a 2-D array.

    ``np.loadtxt`` drops length-one axes: a file holding a single row (or a
    single column) comes back 1-D and a file holding a single value comes
    back 0-D. Any result with fewer than two dimensions is reshaped into one
    row, ``(1, n_values)``; results that are already 2-D are returned as is.

    NOTE: this notebook defines the same helper again in the later PRIM
    cells; the definition that was executed last is the one in effect.

    Parameters
    ----------
    filename : str, path or file-like object
        Anything ``np.loadtxt`` accepts as its input.

    Returns
    -------
    numpy.ndarray
        A 2-D array of the parsed values.
    """
    data = np.loadtxt(filename, delimiter=',')
    # ndim < 2 also covers the 0-D (single value) case, which the previous
    # ``len(data.shape) == 1`` check let through un-reshaped.
    return data.reshape(1, -1) if data.ndim < 2 else data


# Score the boxes proposed by PRIM: each 'data*' CSV in the directory holds the
# proposed box(es) for one configuration. One row is collected per file:
# [dim, multi, t, 'prim', iou, avg_min_dist].
directory = 'prim_boxes'
prim_details = []
# os.listdir returns entries in arbitrary order; sorting keeps the order of the
# collected rows reproducible between runs.
for filename in sorted(os.listdir(directory)):
    if not filename.startswith("data"):
        continue
    # The file name is '_'-separated: field 1 is the aggregate type, field 2 is
    # '<key>=<dimensions>' and the last field is 'True.csv' for the
    # multiple-regions case.
    a = filename.split('_')
    t = a[1]
    dim = int(a[2].split('=')[1])
    multi = a[-1] == "True.csv"
    logger.debug('Loading file {0}'.format(filename))
    logger.debug('multi={0}'.format(multi))

    full_path = os.path.join(directory, filename)
    proposed = load_csv_to_2d_array(full_path)
    boxes = compute_boxes(multi, dim)

    logger.debug('Proposed ' + np.array2string(proposed))
    iou = compute_iou(boxes, proposed, multi, dim)
    avg_min_dist = min_dist(boxes, proposed)
    prim_details.append([dim, multi, t, 'prim', iou, avg_min_dist])

In [None]:
# One row per PRIM result file, built from the rows collected in prim_details.
eval_df_prim = pd.DataFrame(
    data=prim_details,
    columns=[
        'dimensions',
        'multiple_regions',
        'type of aggr',
        'model',
        'iou',
        'avg_min_dist',
    ],
)
eval_df_prim

<h3>Prim + KDE</h3>

In [None]:
def load_csv_to_2d_array(filename):
    """Load a comma-separated numeric file and always return a 2-D array.

    ``np.loadtxt`` drops length-one axes: a file holding a single row (or a
    single column) comes back 1-D and a file holding a single value comes
    back 0-D. Any result with fewer than two dimensions is reshaped into one
    row, ``(1, n_values)``; results that are already 2-D are returned as is.

    NOTE: the same helper is also defined in the other PRIM cells of this
    notebook; the definition that was executed last is the one in effect.

    Parameters
    ----------
    filename : str, path or file-like object
        Anything ``np.loadtxt`` accepts as its input.

    Returns
    -------
    numpy.ndarray
        A 2-D array of the parsed values.
    """
    data = np.loadtxt(filename, delimiter=',')
    # ndim < 2 also covers the 0-D (single value) case, which the previous
    # ``len(data.shape) == 1`` check let through un-reshaped.
    return data.reshape(1, -1) if data.ndim < 2 else data


# Score the boxes proposed by PRIM + KDE: each 'data*' CSV in the directory
# holds the proposed box(es) for one configuration. One row is collected per
# file: [dim, multi, t, 'prim_kde', iou, avg_min_dist].
directory = 'prim_kde'
prim_kde_details = []
# os.listdir returns entries in arbitrary order; sorting keeps the order of the
# collected rows reproducible between runs.
for filename in sorted(os.listdir(directory)):
    if not filename.startswith("data"):
        continue
    # The file name is '_'-separated: field 1 is the aggregate type, field 2 is
    # '<key>=<dimensions>' and the last field is 'True.csv' for the
    # multiple-regions case.
    a = filename.split('_')
    t = a[1]
    dim = int(a[2].split('=')[1])
    multi = a[-1] == "True.csv"
    logger.debug('Loading file {0}'.format(filename))
    logger.debug('multi={0}'.format(multi))

    full_path = os.path.join(directory, filename)
    proposed = load_csv_to_2d_array(full_path)
    boxes = compute_boxes(multi, dim)

    logger.debug('Proposed ' + np.array2string(proposed))
    iou = compute_iou(boxes, proposed, multi, dim)
    avg_min_dist = min_dist(boxes, proposed)
    prim_kde_details.append([dim, multi, t, 'prim_kde', iou, avg_min_dist])

In [None]:
# One row per PRIM + KDE result file, built from prim_kde_details.
eval_df_prim_kde = pd.DataFrame(
    data=prim_kde_details,
    columns=[
        'dimensions',
        'multiple_regions',
        'type of aggr',
        'model',
        'iou',
        'avg_min_dist',
    ],
)
eval_df_prim_kde

<h3> PRIM modification to work with density </h3>

In [None]:
def load_csv_to_2d_array(filename):
    """Load a comma-separated numeric file and always return a 2-D array.

    ``np.loadtxt`` drops length-one axes: a file holding a single row (or a
    single column) comes back 1-D and a file holding a single value comes
    back 0-D. Any result with fewer than two dimensions is reshaped into one
    row, ``(1, n_values)``; results that are already 2-D are returned as is.

    NOTE: the same helper is also defined in the earlier PRIM cells of this
    notebook; running this cell replaces those definitions.

    Parameters
    ----------
    filename : str, path or file-like object
        Anything ``np.loadtxt`` accepts as its input.

    Returns
    -------
    numpy.ndarray
        A 2-D array of the parsed values.
    """
    data = np.loadtxt(filename, delimiter=',')
    # ndim < 2 also covers the 0-D (single value) case, which the previous
    # ``len(data.shape) == 1`` check let through un-reshaped.
    return data.reshape(1, -1) if data.ndim < 2 else data


# Score the boxes proposed by the density variant of PRIM: each 'data*' CSV in
# the directory holds the proposed box(es) for one configuration. One row is
# collected per file: [dim, multi, t, 'prim_dens', iou, avg_min_dist].
directory = 'prim_dens'
prim_dens_details = []
# os.listdir returns entries in arbitrary order; sorting keeps the order of the
# collected rows reproducible between runs.
for filename in sorted(os.listdir(directory)):
    if not filename.startswith("data"):
        continue
    # The file name is '_'-separated: field 1 is the aggregate type, field 2 is
    # '<key>=<dimensions>' and the last field is 'True.csv' for the
    # multiple-regions case.
    a = filename.split('_')
    t = a[1]
    dim = int(a[2].split('=')[1])
    multi = a[-1] == "True.csv"
    logger.debug('Loading file {0}'.format(filename))
    logger.debug('multi={0}'.format(multi))

    full_path = os.path.join(directory, filename)
    proposed = load_csv_to_2d_array(full_path)
    boxes = compute_boxes(multi, dim)

    logger.debug('Proposed ' + np.array2string(proposed))
    iou = compute_iou(boxes, proposed, multi, dim)
    avg_min_dist = min_dist(boxes, proposed)
    prim_dens_details.append([dim, multi, t, 'prim_dens', iou, avg_min_dist])
        
# One row per density-PRIM result file, built from prim_dens_details.
eval_df_prim_dens = pd.DataFrame(
    data=prim_dens_details,
    columns=[
        'dimensions',
        'multiple_regions',
        'type of aggr',
        'model',
        'iou',
        'avg_min_dist',
    ],
)
eval_df_prim_dens

<h3>Comparison All</h3>

In [None]:
# Stack the evaluation tables of all methods into one long frame.
total_df = pd.concat([eval_df, eval_df_prim_kde, eval_df_prim, eval_df_prim_dens], ignore_index=True)
# Every NaN in the frame becomes 0. This includes 'rmse' for the PRIM variants,
# whose frames have no such column.
# NOTE(review): that 0 is a placeholder, not a measured RMSE.
total_df = total_df.replace(np.nan, 0)
# Display names used in the figures.
total_df['model'] = total_df['model'].replace({'XGB': 'SuRF', 'prim_kde': 'PRIM+kde', 'prim': 'PRIM', 'prim_dens': 'PRIMd'})
# Move 'multiple_regions' to a new last column called 'multiple'.
total_df['multiple'] = total_df['multiple_regions']
total_df = total_df.drop(columns=['multiple_regions'])
total_df['type of aggr'] = total_df['type of aggr'].replace({'aggr': 'Aggregate', 'density': 'Density'})
total_df['multiple'] = total_df['multiple'].replace({True: 'k=3', False: 'k=1'})

# IoU per (model, type of aggr, multiple) row and per dimension column. The
# identity aggfunc is kept as written. The table is bound to a name because
# only the last expression of a cell is displayed; previously the result was
# computed and thrown away.
iou_pivot = total_df.pivot_table(values='iou', index=['model', 'type of aggr', 'multiple'], columns=['dimensions'], aggfunc=lambda x: x)

# Mean IoU per dimension and model (this is the cell's displayed output).
total_df.groupby(['dimensions', 'model'])['iou'].mean()

In [None]:
# Global matplotlib configuration for the figures below.
# Newer matplotlib releases ship the seaborn-derived style sheets under a
# 'seaborn-v0_8-' prefix and no longer accept the old names; use whichever
# name this installation provides.
ticks_style = 'seaborn-v0_8-ticks' if 'seaborn-v0_8-ticks' in plt.style.available else 'seaborn-ticks'
plt.style.use(ticks_style)
font_size=24
params = {'backend': 'ps',
          # Given as a single string: newer matplotlib releases reject a list
          # for this setting.
          'text.latex.preamble': '\\usepackage{gensymb}',
          # Render all text through LaTeX (previously also set by a separate,
          # redundant plt.rc call).
          'text.usetex': True,
          'font.family': 'serif'
}

plt.rcParams.update(params)

In [None]:
# IoU against dimensionality for every method: one panel per
# (multiple, type of aggr) combination, one line per model.
g = sns.catplot(x="dimensions", y="iou", hue="model", ci=False, markers=['v','^','*','o', '.'], scale=1.5,
                col="type of aggr", row="multiple",
                despine=False, kind='point', data=total_df, legend=False, legend_out=False, height=3, aspect=1.75)
# The built-in legend is disabled above; a single shared legend is attached to
# the first axes and positioned above the grid instead.
lgd = g.fig.get_axes()[0].legend(loc='upper center', bbox_to_anchor=(1, 1.5), prop={'size': font_size},
                                ncol=4, columnspacing=0.3, handletextpad=0.1, frameon=True, fancybox=True, shadow=True)
g.set_titles(template="Type: {col_name} - Regions: {row_name}", size=font_size)
g.set_xlabels(label="Dimensions", fontsize=font_size)
g.set_ylabels(label=r'IoU', fontsize=font_size)
# NOTE(review): the tick labels are hard-coded to 1..5 rather than read from
# the data; confirm they match the dimensions present in total_df.
g.set_xticklabels(labels=range(1,6), size=font_size)

for ax_row in g.axes:
    for ax in ax_row:
        for label in ax.get_yticklabels():
            label.set_size(font_size)

g.despine(top=False, right=False)

# savefig does not create missing directories.
os.makedirs('output', exist_ok=True)
# The legend sits outside the axes, so it is passed explicitly to the tight
# bounding-box computation.
plt.savefig('output/synthetic-four.pdf', format='pdf', bbox_extra_artists=(lgd,), bbox_inches='tight')

In [None]:
# Keep only the two methods compared in the bar charts below.
# (A preceding `total_df['multiple'].replace({True: "k=3"})` was removed: its
# result was never assigned, so it had no effect.)
# NOTE: this rebinds total_df to the filtered frame, so earlier cells that
# expect all methods see only these two if they are re-run after this cell.
total_df = total_df[total_df['model'].isin(['PRIM', 'SuRF'])]

In [None]:
# Make the current seaborn palette the active one again, with its second and
# third colours swapped.
default_palette = sns.color_palette()
modified_palette = list(default_palette)
modified_palette[1], modified_palette[2] = modified_palette[2], modified_palette[1]
sns.set_palette(modified_palette)

In [None]:
# IoU per model, split by the 'multiple' column of total_df.
ax = sns.barplot(x='multiple', y='iou', hue='model', data=total_df)
ax.set_xlabel('Multiple Regions', fontsize=font_size)
ax.set_ylabel(r'IoU', fontsize=font_size)
ax.tick_params(labelsize=font_size)
plt.legend(prop={'size': font_size}, shadow=True, frameon=True)
plt.tight_layout()
# savefig does not create missing directories.
os.makedirs('output', exist_ok=True)
plt.savefig('output/synthetic_accuracy_multiple_regions.pdf', format='pdf')

In [None]:
# IoU per model, split by the 'type of aggr' column of total_df.
ax = sns.barplot(x='type of aggr', y='iou', hue='model', data=total_df)
ax.set_xlabel('Aggregate Type', fontsize=font_size)
ax.set_ylabel(r'IoU', fontsize=font_size)
ax.tick_params(labelsize=font_size)
plt.legend(prop={'size': font_size}, shadow=True, frameon=True)
plt.tight_layout()
# savefig does not create missing directories.
os.makedirs('output', exist_ok=True)
plt.savefig('output/synthetic_accuracy_type.pdf', format='pdf')