# Results for RQ2

In [25]:
import os 
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
import prettytable
import pickle 
import sklearn
from sklearn.decomposition import PCA, KernelPCA
from sklearn import preprocessing
import sklearn.ensemble
import sklearn.neural_network
import matplotlib.pyplot as plt
import itertools
from contextlib import redirect_stdout, redirect_stderr
import time
import seaborn as sns 
import dill
cwd = os.getcwd()
os.chdir('../../')
import errorAPI
from errorAPI.dataset import Dataset
os.chdir(cwd)
f = open(os.devnull, 'w')


In [59]:
## Config

sql_string = 'postgresql://postgres:postgres@localhost:5432/error_detection'
profiles_path = '../../dataset_profiles.p'
dataset_path = '../../datasets'
results_df_path = '###_XXX_results_df_MSE.p'
experiment_data_path = '###_experiment_data.p'
all_regressors = errorAPI.Profiler.available_regressors

experiment = 3

experiment_results = {x: pickle.load(open(x, 'rb')) for x in os.listdir() if x.startswith(str(experiment)) and x.endswith("_MSE.p")}
experiment_data = pickle.load(open([x for x in os.listdir() if x.startswith(str(experiment)) and x.endswith("_data.p")][0], 'rb'))

In [60]:
number_of_strategies = experiment_data["number_of_strategies"]
number_of_filtered_strategies = experiment_data["number_of_filtered_strategies"]
all_configs = experiment_data["all_configs"]
dataset_profiles = experiment_data["dataset_profiles"]
performance_results = experiment_data["performance_results"]
f1_threshold = experiment_data["f1_threshold"]
max_human_cost = experiment_data["max_human_cost"]

In [61]:
import matplotlib

def save_to_latex(errors_estimation, chosen_metric, experiment):

    matplotlib.use("pgf")
    matplotlib.rcParams.update({
        "pgf.texsystem": "pdflatex",
        'font.family': 'serif',
        'text.usetex': True,
        'pgf.rcfonts': False,
    })

    sns.set_style("darkgrid")
    # darkgrid, whitegrid, dark, white, and ticks
    errors2 = errors_estimation.values.flatten()

    chosen_metric_str = chosen_metric.split("_")[0].capitalize() + " "
    if chosen_metric.endswith("prec"):
        chosen_metric_str += "precision"
    elif chosen_metric.endswith("rec"):
        chosen_metric_str += "recall"
    elif chosen_metric.endswith("f1"):
        chosen_metric_str += "F1"
        
    fig = sns.distplot(errors2, kde=False).set_title(chosen_metric_str + ' estimation error distribution')
    fig.figure.set_size_inches(w=5, h=2.5)
    fig.figure.savefig(str(experiment) + "_error_histogram" + chosen_metric + ".pgf")

def get_scores_df(results_df):
    trained_number = results_df.applymap(lambda x: x[0] if len(x) == 2 else x)
    num_trained = trained_number.iloc[:,1:].max().max()
    regression_columns = [x for x in results_df.columns if x != "Settings"]
    scores = results_df.copy()
    scores[regression_columns] = scores[regression_columns].applymap(lambda x: x[1] if (x[0] == num_trained) else None)
    min_val = scores[regression_columns].min().min()
    return scores, min_val

def get_best_settings(scores, min_val):
    val_cols = [x for x in scores.columns if x != "Settings"]
    best_settings_idx, best_regressor = scores[scores[val_cols] == min_val].stack().index.tolist()[0]
    best_normalize, best_pca, best_feature_selection = scores['Settings'][best_settings_idx]
    return best_regressor, best_normalize, best_pca, best_feature_selection

### Best regressor precision/recall MSE

In [66]:
prec_profiler = None
rec_profiler = None
f1_profiler = None

for key in experiment_results:
    print(key)
    chosen_metric = "_".join(key.split("_")[1:3])
    results_df = experiment_results[key]
    scores, min_val = get_scores_df(results_df)

    min_val = scores[all_regressors].min().min()
    best_regressor, best_normalize, best_pca, best_feature_selection = get_best_settings(scores, min_val)

    print("The best regressor to estimate the performance is:", best_regressor)
    _regressor = best_regressor
    _normalize = best_normalize
    _pca = best_pca
    _feat = best_feature_selection
    _extra_options = {}
    
    profiler = errorAPI.Profiler(_regressor, _normalize, _pca, _feat, extra_options=_extra_options, metric=chosen_metric)
    
    with redirect_stderr(f), redirect_stdout(f):
        profiler.train_all_configs(all_configs, dataset_profiles, performance_results)
        MSE = profiler.get_MSE()
        estimation_performance, real_performance, errors_estimation, squared_errors = profiler.get_fitted_results(all_configs, dataset_profiles, performance_results)
        MSE_fitted = profiler.get_MSE(squared_errors)

    if "_rec" in chosen_metric:
        rec_profiler = profiler
    if "_prec" in chosen_metric:
        prec_profiler = profiler
    if "_f1" in chosen_metric:
        f1_profiler = profiler
    
    print("MSE:", MSE)
    print("MSE fitted:", MSE_fitted)

    errorAPI.performance_prediction_info(profiler.errors_estimation, chosen_metric)
    save_to_latex(errors_estimation, chosen_metric, experiment)

3_cell_f1_results_df_MSE.p
The best regressor to estimate the performance is: SVR
MSE: 0.05452927015508892
MSE fitted: 0.018521964404235026
-=-=-=-=-= Performance estimation cell_f1-=-=-=-=-=

Mean square error:	 0.0545
Mean error:		 0.1636
Median error:		 0.1051
Error variance:		 0.0278
95th percentile:	 0.5282

-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
3_cell_prec_results_df_MSE.p
The best regressor to estimate the performance is: SVR
MSE: 0.10469683420385804
MSE fitted: 0.01922647059381914
-=-=-=-=-= Performance estimation cell_prec-=-=-=-=-=

Mean square error:	 0.1047
Mean error:		 0.2267
Median error:		 0.1082
Error variance:		 0.0533
95th percentile:	 0.7198

-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
3_cell_rec_results_df_MSE.p
The best regressor to estimate the performance is: SVR
MSE: 0.0926045382906821
MSE fitted: 0.04861670608543964
-=-=-=-=-= Performance estimation cell_rec-=-=-=-=-=

Mean square error:	 0.0926
Mean error:		 0.2101
Median error:		 0.1269

In [68]:
if prec_profiler is not None and rec_profiler is not None and f1_profiler is not None:
    combined_profiler = errorAPI.CombinedProfiler(prec_profiler, rec_profiler, f1_profiler)
    combined_f1 = combined_profiler.get_combined_f1_estimation()
    real_f1 = combined_profiler.get_real_performance("f1")
    errors_estimation = combined_f1 - real_f1
    
    errorAPI.performance_prediction_info(profiler.errors_estimation, "f1")
    

-=-=-=-=-= Performance estimation f1-=-=-=-=-=

Mean square error:	 0.0926
Mean error:		 0.2101
Median error:		 0.1269
Error variance:		 0.0484
95th percentile:	 0.7664

-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


In [79]:
f1_profiler.real_performance

Unnamed: 0,beers,eeg,flights,hospital,movie,movies,rayyan,restaurant,restaurants,toy,university,uscensus,kdd
"('ForbiddenItemSets', ""{'Tau': 0.5}"")",0.153711,0.247853,0.069241,0.008368,0.019910,0.005887,0.000000,0.009856,0.002814,0.000000,0.014909,0.263724,
"('ForbiddenItemSets', ""{'Tau': 0.6}"")",0.234071,0.417445,0.166207,0.014168,0.042633,0.007619,0.000000,0.010256,0.002114,0.000000,0.019907,0.372740,
"('ForbiddenItemSets', ""{'Tau': 0.7}"")",0.318349,0.567501,0.243484,0.022979,0.074142,0.011033,0.000000,0.007713,0.002197,0.000000,0.025714,0.483977,
"('KATARA', ""{'folder': 'default_domain', 'frequency_threshold': 0.2}"")",,0.000000,0.093715,0.133763,0.408199,0.027925,0.008772,0.000000,,0.333333,0.082386,0.362678,
"('KATARA', ""{'folder': 'large_domain', 'frequency_threshold': 0.2}"")",,0.000000,0.086851,0.102422,0.430259,0.033901,0.011454,0.009282,0.003249,0.181818,0.103020,0.230744,0.222672
...,...,...,...,...,...,...,...,...,...,...,...,...,...
"('dBoost', ""{'Params': ['mixture', '3', '0.3']}"")",0.002647,0.966081,0.682196,0.054020,0.407273,0.000000,0.250466,0.000128,0.001720,0.363636,0.077399,0.475565,
"('dBoost', ""{'Params': ['mixture', '3', '0.4']}"")",0.002647,0.966081,0.682196,0.054020,0.407273,0.000000,0.233545,0.000128,0.001718,0.363636,0.077399,0.475565,
"('dBoost', ""{'Params': ['mixture', '3', '0.5']}"")",0.002647,0.966081,0.682196,0.054020,0.407273,0.000000,0.233545,0.000128,0.001637,0.363636,0.077399,0.475565,
"('dBoost', ""{'Params': ['mixture', '3', '0.7']}"")",0.002647,0.966081,0.682196,0.054020,0.407273,0.000000,0.215458,0.000128,0.001637,0.363636,0.077399,0.475565,


In [80]:
errors_estimation

Unnamed: 0,beers,eeg,flights,hospital,movie,movies,rayyan,restaurant,restaurants,toy,university,uscensus,kdd
"('ForbiddenItemSets', ""{'Tau': 0.5}"")",-0.034231,-0.134546,0.059828,-0.005673,0.116825,-0.003467,0.022485,0.064810,0.072230,0.139937,0.086185,-0.145500,
"('ForbiddenItemSets', ""{'Tau': 0.6}"")",-0.090277,-0.283610,-0.007899,-0.009777,0.160015,-0.006141,0.023209,0.080987,0.096750,0.171930,0.097475,-0.190486,
"('ForbiddenItemSets', ""{'Tau': 0.7}"")",-0.172368,-0.390237,-0.046763,-0.020730,0.234073,-0.011033,0.023403,0.101823,0.115470,0.254286,0.092745,-0.215266,
"('KATARA', ""{'folder': 'default_domain', 'frequency_threshold': 0.2}"")",,0.287961,0.298238,-0.051327,-0.236961,0.014642,0.016707,0.186575,,-0.057394,0.116641,-0.212885,
"('KATARA', ""{'folder': 'large_domain', 'frequency_threshold': 0.2}"")",,0.300325,0.170344,-0.009276,-0.254221,0.043879,0.060808,0.137318,0.117103,-0.015887,0.048937,-0.069664,-0.081455
...,...,...,...,...,...,...,...,...,...,...,...,...,...
"('dBoost', ""{'Params': ['mixture', '3', '0.3']}"")",0.434099,-0.531766,-0.175092,0.088201,0.186668,0.119076,-0.027643,0.144435,0.161007,0.299498,0.145452,-0.230287,
"('dBoost', ""{'Params': ['mixture', '3', '0.4']}"")",0.434101,-0.531766,-0.175092,0.088201,0.186667,0.119076,-0.010722,0.144469,0.161010,0.299498,0.145452,-0.230287,
"('dBoost', ""{'Params': ['mixture', '3', '0.5']}"")",0.434170,-0.531766,-0.175092,0.088201,0.186642,0.119076,-0.010722,0.144436,0.161090,0.299498,0.145452,-0.230287,
"('dBoost', ""{'Params': ['mixture', '3', '0.7']}"")",0.434170,-0.531766,-0.175092,0.088201,0.186642,0.119076,0.007365,0.144436,0.161090,0.299498,0.145452,-0.230287,
