# Results for RQ1

In [1]:
import os 
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
import prettytable

# errorAPI is imported with the parent directory as the working directory
# (presumably the package lives one level above this notebook -- confirm).
cwd = os.getcwd()
os.chdir('../')
try:
    import errorAPI
    from errorAPI.dataset import Dataset
finally:
    # Restore the working directory even when the import fails, so a failed
    # import does not leave the kernel stranded in the parent directory.
    os.chdir(cwd)

In [36]:
# Connection URL of the results database. The ERROR_DETECTION_DB_URL environment
# variable overrides the local default, so real credentials never have to be
# written into the notebook.
sql_string = os.environ.get(
    "ERROR_DETECTION_DB_URL",
    'postgresql://postgres:postgres@localhost:5432/error_detection',
)

# Read the whole "results" table and keep only the last stored row for each
# (dataset, tool_name, tool_configuration) combination.
performance_results = pd.read_sql_table("results", create_engine(sql_string)).drop_duplicates(
    subset=['dataset', 'tool_name', 'tool_configuration'], keep='last'
)

In [41]:
# Columns identifying one group of runs, the columns reported in the tables,
# and the metric used to select the best run(s) of each group.
group_by_cols = ["dataset", "tool_name"]
show_cols = ["cell_prec", "cell_rec", "cell_f1", "error_text"]
max_col = "cell_f1"

# When False, only runs without human cost are kept (a missing cost counts as 0).
human_cost_allowed = False

# Row count before filtering.
print(len(performance_results))

if not human_cost_allowed:
    performance_results = performance_results[performance_results["human_cost"].fillna(0) == 0]

# Row count after filtering.
print(len(performance_results))

# Boolean mask selecting, per (dataset, tool_name) group, every row whose max_col
# equals the group maximum; tied rows are all selected.
# The string alias "max" replaces the Python builtin, which recent pandas
# versions deprecate as a transform argument.
max_idx = performance_results.groupby(group_by_cols)[max_col].transform("max") == performance_results[max_col]

1687
1330


In [42]:
# Keep the rows flagged by max_idx, then collapse every (dataset, tool_name)
# group to one row holding the column-wise maximum of the displayed columns.
# NOTE(review): the maximum is taken per column, so when several rows tie on
# max_col the reported values may come from different rows -- confirm intended.
best_runs = performance_results[max_idx]
results_df = best_runs.groupby(group_by_cols)[show_cols].max()

In [43]:
## Filtered tools
# Tools that are reported in the tables.
filtered_tools = ['ActiveClean', 'FAHES', 'ForbiddenItemSets', 'KATARA', 'Raha', 'dBoost']

# Datasets that are left out of the tables.
exclude_datasets = ["eeg_minor", "eeg_major", "uscensus_major", "company"]

# The index keys of results_df are (dataset, tool_name) pairs.
datasets_in_results = {key[0] for key in results_df.index}
tools_in_results = {key[1] for key in results_df.index}

# Sorted names that survive the include / exclude lists above.
tool_names = sorted(tools_in_results & set(filtered_tools))
dataset_names = sorted(datasets_in_results - set(exclude_datasets))

In [44]:
## Columns = datasets
# Build a table with one row per tool and one column per dataset. Each cell
# holds the metrics of show_cols (everything except "error_text") formatted
# for LaTeX, or an error label when the run failed.

metric_cols = [col for col in show_cols if col != "error_text"]

# Best value of every metric per dataset, computed once instead of once per cell.
# NOTE(review): the maximum runs over every tool present in results_df, including
# tools that are not listed in tool_names, so a dataset column may end up
# without any bold entry -- confirm this is intended.
best_per_dataset = results_df[metric_cols].groupby(level=0).max()


def _format_cell(values, best_values):
    """Render one table cell from a single row of results_df.

    Parameters
    ----------
    values : pandas.Series
        Row of results_df for one (dataset, tool) pair, labelled by show_cols.
    best_values : pandas.Series
        Per-metric maximum for the same dataset, labelled by metric_cols.

    Returns
    -------
    str
        The error text when it mentions "Timeout", "Other error" for any other
        non-empty error text, otherwise the metrics formatted to two decimals
        with each value equal to the dataset maximum wrapped in a LaTeX
        textbf command.
    """
    error_text = values["error_text"]
    if error_text != "":
        # Only timeouts are reported verbatim; every other error is collapsed.
        return error_text if "Timeout" in error_text else "Other error"

    parts = []
    for metric in metric_cols:
        # Label-based lookup; positional indexing of a labelled Series is deprecated.
        formatted = "{:.2f}".format(values[metric])
        if best_values[metric] == values[metric]:
            # Raw string: in a normal literal "\t" would be read as a TAB character.
            parts.append(r"\textbf{" + formatted + "} ")
        else:
            # Two trailing spaces keep the cell text identical to earlier runs.
            parts.append(formatted + "  ")
    return "".join(parts)


data_dict = []
for tool_name in tool_names:
    row = {}
    for dataset_name in dataset_names:
        try:
            values = results_df.loc[(dataset_name, tool_name)]
            best_values = best_per_dataset.loc[dataset_name]
        except KeyError:
            # No result stored for this (dataset, tool) pair: leave the cell empty.
            row[dataset_name] = ""
        else:
            row[dataset_name] = _format_cell(values, best_values)
    data_dict.append(row)

output_df_datacols = pd.DataFrame(data_dict, columns=dataset_names, index=tool_names)

In [45]:
# Order the dataset columns alphabetically, then display the table.
sorted_columns = sorted(output_df_datacols.columns)
output_df_datacols = output_df_datacols.reindex(columns=sorted_columns)
output_df_datacols

Unnamed: 0,beers,eeg,flights,hospital,kdd,movie,movies,rayyan,restaurant,restaurants,toy,university,uscensus
FAHES,\textbf{0.83} 0.02 0.04,0.00 0.00 0.00,0.23 0.01 0.02,0.02 0.09 0.04,0.67 0.07 0.13,0.00 0.00 0.00,0.01 0.10 0.02,0.07 0.04 0.05,0.00 0.00 0.00,\textbf{0.00} 0.07 \textbf{0.01},0.00 0.00 0.00,Other error,0.49 0.15 0.23
ForbiddenItemSets,0.34 0.30 0.32,\textbf{0.96} 0.40 0.57,0.56 0.16 0.24,0.01 0.06 0.02,Timeout 1800,0.05 0.14 0.07,0.01 0.06 0.01,Other error,0.01 0.07 0.01,Other error,0.00 0.00 0.00,Other error,0.77 0.35 0.48
KATARA,0.14 0.26 0.18,0.00 0.00 0.00,0.09 0.09 0.09,\textbf{0.08} 0.37 \textbf{0.13},0.70 0.13 0.22,0.28 0.98 0.43,\textbf{0.02} 0.16 \textbf{0.03},0.01 0.02 0.01,0.00 0.13 0.01,0.00 0.22 0.00,0.21 0.75 0.33,0.06 0.29 0.10,0.69 0.25 0.36
Raha,0.16 \textbf{1.00} 0.28,0.95 \textbf{1.00} \textbf{0.98},0.30 \textbf{1.00} 0.46,0.03 \textbf{1.00} 0.05,0.78 \textbf{1.00} \textbf{0.88},0.06 \textbf{1.00} 0.12,0.01 \textbf{1.00} 0.02,0.09 \textbf{1.00} 0.16,0.00 \textbf{1.00} 0.01,0.00 \textbf{1.00} 0.00,0.22 \textbf{1.00} 0.36,0.03 \textbf{1.00} 0.05,0.57 \textbf{1.00} 0.73
dBoost,0.68 0.55 \textbf{0.61},0.95 1.00 0.98,\textbf{0.94} 0.59 \textbf{0.72},0.03 0.43 0.06,\textbf{0.95} 0.42 0.58,\textbf{0.35} \textbf{1.00} \textbf{0.52},0.01 0.09 0.03,\textbf{0.22} 0.77 \textbf{0.34},\textbf{0.03} 0.03 \textbf{0.03},0.00 0.08 0.00,\textbf{0.38} \textbf{1.00} \textbf{0.50},\textbf{0.32} \textbf{1.00} \textbf{0.49},\textbf{0.85} 0.86 \textbf{0.85}


In [46]:
# Same table with datasets as rows and tools as columns.
output_df_datacols.transpose()

Unnamed: 0,FAHES,ForbiddenItemSets,KATARA,Raha,dBoost
beers,\textbf{0.83} 0.02 0.04,0.34 0.30 0.32,0.14 0.26 0.18,0.16 \textbf{1.00} 0.28,0.68 0.55 \textbf{0.61}
eeg,0.00 0.00 0.00,\textbf{0.96} 0.40 0.57,0.00 0.00 0.00,0.95 \textbf{1.00} \textbf{0.98},0.95 1.00 0.98
flights,0.23 0.01 0.02,0.56 0.16 0.24,0.09 0.09 0.09,0.30 \textbf{1.00} 0.46,\textbf{0.94} 0.59 \textbf{0.72}
hospital,0.02 0.09 0.04,0.01 0.06 0.02,\textbf{0.08} 0.37 \textbf{0.13},0.03 \textbf{1.00} 0.05,0.03 0.43 0.06
kdd,0.67 0.07 0.13,Timeout 1800,0.70 0.13 0.22,0.78 \textbf{1.00} \textbf{0.88},\textbf{0.95} 0.42 0.58
movie,0.00 0.00 0.00,0.05 0.14 0.07,0.28 0.98 0.43,0.06 \textbf{1.00} 0.12,\textbf{0.35} \textbf{1.00} \textbf{0.52}
movies,0.01 0.10 0.02,0.01 0.06 0.01,\textbf{0.02} 0.16 \textbf{0.03},0.01 \textbf{1.00} 0.02,0.01 0.09 0.03
rayyan,0.07 0.04 0.05,Other error,0.01 0.02 0.01,0.09 \textbf{1.00} 0.16,\textbf{0.22} 0.77 \textbf{0.34}
restaurant,0.00 0.00 0.00,0.01 0.07 0.01,0.00 0.13 0.01,0.00 \textbf{1.00} 0.01,\textbf{0.03} 0.03 \textbf{0.03}
restaurants,\textbf{0.00} 0.07 \textbf{0.01},Other error,0.00 0.22 0.00,0.00 \textbf{1.00} 0.00,0.00 0.08 0.00


In [47]:
# Raw string: "\&" is not a valid Python escape sequence, so a normal literal
# triggers an invalid-escape warning. The resulting text is unchanged.
captionstr1 = r"|Precision Recall F1-score| for dataset as columns \& tool as row"
# escape=False keeps the LaTeX markup inside the cells intact.
print(output_df_datacols.to_latex(escape=False, caption=captionstr1))

\begin{table}
\centering
\caption{|Precision Recall F1-score| for dataset as columns \& tool as row}
\begin{tabular}{llllllllllllll}
\toprule
{} &                       beers &                                 eeg &                             flights &                            hospital &                                 kdd &                                       movie &                              movies &                              rayyan &                          restaurant &                         restaurants &                                         toy &                                  university &                            uscensus \\
\midrule
FAHES             &  \textbf{0.83} 0.02  0.04   &                  0.00  0.00  0.00   &                  0.23  0.01  0.02   &                  0.02  0.09  0.04   &                  0.67  0.07  0.13   &                          0.00  0.00  0.00   &                  0.01  0.10  0.02   &                  0.07  0.04  0.05   &          

In [48]:
# Raw string: "\&" is not a valid Python escape sequence, so a normal literal
# triggers an invalid-escape warning. The resulting text is unchanged.
captionstr2 = r"|Precision Recall F1-score| for tool as column \& dataset as row"
# Transposed table (datasets as rows); escape=False keeps the LaTeX markup intact.
print(output_df_datacols.T.to_latex(escape=False, caption=captionstr2))

\begin{table}
\centering
\caption{|Precision Recall F1-score| for tool as column \& dataset as row}
\begin{tabular}{llllll}
\toprule
{} &                               FAHES &           ForbiddenItemSets &                              KATARA &                                Raha &                                      dBoost \\
\midrule
beers       &          \textbf{0.83} 0.02  0.04   &          0.34  0.30  0.32   &                  0.14  0.26  0.18   &          0.16  \textbf{1.00} 0.28   &                  0.68  0.55  \textbf{0.61}  \\
eeg         &                  0.00  0.00  0.00   &  \textbf{0.96} 0.40  0.57   &                  0.00  0.00  0.00   &  0.95  \textbf{1.00} \textbf{0.98}  &                          0.95  1.00  0.98   \\
flights     &                  0.23  0.01  0.02   &          0.56  0.16  0.24   &                  0.09  0.09  0.09   &          0.30  \textbf{1.00} 0.46   &          \textbf{0.94} 0.59  \textbf{0.72}  \\
hospital    &                  0.02  0.09  0.04