In [1]:
import json
import pandas as pd
import data_analysis_helpers

In [2]:
# Load the nested evaluation results; the later cells iterate this as
# {dataset: {algorithm: {metric_name: value}}}.
# JSON text is UTF-8 by specification, so pin the encoding instead of relying
# on the platform's locale default (which is not UTF-8 everywhere).
with open('../evaluation_results_20.json', encoding='utf-8') as f:
    data = json.load(f)

In [3]:
# One row accumulator per metric. Every row has the layout
# [dataset, algorithm, mean, std].
gini_rows, uniqueness_rows, ent_rat_rows = [], [], []
time_rows, nb_rows, nmi_rows, tree_rows = [], [], [], []

# Each accumulator is paired with the two keys it reads from an algorithm's
# metrics dict. The order matches the order in which the metrics are read.
metric_keys = [
    (gini_rows, "gini_mean", "gini_std"),
    (uniqueness_rows, "uniqueness_mean", "uniqueness_std"),
    (ent_rat_rows, "ent_rat_mean", "ent_rat_std"),
    (time_rows, "time_mean", "time_std"),
    (nb_rows, "table_acc_nb_mean", "table_acc_nb_std"),
    (nmi_rows, "nmi_mean", "nmi_std"),
    (tree_rows, "table_acc_tree_mean", "table_acc_tree_std"),
]

# Flatten the nested {dataset: {algorithm: metrics}} mapping into flat rows.
for dataset, algs in data.items():
    for alg, metrics in algs.items():
        for rows, mean_key, std_key in metric_keys:
            rows.append([dataset, alg, metrics[mean_key], metrics[std_key]])

# Build the DataFrames. Runtime is kept in its own frame (`time_df`) rather
# than in `dataframes`, because it is rendered separately from the other
# measures later on.
table_columns = ["Dataset", "Algorithm", "Mean", "Std"]
time_df = pd.DataFrame(time_rows, columns=table_columns)

# Short table names used as keys from here on (note: 'pdp' holds the
# "uniqueness" metric and 'ent_ratio' holds "ent_rat").
rows_by_table = {
    'gini': gini_rows,
    'pdp': uniqueness_rows,
    'ent_ratio': ent_rat_rows,
    'nb': nb_rows,
    'tree': tree_rows,
    'nmi': nmi_rows,
}
dataframes = {
    table_name: pd.DataFrame(table_rows, columns=table_columns)
    for table_name, table_rows in rows_by_table.items()
}

In [4]:
# Algorithms that should appear in the generated tables; rows for any other
# algorithm are filtered out before rendering.
desired_algorithms = ['PROP', 'EGCFS', 'FSDK', 'CNAFS', 'NDFS', 'MCFS', 'RUSLP', 'RUFS', 'UDFS']

avg_ranks = {}     # stays empty: the ranking call below is currently disabled
latex_tables = {}  # table name -> LaTeX string returned by the helper

# One table per quality measure, all rendered with the helper's default settings.
for eval_meas, measure_df in dataframes.items():
    measure_kept = measure_df.loc[measure_df['Algorithm'].isin(desired_algorithms)]
    # avg_ranks[eval_meas] = data_analysis_helpers.compute_average_rankings(measure_kept)
    latex_tables[eval_meas] = data_analysis_helpers.to_latex_table(measure_kept, 'PROP')

# Runtime gets its own table with choose_max=False.
# NOTE(review): presumably this makes the helper highlight the smallest value
# instead of the largest — confirm against data_analysis_helpers.
time_kept = time_df.loc[time_df['Algorithm'].isin(desired_algorithms)]
latex_tables['time'] = data_analysis_helpers.to_latex_table(time_kept, 'PROP', choose_max=False)


In [5]:
# Characters that LaTeX treats specially in running text. The captions are the
# raw dictionary keys (e.g. 'ent_ratio'), and an unescaped '_' inside
# \caption{...} is a compilation error outside math mode.
_LATEX_ESCAPES = str.maketrans({
    '_': r'\_',
    '&': r'\&',
    '%': r'\%',
    '#': r'\#',
    '$': r'\$',
})

# Wrap every generated tabular in a full-width floating table and print it so
# it can be pasted into the paper.
for caption, latex_str in latex_tables.items():
    safe_caption = str(caption).translate(_LATEX_ESCAPES)
    print("\\begin{table*}[!t]")
    print(f"\\caption{{{safe_caption}}}")
    print("\\centering")
    print("\\resizebox{\\textwidth}{!}{%")
    print(latex_str)
    print("}%")  # close resizebox
    print("\\end{table*}")
    print("\n")  # optional spacing between tables

\begin{table*}[!t]
\caption{gini}
\centering
\resizebox{\textwidth}{!}{%
\begin{tabular}{lccccccccc}
\toprule
 & PROP & CNAFS & EGCFS & FSDK & MCFS & NDFS & RUFS & RUSLP & UDFS \\
\midrule
Allaml & \textbf{0.9828 $\pm$ 0.0000} & 0.9828 $\pm$ 0.0000 & 0.9828 $\pm$ 0.0000 & 0.9826 $\pm$ 0.0003 & 0.9813 $\pm$ 0.0015 & 0.9811 $\pm$ 0.0012 & 0.9796 $\pm$ 0.0013 & 0.9828 $\pm$ 0.0000 & 0.9828 $\pm$ 0.0000 \\
Arcene & \textbf{0.9875 $\pm$ 0.0000} & 0.9875 $\pm$ 0.0000 & 0.9875 $\pm$ 0.0000 & 0.9066 $\pm$ 0.1103 & 0.9375 $\pm$ 0.0261 & 0.9805 $\pm$ 0.0089 & 0.9027 $\pm$ 0.1133 & 0.9875 $\pm$ 0.0000 & 0.9867 $\pm$ 0.0010 \\
Audiology & \textbf{0.9903 $\pm$ 0.0004} & 0.2902 $\pm$ 0.0206 & 0.2484 $\pm$ 0.2326 & 0.9890 $\pm$ 0.0011 & 0.9012 $\pm$ 0.0389 & 0.9877 $\pm$ 0.0025 & 0.7123 $\pm$ 0.1076 & 0.2715 $\pm$ 0.0428 & 0.3893 $\pm$ 0.1880 \\
Ba & \textbf{0.9990 $\pm$ 0.0000} & 0.9975 $\pm$ 0.0012 & 0.9936 $\pm$ 0.0019 & 0.9935 $\pm$ 0.0028 & 0.9986 $\pm$ 0.0002 & 0.9987 $\pm$ 0.0001 & 0.9980 $\pm

In [6]:
# NOTE(review): this entire cell is commented out and does nothing when run.
# It sketches a filter -> reshape -> rank -> LaTeX flow. `df = original df` is a
# placeholder rather than valid Python, and the two-argument call
# `to_latex_table(wide_df, filtered_df)` does not match the live usage
# `to_latex_table(df, 'PROP')` in cell 4 — presumably written against an older
# helper signature; confirm before re-enabling, or delete the cell.
# desired_algorithms = ['GRSSLSF', 'FSDK', 'MAX_VAR']  # remove others, and order these
# df = original df
# wide_df = data_analysis_helpers.reshape_df(df)
# wide_df = data_analysis_helpers.reorder_and_filter_columns(wide_df, desired_algorithms)

# # Recompute rankings and LaTeX after filtering
# filtered_df = df[df['Algorithm'].isin(desired_algorithms)]
# avg_ranks = data_analysis_helpers.compute_average_rankings(filtered_df)
# latex_code = data_analysis_helpers.to_latex_table(wide_df, filtered_df)

In [7]:
# NOTE(review): disabled experiment, no effect when run. "PROPQ" is not used by
# any live cell in this notebook and `combine_measures` is not called anywhere
# else here — verify both still exist before re-enabling, or delete the cell.
# algorithm_order = ["PROPQ", "PROP_REV"]
# result_table = data_analysis_helpers.combine_measures([dataframes['ent_ratio'], dataframes['pdp']], ["Entropy", "PDP"], algorithm_order)

In [8]:
# Side-by-side comparison of PROP and PROP_REV on the entropy-ratio and PDP
# measures, printed as a single LaTeX table.
# NOTE(review): the PROP vs PROPS cell further down uses the same caption and
# label; if both tables end up in one document the label is defined twice.
selected_algorithms = ['PROP', 'PROP_REV']
measure_names = ['Entropy Ratio', 'PDP']
measure_dfs = [dataframes[table_name] for table_name in ('ent_ratio', 'pdp')]

final_df = data_analysis_helpers.create_latex_table(
    measure_dfs,
    measure_names,
    selected_algorithms,
)
latex_code = data_analysis_helpers.df_to_latex(
    final_df,
    caption="Entropy and PDP comparison",
    label="tab:entropy_pdp",
)
print(latex_code)

\begin{table}[htbp]
\centering
\caption{Entropy and PDP comparison}
\label{tab:entropy_pdp}
\begin{tabular}{lcccc}
\toprule
Dataset & Entropy Ratio PROP & Entropy Ratio PROP_REV & PDP PROP & PDP PROP_REV \\
\midrule
Allaml & \textbf{1.00} & 0.71 & \textbf{1.00} & 0.54 \\
Arcene & \textbf{1.00} & 0.01 & \textbf{1.00} & 0.02 \\
Audiology & \textbf{0.95} & 0.09 & \textbf{0.76} & 0.08 \\
Ba & \textbf{0.99} & 0.74 & \textbf{0.95} & 0.35 \\
Cll\_Sub\_111 & \textbf{1.00} & 0.75 & \textbf{1.00} & 0.59 \\
Coil20 & \textbf{0.87} & 0.25 & \textbf{0.60} & 0.11 \\
Colon & \textbf{1.00} & 0.52 & \textbf{1.00} & 0.41 \\
Glioma & \textbf{1.00} & 0.32 & \textbf{1.00} & 0.29 \\
Isolet & \textbf{1.00} & 0.26 & \textbf{1.00} & 0.15 \\
Leukemia & \textbf{1.00} & 0.20 & \textbf{1.00} & 0.18 \\
Lsvt & \textbf{1.00} & 0.08 & \textbf{1.00} & 0.07 \\
Lymphoma & \textbf{1.00} & 0.55 & \textbf{1.00} & 0.44 \\
Mushrooms & \textbf{0.76} & 0.04 & \textbf{0.12} & 0.00 \\
Nci9 & \textbf{1.00} & 0.01 & \textbf{1.00} & 

  avg_ranks = total_ranks.groupby(level=0, axis=1).mean().mean().round(2)


In [9]:
# Same comparison layout as the PROP vs PROP_REV cell, this time for PROP
# against PROPS.
# NOTE(review): caption and label are identical to that earlier cell; if both
# tables are included in one LaTeX document the label `tab:entropy_pdp` will be
# multiply defined — consider giving this table its own label.
selected_algorithms = ['PROP', 'PROPS']

# Display name of each measure -> the frame holding its per-dataset results.
measures = {
    'Entropy Ratio': dataframes['ent_ratio'],
    'PDP': dataframes['pdp'],
}
measure_dfs = list(measures.values())
measure_names = list(measures)

final_df = data_analysis_helpers.create_latex_table(
    measure_dfs, measure_names, selected_algorithms
)
latex_code = data_analysis_helpers.df_to_latex(
    final_df, caption="Entropy and PDP comparison", label="tab:entropy_pdp"
)
print(latex_code)

\begin{table}[htbp]
\centering
\caption{Entropy and PDP comparison}
\label{tab:entropy_pdp}
\begin{tabular}{lcccc}
\toprule
Dataset & Entropy Ratio PROP & Entropy Ratio PROPS & PDP PROP & PDP PROPS \\
\midrule
Allaml & \textbf{1.00} & \textbf{1.00} & \textbf{1.00} & \textbf{1.00} \\
Arcene & \textbf{1.00} & \textbf{1.00} & \textbf{1.00} & \textbf{1.00} \\
Audiology & \textbf{0.95} & \textbf{0.95} & 0.76 & \textbf{0.77} \\
Ba & \textbf{0.99} & \textbf{0.99} & \textbf{0.95} & \textbf{0.95} \\
Cll\_Sub\_111 & \textbf{1.00} & \textbf{1.00} & \textbf{1.00} & \textbf{1.00} \\
Coil20 & \textbf{0.87} & \textbf{0.87} & \textbf{0.60} & 0.59 \\
Colon & \textbf{1.00} & \textbf{1.00} & \textbf{1.00} & \textbf{1.00} \\
Glioma & \textbf{1.00} & \textbf{1.00} & \textbf{1.00} & \textbf{1.00} \\
Isolet & \textbf{1.00} & \textbf{1.00} & \textbf{1.00} & \textbf{1.00} \\
Leukemia & \textbf{1.00} & \textbf{1.00} & \textbf{1.00} & \textbf{1.00} \\
Lsvt & \textbf{1.00} & \textbf{1.00} & \textbf{1.00} & \textb

  avg_ranks = total_ranks.groupby(level=0, axis=1).mean().mean().round(2)


In [10]:
# NOTE(review): disabled template of the two comparison cells above; it has no
# effect when run. It refers to `df_entropy` / `df_pdp`, which are not defined
# in the visible cells (the live cells use dataframes['ent_ratio'] and
# dataframes['pdp']), and calls the helpers without the
# `data_analysis_helpers.` prefix — it would need both fixed before it could run.
# selected_algorithms = ['ORIG', 'VAR1']
# measure_dfs = [df_entropy, df_pdp]
# measure_names = ['Entropy Ratio', 'PDP']

# final_df = create_latex_table(measure_dfs, measure_names, selected_algorithms)
# latex_code = df_to_latex(final_df, caption="Entropy and PDP comparison", label="tab:entropy_pdp")
# print(latex_code)