In [1]:
import json
import pandas as pd
import data_analysis_helpers

In [2]:
# Load the aggregated experiment results into `data`.
# JSON is UTF-8 by specification, so decode it explicitly rather than relying on
# the platform's locale encoding (which open() uses when no encoding is given).
with open('final.json', encoding='utf-8') as f:
    data = json.load(f)

In [3]:
# Column layout shared by every per-metric table built in this cell.
RESULT_COLUMNS = ["Dataset", "Algorithm", "Mean", "Std"]


def _metric_rows(prefix):
    """Collect one [dataset, algorithm, mean, std] row per (dataset, algorithm) pair.

    For every algorithm entry in ``data`` this reads the ``<prefix>_mean`` and
    ``<prefix>_std`` keys of its metrics dict; a missing key raises ``KeyError``.
    """
    return [
        [dataset_name, alg_name, alg_metrics[f"{prefix}_mean"], alg_metrics[f"{prefix}_std"]]
        for dataset_name, alg_results in data.items()
        for alg_name, alg_metrics in alg_results.items()
    ]


# Row lists, one per metric prefix found in the results file.
gini_rows = _metric_rows("gini")
uniqueness_rows = _metric_rows("uniqueness")
ent_rat_rows = _metric_rows("ent_rat")
time_rows = _metric_rows("time")
nb_rows = _metric_rows("table_acc_nb")
nmi_rows = _metric_rows("nmi")
tree_rows = _metric_rows("table_acc_tree")

# Timing is kept in its own frame, separate from the measures in `dataframes`.
time_df = pd.DataFrame(time_rows, columns=RESULT_COLUMNS)

# Measure name -> long-format frame. Insertion order is preserved and drives the
# order in which later cells iterate over the measures.
dataframes = {
    measure: pd.DataFrame(rows, columns=RESULT_COLUMNS)
    for measure, rows in (
        ("gini", gini_rows),
        ("pdp", uniqueness_rows),
        ("ent_ratio", ent_rat_rows),
        ("nb", nb_rows),
        ("tree", tree_rows),
        ("nmi", nmi_rows),
    )
}

In [4]:
# Algorithms to keep in the comparison tables.
desired_algorithms = ['PROPQ', 'EGCFS', 'FSDK', 'GRSSLSF', 'LS', 'MAX_VAR', 'RUSLP', 'VCSDFS']

# Average rankings are currently not computed, so this stays empty.
avg_ranks = {}


def _only_desired(frame):
    """Return the rows of ``frame`` whose 'Algorithm' value is in ``desired_algorithms``."""
    return frame.loc[frame['Algorithm'].isin(desired_algorithms)]


# One LaTeX table per evaluation measure, keyed by the measure name.
latex_tables = {
    eval_meas: data_analysis_helpers.to_latex_table(_only_desired(measure_df), 'PROPQ')
    for eval_meas, measure_df in dataframes.items()
}

# The timing table is the only one built with choose_max=False
# (presumably so the smallest value is highlighted; confirm in the helper).
df = _only_desired(time_df)
latex_tables['time'] = data_analysis_helpers.to_latex_table(df, 'PROPQ', choose_max=False)


In [5]:
# Characters that LaTeX treats specially in running text, mapped to escaped forms.
# A single translate() pass avoids escaping an already-inserted backslash twice.
_LATEX_TEXT_ESCAPES = str.maketrans({'_': r'\_', '%': r'\%', '&': r'\&', '#': r'\#'})

# Wrap every generated tabular in a full-width `table*` float and print it.
for caption, latex_str in latex_tables.items():
    # The captions are the raw dictionary keys (e.g. 'ent_ratio'). A bare underscore
    # inside \caption{...} is a LaTeX error, so escape the key before printing it.
    safe_caption = caption.translate(_LATEX_TEXT_ESCAPES)
    print("\\begin{table*}[!t]")
    print(f"\\caption{{{safe_caption}}}")
    print("\\centering")
    print("\\resizebox{\\textwidth}{!}{%")
    print(latex_str)
    print("}%")  # close resizebox
    print("\\end{table*}")
    print("\n")  # optional spacing between tables

\begin{table*}[!t]
\caption{gini}
\centering
\resizebox{\textwidth}{!}{%
\begin{tabular}{lcccccccc}
\toprule
 & PROPQ & EGCFS & FSDK & GRSSLSF & LS & MAX\_VAR & RUSLP & VCSDFS \\
\midrule
Allaml & \textbf{0.9822 $\pm$ 0.0008} & 0.9797 $\pm$ 0.0014 & 0.9737 $\pm$ 0.0120 & 0.8869 $\pm$ 0.0775 & 0.7718 $\pm$ 0.0312 & 0.9814 $\pm$ 0.0012 & 0.9813 $\pm$ 0.0013 & 0.9781 $\pm$ 0.0032 \\
Arcene & \textbf{0.9863 $\pm$ 0.0004} & 0.9832 $\pm$ 0.0035 & 0.8308 $\pm$ 0.1014 & 0.4260 $\pm$ 0.1884 & 0.6469 $\pm$ 0.0097 & 0.9638 $\pm$ 0.0186 & 0.9855 $\pm$ 0.0012 & 0.3690 $\pm$ 0.1902 \\
Audiology & \textbf{0.9834 $\pm$ 0.0028} & 0.7195 $\pm$ 0.0944 & 0.9664 $\pm$ 0.0121 & 0.9369 $\pm$ 0.0846 & 0.8765 $\pm$ 0.0221 & 0.9829 $\pm$ 0.0026 & 0.1439 $\pm$ 0.0128 & 0.9598 $\pm$ 0.0235 \\
Ba & 0.9926 $\pm$ 0.0012 & 0.9644 $\pm$ 0.0120 & 0.9484 $\pm$ 0.0317 & 0.9316 $\pm$ 0.0366 & 0.9293 $\pm$ 0.0199 & \textbf{0.9932 $\pm$ 0.0015} & 0.9891 $\pm$ 0.0041 & 0.9898 $\pm$ 0.0034 \\
Cll\_sub\_111 & \textbf{0.9879 $\

In [6]:
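# NOTE: disabled scratch code, kept for reference only.
# Before re-enabling: bind `df` to an unfiltered measure frame (the placeholder
# line below is not valid Python), and check the to_latex_table call, which is
# invoked here as (wide_df, filtered_df) but as (df, 'PROPQ') in the live cell above.
# desired_algorithms = ['GRSSLSF', 'FSDK', 'MAX_VAR']  # remove others, and order these
# df = original df
# wide_df = data_analysis_helpers.reshape_df(df)
# wide_df = data_analysis_helpers.reorder_and_filter_columns(wide_df, desired_algorithms)

# # Recompute rankings and LaTeX after filtering
# filtered_df = df[df['Algorithm'].isin(desired_algorithms)]
# avg_ranks = data_analysis_helpers.compute_average_rankings(filtered_df)
# latex_code = data_analysis_helpers.to_latex_table(wide_df, filtered_df)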

In [7]:
# algorithm_order = ["PROPQ", "PROP_REV"]
# result_table = data_analysis_helpers.combine_measures([dataframes['ent_ratio'], dataframes['pdp']], ["Entropy", "PDP"], algorithm_order)

In [8]:
# Side-by-side comparison of PROPQ and PROP_REV on the entropy-ratio and PDP measures.
measure_names = ['Entropy Ratio', 'PDP']
measure_dfs = [dataframes[measure_key] for measure_key in ('ent_ratio', 'pdp')]
selected_algorithms = ['PROPQ', 'PROP_REV']

# Build the combined frame first, then render it as a LaTeX table and print it.
final_df = data_analysis_helpers.create_latex_table(
    measure_dfs,
    measure_names,
    selected_algorithms,
)
latex_code = data_analysis_helpers.df_to_latex(
    final_df,
    caption="Entropy and PDP comparison",
    label="tab:entropy_pdp",
)
print(latex_code)

\begin{table}[htbp]
\centering
\caption{Entropy and PDP comparison}
\label{tab:entropy_pdp}
\begin{tabular}{lcccc}
\toprule
Dataset & Entropy Ratio PROPQ & Entropy Ratio PROP_REV & PDP PROPQ & PDP PROP_REV \\
\midrule
Allaml & \textbf{0.99} & 0.40 & \textbf{0.98} & 0.27 \\
Arcene & \textbf{0.98} & 0.00 & \textbf{0.95} & 0.01 \\
Audiology & \textbf{0.87} & 0.03 & \textbf{0.55} & 0.03 \\
Ba & \textbf{0.76} & 0.46 & \textbf{0.30} & 0.11 \\
Cll\_Sub\_111 & \textbf{0.99} & 0.43 & \textbf{0.96} & 0.25 \\
Coil20 & \textbf{0.52} & 0.15 & \textbf{0.15} & 0.04 \\
Colon & \textbf{0.99} & 0.29 & \textbf{0.97} & 0.23 \\
Glioma & \textbf{0.99} & 0.15 & \textbf{0.98} & 0.14 \\
Isolet & \textbf{0.78} & 0.13 & \textbf{0.38} & 0.05 \\
Leukemia & \textbf{0.99} & 0.06 & \textbf{0.98} & 0.06 \\
Lsvt & \textbf{0.98} & 0.06 & \textbf{0.95} & 0.05 \\
Lymphoma & \textbf{0.99} & 0.46 & \textbf{0.98} & 0.28 \\
Mushrooms & \textbf{0.60} & 0.02 & \textbf{0.03} & 0.00 \\
Nci9 & \textbf{1.00} & 0.00 & \textbf{0.99} 

  avg_ranks = total_ranks.groupby(level=0, axis=1).mean().mean().round(2)


In [9]:
# Side-by-side comparison of PROPQ and PROPS on the entropy-ratio and PDP measures.
measure_names = ['Entropy Ratio', 'PDP']
measure_dfs = [dataframes[measure_key] for measure_key in ('ent_ratio', 'pdp')]
selected_algorithms = ['PROPQ', 'PROPS']

# Build the combined frame first, then render it as a LaTeX table and print it.
final_df = data_analysis_helpers.create_latex_table(
    measure_dfs,
    measure_names,
    selected_algorithms,
)
# NOTE(review): caption and label are the same strings used by the PROPQ vs PROP_REV
# cell; if both tables go into one document the label would be defined twice. Confirm.
latex_code = data_analysis_helpers.df_to_latex(
    final_df,
    caption="Entropy and PDP comparison",
    label="tab:entropy_pdp",
)
print(latex_code)

\begin{table}[htbp]
\centering
\caption{Entropy and PDP comparison}
\label{tab:entropy_pdp}
\begin{tabular}{lcccc}
\toprule
Dataset & Entropy Ratio PROPQ & Entropy Ratio PROPS & PDP PROPQ & PDP PROPS \\
\midrule
Allaml & 0.99 & \textbf{1.00} & 0.98 & \textbf{0.99} \\
Arcene & 0.98 & \textbf{0.99} & 0.95 & \textbf{0.98} \\
Audiology & \textbf{0.87} & \textbf{0.87} & 0.55 & \textbf{0.56} \\
Ba & 0.76 & \textbf{0.89} & 0.30 & \textbf{0.52} \\
Cll\_Sub\_111 & \textbf{0.99} & \textbf{0.99} & 0.96 & \textbf{0.98} \\
Coil20 & 0.52 & \textbf{0.73} & 0.15 & \textbf{0.28} \\
Colon & 0.99 & \textbf{1.00} & 0.97 & \textbf{0.99} \\
Glioma & 0.99 & \textbf{1.00} & 0.98 & \textbf{0.99} \\
Isolet & 0.78 & \textbf{0.89} & 0.38 & \textbf{0.54} \\
Leukemia & 0.99 & \textbf{1.00} & 0.98 & \textbf{0.99} \\
Lsvt & 0.98 & \textbf{0.99} & 0.95 & \textbf{0.96} \\
Lymphoma & 0.99 & \textbf{1.00} & 0.98 & \textbf{0.99} \\
Mushrooms & 0.60 & \textbf{0.61} & \textbf{0.03} & \textbf{0.03} \\
Nci9 & \textbf{1.00} & 

  avg_ranks = total_ranks.groupby(level=0, axis=1).mean().mean().round(2)


In [10]:
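# NOTE: disabled template, kept for reference only. As written it would not run:
# df_entropy and df_pdp are not defined in the cells above, and the helpers are
# called without the data_analysis_helpers. prefix that the live cells use.
# selected_algorithms = ['ORIG', 'VAR1']
# measure_dfs = [df_entropy, df_pdp]
# measure_names = ['Entropy Ratio', 'PDP']

# final_df = create_latex_table(measure_dfs, measure_names, selected_algorithms)
# latex_code = df_to_latex(final_df, caption="Entropy and PDP comparison", label="tab:entropy_pdp")
# print(latex_code)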