In [23]:
import os
import pandas as pd
import pickle

# Define the base directory where the results are stored
base_dir = "results"

# Initialize an empty DataFrame to store all results
df = pd.DataFrame(columns=['dataset_name', 'model_name', 'n_permutations', 'n_ensemble'])


def load_data(model_path):
    result_dict = {}
    with open(model_path, 'rb') as f:
        data = pickle.load(f)
        model_name = list(data.keys())[0]
        print(data[model_name].columns, data[model_name].index)
        if "mean" in data[model_name].columns:
            df = data[model_name][["mean", "stddev"]]
            train_scores = False
        elif "mean_train" in data[model_name].columns:
            df = data[model_name][["mean_train", "stddev_train"]]
            # rename mean_train to mean and stddev_train to stddev
            df.rename(columns={"mean_train": "mean", "stddev_train": "stddev"}, inplace=True)
            train_scores = True
        else:
            raise ValueError(f"No mean or mean_train in {model_name}")
    for idx in df.index:
        mean_col_name = f"{idx.replace('.', '_')}_mean"
        stddev_col_name = f"{idx.replace('.', '_')}_stddev"
        # if train_scores:
        #     mean_col_name = f"{mean_col_name}_train"
        #     stddev_col_name = f"{stddev_col_name}_train"
        
        result_dict[mean_col_name] = df.at[idx, 'mean']
        result_dict[stddev_col_name] = df.at[idx, 'stddev']

    result_dict["model_name"] = model_name
    result_dict["use_X_as_train"] = train_scores
    
    return result_dict

# Walk through the directory structure
for dataset_name in os.listdir(base_dir):
    dataset_path = os.path.join(base_dir, dataset_name)
    if not os.path.isdir(dataset_path):
        continue
    
    for model_name in os.listdir(dataset_path):
        model_name_clean = model_name.replace('.pkl', '')
        model_path = os.path.join(dataset_path, model_name)
        # Handle the case for "tabpfn_points" with permutations and ensembles
        if "normalization" not in model_name_clean:
            continue
        model_name_clean = model_name_clean.replace('_train', '')
        model_name_clean = model_name_clean.replace('normalization_quantile', '')
        #if "train" in model_name_clean:
        #    continue
        if model_name.startswith("tabpfn_points") and len(model_name_clean.split('_')) == 4:
            continue
            n_permutations, n_ensemble = model_name_clean.split('_')[2], model_name_clean.split('_')[3]
            model_name_clean = "tabpfn_points"
        else:
            n_permutations = None
            n_ensemble = None
        row = {'dataset_name': dataset_name, 'n_permutations': n_permutations, 'n_ensemble': n_ensemble}
        # 
        row.update(load_data(model_path))
        print(pd.DataFrame(row, index=[0]))
        df = pd.concat([df, pd.DataFrame(row, index=[0])], ignore_index=True)

# Save the consolidated DataFrame to a CSV file
df.to_csv('consolidated_results.csv', index=False)

# remove MiniBooNE from the results as some results are missing
#df = df[df['dataset_name'] != 'MiniBooNE']

# # if model_name contains "smote_imblearn", rename to "smote_imblearn_mean"
# df["model_name"] = df["model_name"].apply(lambda x: "smote_imblearn" if "smote_imblearn" in x else x)
# # if model_name contains gaussian_noise_noise_std_{number}, rename togaussian_noise_noise_std_{number}
# df["model_name"] = df["model_name"].apply(lambda x: "gaussian_noise_noise_std" + "_" + x.split("_")[4] if "gaussian_noise_noise_std" in x else x)
# # remove duplicates
df = df.drop_duplicates(subset=["model_name", "dataset_name", "n_permutations", "n_ensemble", "use_X_as_train"])
# count datasets per model
#df.groupby("model_name").count()
# count rows with sanity_nearest_real_neighbor_distance_no_norm_mean_mean not nan
#df[df["sanity_nearest_real_neighbor_distance_no_norm_mean_mean_train"].notna()].groupby("model_name").count()


Index(['min', 'max', 'mean', 'stddev', 'median', 'iqr', 'rounds', 'errors',
       'durations', 'direction'],
      dtype='object') Index(['sanity.data_mismatch.score', 'sanity.common_rows_proportion.score',
       'sanity.nearest_syn_neighbor_distance.mean',
       'sanity.nearest_real_neighbor_distance.mean',
       'sanity.nearest_syn_neighbor_distance_no_norm.mean',
       'sanity.nearest_real_neighbor_distance_no_norm.mean',
       'sanity.close_values_probability.score',
       'sanity.distant_values_probability.score',
       'stats.jensenshannon_dist.marginal', 'stats.chi_squared_test.marginal',
       'stats.inv_kl_divergence.marginal', 'stats.ks_test.marginal',
       'stats.max_mean_discrepancy.joint', 'stats.wasserstein_dist.joint',
       'stats.prdc.precision', 'stats.prdc.recall', 'stats.prdc.density',
       'stats.prdc.coverage', 'detection.detection_xgb.mean',
       'detection.detection_gmm.mean', 'detection.detection_linear.mean',
       'privacy.delta-presence.scor



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions wit

Index(['min_train', 'max_train', 'mean_train', 'stddev_train', 'median_train',
       'iqr_train', 'rounds_train', 'errors_train', 'durations_train',
       'direction_train'],
      dtype='object') Index(['sanity.data_mismatch.score', 'sanity.common_rows_proportion.score',
       'sanity.nearest_syn_neighbor_distance.mean',
       'sanity.nearest_real_neighbor_distance.mean',
       'sanity.nearest_syn_neighbor_distance_no_norm.mean',
       'sanity.nearest_real_neighbor_distance_no_norm.mean',
       'sanity.close_values_probability.score',
       'sanity.distant_values_probability.score',
       'stats.jensenshannon_dist.marginal', 'stats.chi_squared_test.marginal',
       'stats.inv_kl_divergence.marginal', 'stats.ks_test.marginal',
       'stats.max_mean_discrepancy.joint', 'stats.wasserstein_dist.joint',
       'stats.prdc.precision', 'stats.prdc.recall', 'stats.prdc.density',
       'stats.prdc.coverage', 'detection.detection_xgb.mean',
       'detection.detection_gmm.mean', 'de


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions wit

     dataset_name n_permutations n_ensemble  sanity_data_mismatch_score_mean  \
0  bank-marketing           None       None                              0.0   

   sanity_data_mismatch_score_stddev  \
0                                0.0   

   sanity_common_rows_proportion_score_mean  \
0                                       0.0   

   sanity_common_rows_proportion_score_stddev  \
0                                         0.0   

   sanity_nearest_syn_neighbor_distance_mean_mean  \
0                                        0.372345   

   sanity_nearest_syn_neighbor_distance_mean_stddev  \
0                                          0.007527   

   sanity_nearest_real_neighbor_distance_mean_mean  ...  \
0                                         0.176833  ...   

   privacy_distinct l-diversity_gt_mean  \
0                                 595.0   

   privacy_distinct l-diversity_gt_stddev  \
0                                     0.0   

   privacy_distinct l-diversity_syn_mean  \
0    



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions wit

Index(['min', 'max', 'mean', 'stddev', 'median', 'iqr', 'rounds', 'errors',
       'durations', 'direction'],
      dtype='object') Index(['sanity.data_mismatch.score', 'sanity.common_rows_proportion.score',
       'sanity.nearest_syn_neighbor_distance.mean',
       'sanity.nearest_real_neighbor_distance.mean',
       'sanity.nearest_syn_neighbor_distance_no_norm.mean',
       'sanity.nearest_real_neighbor_distance_no_norm.mean',
       'sanity.close_values_probability.score',
       'sanity.distant_values_probability.score',
       'stats.jensenshannon_dist.marginal', 'stats.chi_squared_test.marginal',
       'stats.inv_kl_divergence.marginal', 'stats.ks_test.marginal',
       'stats.max_mean_discrepancy.joint', 'stats.wasserstein_dist.joint',
       'stats.prdc.precision', 'stats.prdc.recall', 'stats.prdc.density',
       'stats.prdc.coverage', 'performance.linear_model.gt',
       'performance.linear_model.syn_id', 'performance.linear_model.syn_ood',
       'performance.xgb.gt', 'pe



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



Index(['min', 'max', 'mean', 'stddev', 'median', 'iqr', 'rounds', 'errors',
       'durations', 'direction'],
      dtype='object') Index(['sanity.data_mismatch.score', 'sanity.common_rows_proportion.score',
       'sanity.nearest_syn_neighbor_distance.mean',
       'sanity.nearest_real_neighbor_distance.mean',
       'sanity.nearest_syn_neighbor_distance_no_norm.mean',
       'sanity.nearest_real_neighbor_distance_no_norm.mean',
       'sanity.close_values_probability.score',
       'sanity.distant_values_probability.score',
       'stats.jensenshannon_dist.marginal', 'stats.chi_squared_test.marginal',
       'stats.inv_kl_divergence.marginal', 'stats.ks_test.marginal',
       'stats.max_mean_discrepancy.joint', 'stats.wasserstein_dist.joint',
       'stats.prdc.precision', 'stats.prdc.recall', 'stats.prdc.density',
       'stats.prdc.coverage', 'performance.linear_model.gt',
       'performance.linear_model.syn_id', 'performance.linear_model.syn_ood',
       'performance.xgb.gt', 'pe


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions wit

Index(['min_train', 'max_train', 'mean_train', 'stddev_train', 'median_train',
       'iqr_train', 'rounds_train', 'errors_train', 'durations_train',
       'direction_train'],
      dtype='object') Index(['sanity.data_mismatch.score', 'sanity.common_rows_proportion.score',
       'sanity.nearest_syn_neighbor_distance.mean',
       'sanity.nearest_real_neighbor_distance.mean',
       'sanity.nearest_syn_neighbor_distance_no_norm.mean',
       'sanity.nearest_real_neighbor_distance_no_norm.mean',
       'sanity.close_values_probability.score',
       'sanity.distant_values_probability.score',
       'stats.jensenshannon_dist.marginal', 'stats.chi_squared_test.marginal',
       'stats.inv_kl_divergence.marginal', 'stats.ks_test.marginal',
       'stats.max_mean_discrepancy.joint', 'stats.wasserstein_dist.joint',
       'stats.prdc.precision', 'stats.prdc.recall', 'stats.prdc.density',
       'stats.prdc.coverage', 'performance.linear_model.gt',
       'performance.linear_model.syn_id', '


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions wit

    dataset_name n_permutations n_ensemble  sanity_data_mismatch_score_mean  \
0  eye_movements           None       None                              0.0   

   sanity_data_mismatch_score_stddev  \
0                                0.0   

   sanity_common_rows_proportion_score_mean  \
0                                       0.0   

   sanity_common_rows_proportion_score_stddev  \
0                                         0.0   

   sanity_nearest_syn_neighbor_distance_mean_mean  \
0                                         0.41018   

   sanity_nearest_syn_neighbor_distance_mean_stddev  \
0                                          0.059183   

   sanity_nearest_real_neighbor_distance_mean_mean  ...  \
0                                         0.663564  ...   

   privacy_k-anonymization_syn_mean  privacy_k-anonymization_syn_stddev  \
0                         13.666667                            2.494438   

   privacy_k-map_score_mean  privacy_k-map_score_stddev  \
0                  


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions wit

  dataset_name n_permutations n_ensemble  sanity_data_mismatch_score_mean  \
0        heloc           None       None                              0.0   

   sanity_data_mismatch_score_stddev  \
0                                0.0   

   sanity_common_rows_proportion_score_mean  \
0                                    0.0001   

   sanity_common_rows_proportion_score_stddev  \
0                                         0.0   

   sanity_nearest_syn_neighbor_distance_mean_mean  \
0                                        0.420914   

   sanity_nearest_syn_neighbor_distance_mean_stddev  \
0                                           0.01535   

   sanity_nearest_real_neighbor_distance_mean_mean  ...  \
0                                         0.290324  ...   

   privacy_distinct l-diversity_gt_mean  \
0                                 263.0   

   privacy_distinct l-diversity_gt_stddev  \
0                                     0.0   

   privacy_distinct l-diversity_syn_mean  \
0          


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



  dataset_name n_permutations n_ensemble  sanity_data_mismatch_score_mean  \
0        heloc           None       None                              0.0   

   sanity_data_mismatch_score_stddev  \
0                                0.0   

   sanity_common_rows_proportion_score_mean  \
0                                    0.0005   

   sanity_common_rows_proportion_score_stddev  \
0                                         0.0   

   sanity_nearest_syn_neighbor_distance_mean_mean  \
0                                        0.460784   

   sanity_nearest_syn_neighbor_distance_mean_stddev  \
0                                          0.002875   

   sanity_nearest_real_neighbor_distance_mean_mean  ...  \
0                                          0.54154  ...   

   privacy_distinct l-diversity_gt_mean  \
0                                  61.0   

   privacy_distinct l-diversity_gt_stddev  \
0                                     0.0   

   privacy_distinct l-diversity_syn_mean  \
0          



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



Index(['min', 'max', 'mean', 'stddev', 'median', 'iqr', 'rounds', 'errors',
       'durations', 'direction'],
      dtype='object') Index(['sanity.data_mismatch.score', 'sanity.common_rows_proportion.score',
       'sanity.nearest_syn_neighbor_distance.mean',
       'sanity.nearest_real_neighbor_distance.mean',
       'sanity.nearest_syn_neighbor_distance_no_norm.mean',
       'sanity.nearest_real_neighbor_distance_no_norm.mean',
       'sanity.close_values_probability.score',
       'sanity.distant_values_probability.score',
       'stats.jensenshannon_dist.marginal', 'stats.chi_squared_test.marginal',
       'stats.inv_kl_divergence.marginal', 'stats.ks_test.marginal',
       'stats.max_mean_discrepancy.joint', 'stats.wasserstein_dist.joint',
       'stats.prdc.precision', 'stats.prdc.recall', 'stats.prdc.density',
       'stats.prdc.coverage', 'detection.detection_xgb.mean',
       'detection.detection_gmm.mean', 'detection.detection_linear.mean',
       'privacy.delta-presence.scor


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions wit

Index(['min_train', 'max_train', 'mean_train', 'stddev_train', 'median_train',
       'iqr_train', 'rounds_train', 'errors_train', 'durations_train',
       'direction_train'],
      dtype='object') Index(['sanity.data_mismatch.score', 'sanity.common_rows_proportion.score',
       'sanity.nearest_syn_neighbor_distance.mean',
       'sanity.nearest_real_neighbor_distance.mean',
       'sanity.nearest_syn_neighbor_distance_no_norm.mean',
       'sanity.nearest_real_neighbor_distance_no_norm.mean',
       'sanity.close_values_probability.score',
       'sanity.distant_values_probability.score',
       'stats.jensenshannon_dist.marginal', 'stats.chi_squared_test.marginal',
       'stats.inv_kl_divergence.marginal', 'stats.ks_test.marginal',
       'stats.max_mean_discrepancy.joint', 'stats.wasserstein_dist.joint',
       'stats.prdc.precision', 'stats.prdc.recall', 'stats.prdc.density',
       'stats.prdc.coverage', 'detection.detection_xgb.mean',
       'detection.detection_gmm.mean', 'de


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions wit

Index(['min', 'max', 'mean', 'stddev', 'median', 'iqr', 'rounds', 'errors',
       'durations', 'direction'],
      dtype='object') Index(['sanity.data_mismatch.score', 'sanity.common_rows_proportion.score',
       'sanity.nearest_syn_neighbor_distance.mean',
       'sanity.nearest_real_neighbor_distance.mean',
       'sanity.nearest_syn_neighbor_distance_no_norm.mean',
       'sanity.nearest_real_neighbor_distance_no_norm.mean',
       'sanity.close_values_probability.score',
       'sanity.distant_values_probability.score',
       'stats.jensenshannon_dist.marginal', 'stats.chi_squared_test.marginal',
       'stats.inv_kl_divergence.marginal', 'stats.ks_test.marginal',
       'stats.max_mean_discrepancy.joint', 'stats.wasserstein_dist.joint',
       'stats.prdc.precision', 'stats.prdc.recall', 'stats.prdc.density',
       'stats.prdc.coverage', 'detection.detection_xgb.mean',
       'detection.detection_gmm.mean', 'detection.detection_linear.mean',
       'privacy.delta-presence.scor


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions wit

     dataset_name n_permutations n_ensemble  sanity_data_mismatch_score_mean  \
0  MagicTelescope           None       None                              0.0   

   sanity_data_mismatch_score_stddev  \
0                                0.0   

   sanity_common_rows_proportion_score_mean  \
0                                       0.0   

   sanity_common_rows_proportion_score_stddev  \
0                                         0.0   

   sanity_nearest_syn_neighbor_distance_mean_mean  \
0                                        0.307006   

   sanity_nearest_syn_neighbor_distance_mean_stddev  \
0                                          0.025141   

   sanity_nearest_real_neighbor_distance_mean_mean  ...  \
0                                          0.42266  ...   

   privacy_distinct l-diversity_gt_mean  \
0                                 543.0   

   privacy_distinct l-diversity_gt_stddev  \
0                                     0.0   

   privacy_distinct l-diversity_syn_mean  \
0    


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions wit

     dataset_name n_permutations n_ensemble  sanity_data_mismatch_score_mean  \
0  MagicTelescope           None       None                              0.0   

   sanity_data_mismatch_score_stddev  \
0                                0.0   

   sanity_common_rows_proportion_score_mean  \
0                                  0.038278   

   sanity_common_rows_proportion_score_stddev  \
0                                         0.0   

   sanity_nearest_syn_neighbor_distance_mean_mean  \
0                                        0.356873   

   sanity_nearest_syn_neighbor_distance_mean_stddev  \
0                                          0.005121   

   sanity_nearest_real_neighbor_distance_mean_mean  ...  \
0                                              0.0  ...   

   privacy_distinct l-diversity_gt_mean  \
0                                 543.0   

   privacy_distinct l-diversity_gt_stddev  \
0                                     0.0   

   privacy_distinct l-diversity_syn_mean  \
0    



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions wit

Index(['min_train', 'max_train', 'mean_train', 'stddev_train', 'median_train',
       'iqr_train', 'rounds_train', 'errors_train', 'durations_train',
       'direction_train'],
      dtype='object') Index(['sanity.data_mismatch.score', 'sanity.common_rows_proportion.score',
       'sanity.nearest_syn_neighbor_distance.mean',
       'sanity.nearest_real_neighbor_distance.mean',
       'sanity.nearest_syn_neighbor_distance_no_norm.mean',
       'sanity.nearest_real_neighbor_distance_no_norm.mean',
       'sanity.close_values_probability.score',
       'sanity.distant_values_probability.score',
       'stats.jensenshannon_dist.marginal', 'stats.chi_squared_test.marginal',
       'stats.inv_kl_divergence.marginal', 'stats.ks_test.marginal',
       'stats.max_mean_discrepancy.joint', 'stats.wasserstein_dist.joint',
       'stats.prdc.precision', 'stats.prdc.recall', 'stats.prdc.density',
       'stats.prdc.coverage', 'detection.detection_xgb.mean',
       'detection.detection_gmm.mean', 'de


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions wit

  dataset_name n_permutations n_ensemble  sanity_data_mismatch_score_mean  \
0   california           None       None                              0.0   

   sanity_data_mismatch_score_stddev  \
0                                0.0   

   sanity_common_rows_proportion_score_mean  \
0                                       0.0   

   sanity_common_rows_proportion_score_stddev  \
0                                         0.0   

   sanity_nearest_syn_neighbor_distance_mean_mean  \
0                                        0.389077   

   sanity_nearest_syn_neighbor_distance_mean_stddev  \
0                                          0.011743   

   sanity_nearest_real_neighbor_distance_mean_mean  ...  \
0                                         0.374593  ...   

   privacy_distinct l-diversity_gt_mean  \
0                                 206.0   

   privacy_distinct l-diversity_gt_stddev  \
0                                     0.0   

   privacy_distinct l-diversity_syn_mean  \
0          


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions wit

  dataset_name n_permutations n_ensemble  sanity_data_mismatch_score_mean  \
0   california           None       None                              0.0   

   sanity_data_mismatch_score_stddev  \
0                                0.0   

   sanity_common_rows_proportion_score_mean  \
0                                       0.0   

   sanity_common_rows_proportion_score_stddev  \
0                                         0.0   

   sanity_nearest_syn_neighbor_distance_mean_mean  \
0                                        0.384571   

   sanity_nearest_syn_neighbor_distance_mean_stddev  \
0                                          0.033024   

   sanity_nearest_real_neighbor_distance_mean_mean  ...  \
0                                         0.461012  ...   

   privacy_distinct l-diversity_gt_mean  \
0                                 206.0   

   privacy_distinct l-diversity_gt_stddev  \
0                                     0.0   

   privacy_distinct l-diversity_syn_mean  \
0          


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions wit

  dataset_name n_permutations n_ensemble  sanity_data_mismatch_score_mean  \
0   california           None       None                              0.0   

   sanity_data_mismatch_score_stddev  \
0                                0.0   

   sanity_common_rows_proportion_score_mean  \
0                                       0.0   

   sanity_common_rows_proportion_score_stddev  \
0                                         0.0   

   sanity_nearest_syn_neighbor_distance_mean_mean  \
0                                        0.375458   

   sanity_nearest_syn_neighbor_distance_mean_stddev  \
0                                           0.02459   

   sanity_nearest_real_neighbor_distance_mean_mean  ...  \
0                                         0.298386  ...   

   privacy_distinct l-diversity_gt_mean  \
0                                 872.0   

   privacy_distinct l-diversity_gt_stddev  \
0                                     0.0   

   privacy_distinct l-diversity_syn_mean  \
0          


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions wit

Index(['min_train', 'max_train', 'mean_train', 'stddev_train', 'median_train',
       'iqr_train', 'rounds_train', 'errors_train', 'durations_train',
       'direction_train'],
      dtype='object') Index(['sanity.data_mismatch.score', 'sanity.common_rows_proportion.score',
       'sanity.nearest_syn_neighbor_distance.mean',
       'sanity.nearest_real_neighbor_distance.mean',
       'sanity.nearest_syn_neighbor_distance_no_norm.mean',
       'sanity.nearest_real_neighbor_distance_no_norm.mean',
       'sanity.close_values_probability.score',
       'sanity.distant_values_probability.score',
       'stats.jensenshannon_dist.marginal', 'stats.chi_squared_test.marginal',
       'stats.inv_kl_divergence.marginal', 'stats.ks_test.marginal',
       'stats.max_mean_discrepancy.joint', 'stats.wasserstein_dist.joint',
       'stats.prdc.precision', 'stats.prdc.recall', 'stats.prdc.density',
       'stats.prdc.coverage', 'detection.detection_xgb.mean',
       'detection.detection_gmm.mean', 'de


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions wit

  dataset_name n_permutations n_ensemble  sanity_data_mismatch_score_mean  \
0    house_16H           None       None                              0.0   

   sanity_data_mismatch_score_stddev  \
0                                0.0   

   sanity_common_rows_proportion_score_mean  \
0                                   0.03796   

   sanity_common_rows_proportion_score_stddev  \
0                                         0.0   

   sanity_nearest_syn_neighbor_distance_mean_mean  \
0                                        0.378316   

   sanity_nearest_syn_neighbor_distance_mean_stddev  \
0                                           0.02089   

   sanity_nearest_real_neighbor_distance_mean_mean  ...  \
0                                         0.146931  ...   

   privacy_distinct l-diversity_gt_mean  \
0                                 713.0   

   privacy_distinct l-diversity_gt_stddev  \
0                                     0.0   

   privacy_distinct l-diversity_syn_mean  \
0          


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions wit

Index(['min_train', 'max_train', 'mean_train', 'stddev_train', 'median_train',
       'iqr_train', 'rounds_train', 'errors_train', 'durations_train',
       'direction_train'],
      dtype='object') Index(['sanity.data_mismatch.score', 'sanity.common_rows_proportion.score',
       'sanity.nearest_syn_neighbor_distance.mean',
       'sanity.nearest_real_neighbor_distance.mean',
       'sanity.nearest_syn_neighbor_distance_no_norm.mean',
       'sanity.nearest_real_neighbor_distance_no_norm.mean',
       'sanity.close_values_probability.score',
       'sanity.distant_values_probability.score',
       'stats.jensenshannon_dist.marginal', 'stats.chi_squared_test.marginal',
       'stats.inv_kl_divergence.marginal', 'stats.ks_test.marginal',
       'stats.max_mean_discrepancy.joint', 'stats.wasserstein_dist.joint',
       'stats.prdc.precision', 'stats.prdc.recall', 'stats.prdc.density',
       'stats.prdc.coverage', 'detection.detection_xgb.mean',
       'detection.detection_gmm.mean', 'de


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions wit

Index(['min', 'max', 'mean', 'stddev', 'median', 'iqr', 'rounds', 'errors',
       'durations', 'direction'],
      dtype='object') Index(['sanity.data_mismatch.score', 'sanity.common_rows_proportion.score',
       'sanity.nearest_syn_neighbor_distance.mean',
       'sanity.nearest_real_neighbor_distance.mean',
       'sanity.nearest_syn_neighbor_distance_no_norm.mean',
       'sanity.nearest_real_neighbor_distance_no_norm.mean',
       'sanity.close_values_probability.score',
       'sanity.distant_values_probability.score',
       'stats.jensenshannon_dist.marginal', 'stats.chi_squared_test.marginal',
       'stats.inv_kl_divergence.marginal', 'stats.ks_test.marginal',
       'stats.max_mean_discrepancy.joint', 'stats.wasserstein_dist.joint',
       'stats.prdc.precision', 'stats.prdc.recall', 'stats.prdc.density',
       'stats.prdc.coverage', 'detection.detection_xgb.mean',
       'detection.detection_gmm.mean', 'detection.detection_linear.mean',
       'privacy.delta-presence.scor


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions wit

Index(['min_train', 'max_train', 'mean_train', 'stddev_train', 'median_train',
       'iqr_train', 'rounds_train', 'errors_train', 'durations_train',
       'direction_train'],
      dtype='object') Index(['sanity.data_mismatch.score', 'sanity.common_rows_proportion.score',
       'sanity.nearest_syn_neighbor_distance.mean',
       'sanity.nearest_real_neighbor_distance.mean',
       'sanity.nearest_syn_neighbor_distance_no_norm.mean',
       'sanity.nearest_real_neighbor_distance_no_norm.mean',
       'sanity.close_values_probability.score',
       'sanity.distant_values_probability.score',
       'stats.jensenshannon_dist.marginal', 'stats.chi_squared_test.marginal',
       'stats.inv_kl_divergence.marginal', 'stats.ks_test.marginal',
       'stats.max_mean_discrepancy.joint', 'stats.wasserstein_dist.joint',
       'stats.prdc.precision', 'stats.prdc.recall', 'stats.prdc.density',
       'stats.prdc.coverage', 'detection.detection_xgb.mean',
       'detection.detection_gmm.mean', 'de


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions wit

Index(['min_train', 'max_train', 'mean_train', 'stddev_train', 'median_train',
       'iqr_train', 'rounds_train', 'errors_train', 'durations_train',
       'direction_train'],
      dtype='object') Index(['sanity.data_mismatch.score', 'sanity.common_rows_proportion.score',
       'sanity.nearest_syn_neighbor_distance.mean',
       'sanity.nearest_real_neighbor_distance.mean',
       'sanity.nearest_syn_neighbor_distance_no_norm.mean',
       'sanity.nearest_real_neighbor_distance_no_norm.mean',
       'sanity.close_values_probability.score',
       'sanity.distant_values_probability.score',
       'stats.jensenshannon_dist.marginal', 'stats.chi_squared_test.marginal',
       'stats.inv_kl_divergence.marginal', 'stats.ks_test.marginal',
       'stats.max_mean_discrepancy.joint', 'stats.wasserstein_dist.joint',
       'stats.prdc.precision', 'stats.prdc.recall', 'stats.prdc.density',
       'stats.prdc.coverage', 'detection.detection_xgb.mean',
       'detection.detection_gmm.mean', 'de


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions wit

Index(['min_train', 'max_train', 'mean_train', 'stddev_train', 'median_train',
       'iqr_train', 'rounds_train', 'errors_train', 'durations_train',
       'direction_train'],
      dtype='object') Index(['sanity.data_mismatch.score', 'sanity.common_rows_proportion.score',
       'sanity.nearest_syn_neighbor_distance.mean',
       'sanity.nearest_real_neighbor_distance.mean',
       'sanity.nearest_syn_neighbor_distance_no_norm.mean',
       'sanity.nearest_real_neighbor_distance_no_norm.mean',
       'sanity.close_values_probability.score',
       'sanity.distant_values_probability.score',
       'stats.jensenshannon_dist.marginal', 'stats.chi_squared_test.marginal',
       'stats.inv_kl_divergence.marginal', 'stats.ks_test.marginal',
       'stats.max_mean_discrepancy.joint', 'stats.wasserstein_dist.joint',
       'stats.prdc.precision', 'stats.prdc.recall', 'stats.prdc.density',
       'stats.prdc.coverage', 'detection.detection_xgb.mean',
       'detection.detection_gmm.mean', 'de


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions wit

  dataset_name n_permutations n_ensemble  sanity_data_mismatch_score_mean  \
0  electricity           None       None                              0.0   

   sanity_data_mismatch_score_stddev  \
0                                0.0   

   sanity_common_rows_proportion_score_mean  \
0                                       0.0   

   sanity_common_rows_proportion_score_stddev  \
0                                         0.0   

   sanity_nearest_syn_neighbor_distance_mean_mean  \
0                                        0.217814   

   sanity_nearest_syn_neighbor_distance_mean_stddev  \
0                                          0.008688   

   sanity_nearest_real_neighbor_distance_mean_mean  ...  \
0                                         0.209879  ...   

   privacy_distinct l-diversity_gt_mean  \
0                                  51.0   

   privacy_distinct l-diversity_gt_stddev  \
0                                     0.0   

   privacy_distinct l-diversity_syn_mean  \
0          


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions wit

Index(['min', 'max', 'mean', 'stddev', 'median', 'iqr', 'rounds', 'errors',
       'durations', 'direction'],
      dtype='object') Index(['sanity.data_mismatch.score', 'sanity.common_rows_proportion.score',
       'sanity.nearest_syn_neighbor_distance.mean',
       'sanity.nearest_real_neighbor_distance.mean',
       'sanity.nearest_syn_neighbor_distance_no_norm.mean',
       'sanity.nearest_real_neighbor_distance_no_norm.mean',
       'sanity.close_values_probability.score',
       'sanity.distant_values_probability.score',
       'stats.jensenshannon_dist.marginal', 'stats.chi_squared_test.marginal',
       'stats.inv_kl_divergence.marginal', 'stats.ks_test.marginal',
       'stats.max_mean_discrepancy.joint', 'stats.wasserstein_dist.joint',
       'stats.prdc.precision', 'stats.prdc.recall', 'stats.prdc.density',
       'stats.prdc.coverage', 'detection.detection_xgb.mean',
       'detection.detection_gmm.mean', 'detection.detection_linear.mean',
       'privacy.delta-presence.scor


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions wit

  dataset_name n_permutations n_ensemble  sanity_data_mismatch_score_mean  \
0    MiniBooNE           None       None                              0.0   

   sanity_data_mismatch_score_stddev  \
0                                0.0   

   sanity_common_rows_proportion_score_mean  \
0                                   0.00025   

   sanity_common_rows_proportion_score_stddev  \
0                                         0.0   

   sanity_nearest_syn_neighbor_distance_mean_mean  \
0                                        0.522385   

   sanity_nearest_syn_neighbor_distance_mean_stddev  \
0                                          0.015627   

   sanity_nearest_real_neighbor_distance_mean_mean  ...  \
0                                         0.549811  ...   

   privacy_distinct l-diversity_gt_mean  \
0                                 183.0   

   privacy_distinct l-diversity_gt_stddev  \
0                                     0.0   

   privacy_distinct l-diversity_syn_mean  \
0          



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions wit

  dataset_name n_permutations n_ensemble  sanity_data_mismatch_score_mean  \
0    MiniBooNE           None       None                              0.0   

   sanity_data_mismatch_score_stddev  \
0                                0.0   

   sanity_common_rows_proportion_score_mean  \
0                                       0.0   

   sanity_common_rows_proportion_score_stddev  \
0                                         0.0   

   sanity_nearest_syn_neighbor_distance_mean_mean  \
0                                        0.286595   

   sanity_nearest_syn_neighbor_distance_mean_stddev  \
0                                          0.015791   

   sanity_nearest_real_neighbor_distance_mean_mean  ...  \
0                                         0.408935  ...   

   privacy_distinct l-diversity_gt_mean  \
0                                 183.0   

   privacy_distinct l-diversity_gt_stddev  \
0                                     0.0   

   privacy_distinct l-diversity_syn_mean  \
0          


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable

  dataset_name n_permutations n_ensemble  sanity_data_mismatch_score_mean  \
0    MiniBooNE           None       None                              0.0   

   sanity_data_mismatch_score_stddev  \
0                                0.0   

   sanity_common_rows_proportion_score_mean  \
0                                   0.00025   

   sanity_common_rows_proportion_score_stddev  \
0                                         0.0   

   sanity_nearest_syn_neighbor_distance_mean_mean  \
0                                        0.559777   

   sanity_nearest_syn_neighbor_distance_mean_stddev  \
0                                          0.003417   

   sanity_nearest_real_neighbor_distance_mean_mean  ...  \
0                                         0.521328  ...   

   privacy_k-anonymization_syn_mean  privacy_k-anonymization_syn_stddev  \
0                         16.666667                            3.299832   

   privacy_k-map_score_mean  privacy_k-map_score_stddev  \
0                 16.66


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable

  dataset_name n_permutations n_ensemble  sanity_data_mismatch_score_mean  \
0    covertype           None       None                              0.0   

   sanity_data_mismatch_score_stddev  \
0                                0.0   

   sanity_common_rows_proportion_score_mean  \
0                                       0.0   

   sanity_common_rows_proportion_score_stddev  \
0                                         0.0   

   sanity_nearest_syn_neighbor_distance_mean_mean  \
0                                        0.373995   

   sanity_nearest_syn_neighbor_distance_mean_stddev  \
0                                             0.012   

   sanity_nearest_real_neighbor_distance_mean_mean  ...  \
0                                         0.440842  ...   

   privacy_distinct l-diversity_gt_mean  \
0                                 183.0   

   privacy_distinct l-diversity_gt_stddev  \
0                                     0.0   

   privacy_distinct l-diversity_syn_mean  \
0          


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable

  dataset_name n_permutations n_ensemble  sanity_data_mismatch_score_mean  \
0    covertype           None       None                              0.0   

   sanity_data_mismatch_score_stddev  \
0                                0.0   

   sanity_common_rows_proportion_score_mean  \
0                                       0.0   

   sanity_common_rows_proportion_score_stddev  \
0                                         0.0   

   sanity_nearest_syn_neighbor_distance_mean_mean  \
0                                         0.37723   

   sanity_nearest_syn_neighbor_distance_mean_stddev  \
0                                          0.020664   

   sanity_nearest_real_neighbor_distance_mean_mean  ...  \
0                                         0.479241  ...   

   privacy_distinct l-diversity_gt_mean  \
0                                 183.0   

   privacy_distinct l-diversity_gt_stddev  \
0                                     0.0   

   privacy_distinct l-diversity_syn_mean  \
0          


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable

Index(['min', 'max', 'mean', 'stddev', 'median', 'iqr', 'rounds', 'errors',
       'durations', 'direction'],
      dtype='object') Index(['sanity.data_mismatch.score', 'sanity.common_rows_proportion.score',
       'sanity.nearest_syn_neighbor_distance.mean',
       'sanity.nearest_real_neighbor_distance.mean',
       'sanity.nearest_syn_neighbor_distance_no_norm.mean',
       'sanity.nearest_real_neighbor_distance_no_norm.mean',
       'sanity.close_values_probability.score',
       'sanity.distant_values_probability.score',
       'stats.jensenshannon_dist.marginal', 'stats.chi_squared_test.marginal',
       'stats.inv_kl_divergence.marginal', 'stats.ks_test.marginal',
       'stats.max_mean_discrepancy.joint', 'stats.wasserstein_dist.joint',
       'stats.prdc.precision', 'stats.prdc.recall', 'stats.prdc.density',
       'stats.prdc.coverage', 'detection.detection_xgb.mean',
       'detection.detection_gmm.mean', 'detection.detection_linear.mean',
       'privacy.delta-presence.scor


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable

Index(['min', 'max', 'mean', 'stddev', 'median', 'iqr', 'rounds', 'errors',
       'durations', 'direction'],
      dtype='object') Index(['sanity.data_mismatch.score', 'sanity.common_rows_proportion.score',
       'sanity.nearest_syn_neighbor_distance.mean',
       'sanity.nearest_real_neighbor_distance.mean',
       'sanity.nearest_syn_neighbor_distance_no_norm.mean',
       'sanity.nearest_real_neighbor_distance_no_norm.mean',
       'sanity.close_values_probability.score',
       'sanity.distant_values_probability.score',
       'stats.jensenshannon_dist.marginal', 'stats.chi_squared_test.marginal',
       'stats.inv_kl_divergence.marginal', 'stats.ks_test.marginal',
       'stats.max_mean_discrepancy.joint', 'stats.wasserstein_dist.joint',
       'stats.prdc.precision', 'stats.prdc.recall', 'stats.prdc.density',
       'stats.prdc.coverage', 'detection.detection_xgb.mean',
       'detection.detection_gmm.mean', 'detection.detection_linear.mean',
       'privacy.delta-presence.scor


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable

Index(['min', 'max', 'mean', 'stddev', 'median', 'iqr', 'rounds', 'errors',
       'durations', 'direction'],
      dtype='object') Index(['sanity.data_mismatch.score', 'sanity.common_rows_proportion.score',
       'sanity.nearest_syn_neighbor_distance.mean',
       'sanity.nearest_real_neighbor_distance.mean',
       'sanity.nearest_syn_neighbor_distance_no_norm.mean',
       'sanity.nearest_real_neighbor_distance_no_norm.mean',
       'sanity.close_values_probability.score',
       'sanity.distant_values_probability.score',
       'stats.jensenshannon_dist.marginal', 'stats.chi_squared_test.marginal',
       'stats.inv_kl_divergence.marginal', 'stats.ks_test.marginal',
       'stats.max_mean_discrepancy.joint', 'stats.wasserstein_dist.joint',
       'stats.prdc.precision', 'stats.prdc.recall', 'stats.prdc.density',
       'stats.prdc.coverage', 'detection.detection_xgb.mean',
       'detection.detection_gmm.mean', 'detection.detection_linear.mean',
       'privacy.delta-presence.scor


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable

  dataset_name n_permutations n_ensemble  sanity_data_mismatch_score_mean  \
0        Higgs           None       None                              0.0   

   sanity_data_mismatch_score_stddev  \
0                                0.0   

   sanity_common_rows_proportion_score_mean  \
0                                       0.0   

   sanity_common_rows_proportion_score_stddev  \
0                                         0.0   

   sanity_nearest_syn_neighbor_distance_mean_mean  \
0                                        0.620143   

   sanity_nearest_syn_neighbor_distance_mean_stddev  \
0                                          0.020286   

   sanity_nearest_real_neighbor_distance_mean_mean  ...  \
0                                         0.440716  ...   

   privacy_distinct l-diversity_gt_mean  \
0                                 999.0   

   privacy_distinct l-diversity_gt_stddev  \
0                                     0.0   

   privacy_distinct l-diversity_syn_mean  \
0          


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable

Index(['min_train', 'max_train', 'mean_train', 'stddev_train', 'median_train',
       'iqr_train', 'rounds_train', 'errors_train', 'durations_train',
       'direction_train'],
      dtype='object') Index(['sanity.data_mismatch.score', 'sanity.common_rows_proportion.score',
       'sanity.nearest_syn_neighbor_distance.mean',
       'sanity.nearest_real_neighbor_distance.mean',
       'sanity.nearest_syn_neighbor_distance_no_norm.mean',
       'sanity.nearest_real_neighbor_distance_no_norm.mean',
       'sanity.close_values_probability.score',
       'sanity.distant_values_probability.score',
       'stats.jensenshannon_dist.marginal', 'stats.chi_squared_test.marginal',
       'stats.inv_kl_divergence.marginal', 'stats.ks_test.marginal',
       'stats.max_mean_discrepancy.joint', 'stats.wasserstein_dist.joint',
       'stats.prdc.precision', 'stats.prdc.recall', 'stats.prdc.density',
       'stats.prdc.coverage', 'detection.detection_xgb.mean',
       'detection.detection_gmm.mean', 'de


In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.



In [24]:
list(df.columns)

['dataset_name',
 'model_name',
 'n_permutations',
 'n_ensemble',
 'sanity_data_mismatch_score_mean',
 'sanity_data_mismatch_score_stddev',
 'sanity_common_rows_proportion_score_mean',
 'sanity_common_rows_proportion_score_stddev',
 'sanity_nearest_syn_neighbor_distance_mean_mean',
 'sanity_nearest_syn_neighbor_distance_mean_stddev',
 'sanity_nearest_real_neighbor_distance_mean_mean',
 'sanity_nearest_real_neighbor_distance_mean_stddev',
 'sanity_nearest_syn_neighbor_distance_no_norm_mean_mean',
 'sanity_nearest_syn_neighbor_distance_no_norm_mean_stddev',
 'sanity_nearest_real_neighbor_distance_no_norm_mean_mean',
 'sanity_nearest_real_neighbor_distance_no_norm_mean_stddev',
 'sanity_close_values_probability_score_mean',
 'sanity_close_values_probability_score_stddev',
 'sanity_distant_values_probability_score_mean',
 'sanity_distant_values_probability_score_stddev',
 'stats_jensenshannon_dist_marginal_mean',
 'stats_jensenshannon_dist_marginal_stddev',
 'stats_chi_squared_test_margina

In [25]:
import plotly.express as px
#metric = "detection_detection_xgb_mean_mean"
#metric = "detection_detection_mlp_mean_mean"
#metric = "privacy_identifiability_score_score_mean"
#metric = "sanity_nearest_syn_neighbor_distance_mean_mean"
#metric = "sanity_nearest_real_neighbor_distance_mean_mean"
#metric = "sanity_nearest_real_neighbor_distance_no_norm_mean_mean_train"
metric = "sanity_nearest_syn_neighbor_distance_no_norm_mean_mean"
#metric = "performance_xgb_syn_id_mean"
#metric = "stats_wasserstein_dist_joint_mean"
#metric = "privacy_delta-presence_score_mean"
#metric = 'privacy_k-anonymization_gt_mean'
better_is_lower_list = ["detection_detection_xgb_mean_mean", "detection_detection_mlp_mean_mean", "privacy_identifiability_score_score_mean", "stats_wasserstein_dist_joint_mean", "privacy_delta-presence_score_mean", "privacy_k-anonymization_gt_mean_mean"]
better_is_lower = metric in better_is_lower_list

use_X_as_train = True
print(better_is_lower)


# compute mean rank for each model
df_all =  df.copy()
# for model_name == "tabpfn_points', only keep n_permutations == 1 and n_ensemble == 7 (excluding nans)
#df_all = df_all[(df_all["model_name"] != "tabpfn_points") | ((df_all["n_permutations"] == "5") & (df_all["n_ensemble"] == "5"))]
df_rank = df_all.copy()
df_rank = df_rank[df_rank["use_X_as_train"] == use_X_as_train]
df_rank = df_rank[[metric, "model_name", "dataset_name"]]
df_rank = df_rank.groupby(["model_name", "dataset_name"]).mean().reset_index()
# remove when metric is null
df_rank = df_rank[df_rank[metric].notna()]
print(df_rank.groupby("model_name").count())
# TODO check that we have one row per model_name and dataset_name
# compute rank for each model
df_rank["rank"] = df_rank.groupby("dataset_name")[metric].rank(ascending=better_is_lower)
df_rank = df_rank.drop(metric, axis=1)
df_rank = df_rank.groupby("model_name").mean().reset_index()
df_rank = df_rank.sort_values("rank")
df_rank



False
                  dataset_name  \
model_name                       
arf                         12   
ddpm                        12   
dummy_sampler               11   
forest_diffusion             7   
gaussian_noise              12   
smote                       12   
smote_imblearn              12   

                  sanity_nearest_syn_neighbor_distance_no_norm_mean_mean  
model_name                                                                
arf                                                              12       
ddpm                                                             12       
dummy_sampler                                                    11       
forest_diffusion                                                  7       
gaussian_noise                                                   12       
smote                                                            12       
smote_imblearn                                                   12       



The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



Unnamed: 0,model_name,rank
4,gaussian_noise,1.5
0,arf,1.916667
1,ddpm,2.916667
3,forest_diffusion,4.142857
6,smote_imblearn,4.416667
2,dummy_sampler,5.363636
5,smote,6.5


In [21]:
px.strip(df_all, y="dataset_name", x=metric, color="model_name", log_x=True, hover_data=["n_permutations", "n_ensemble"], title=metric, width=1000, height=1000,
         facet_row="use_X_as_train")

In [22]:
df_tabpfn = df[(df["model_name"].str.contains("tabpfn"))]
df_tabpfn["n_ensemble"] = df_tabpfn["n_ensemble"].astype(float)

# show rank between tabpfn versions
df_tabpfn_rank = df_tabpfn.copy()
df_tabpfn_rank = df_tabpfn_rank[[metric, "n_permutations", "n_ensemble", "dataset_name"]]
df_tabpfn_rank = df_tabpfn_rank.groupby(["n_permutations", "n_ensemble", "dataset_name"]).mean().reset_index()
df_tabpfn_rank["rank"] = df_tabpfn_rank.groupby("dataset_name")[metric].rank(ascending=better_is_lower)
df_tabpfn_rank = df_tabpfn_rank.drop(metric, axis=1)
df_tabpfn_rank = df_tabpfn_rank.groupby(["n_permutations", "n_ensemble"]).mean().reset_index()
df_tabpfn_rank = df_tabpfn_rank.sort_values("rank")
df_tabpfn_rank

#px.strip(df_tabpfn, y="dataset_name", x=metric, color="n_ensemble", log_x=True)


The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



Unnamed: 0,n_permutations,n_ensemble,rank


In [16]:
def plot_two_scores(score_1, score_2):

    better_is_lower_score_1 = score_1 in better_is_lower_list
    print(f"For {score_1} better is lower: {better_is_lower_score_1}")
    df_rank_1 = df_all.copy()
    df_rank_1 = df_rank_1[[score_1, "model_name", "dataset_name"]]
    df_rank_1 = df_rank_1.groupby(["model_name", "dataset_name"]).mean().reset_index()
    # compute rank for each model
    df_rank_1["rank"] = df_rank_1.groupby("dataset_name")[score_1].rank(ascending=better_is_lower_score_1)
    df_rank_1 = df_rank_1.drop(score_1, axis=1)
    df_rank_1 = df_rank_1.groupby("model_name").mean().reset_index()

    better_is_lower_score_2 = score_2 in better_is_lower_list
    print(f"For {score_2} better is lower: {better_is_lower_score_2}")
    df_rank_2 = df_all.copy()
    df_rank_2 = df_rank_2[[score_2, "model_name", "dataset_name"]]
    df_rank_2 = df_rank_2.groupby(["model_name", "dataset_name"]).mean().reset_index()
    # compute rank for each model
    df_rank_2["rank"] = df_rank_2.groupby("dataset_name")[score_2].rank(ascending=better_is_lower_score_2)
    df_rank_2 = df_rank_2.drop(score_2, axis=1)
    df_rank_2 = df_rank_2.groupby("model_name").mean().reset_index()

    df_rank = df_rank_1.merge(df_rank_2, on="model_name", suffixes=(f"_{score_1}", f"_{score_2}"))



    fig = px.scatter(df_rank, x=f"rank_{score_1}", y=f"rank_{score_2}", text="model_name")

    fig.update_layout(
        font=dict(
            size=18  # Increase overall font size
        ),
        title={
            'text': "Scatter Plot of Model Ranks",
            'font': {'size': 24},  # Increase title font size
            'x': 0.5,
            'xanchor': 'center'
        },
        xaxis=dict(
            title=f"Rank of {score_1}",
            titlefont=dict(size=20)  # Increase x-axis title font size
        ),
        yaxis=dict(
            title=f"Rank of {score_2}",
            titlefont=dict(size=20)  # Increase y-axis title font size
        )
    )

    x_start = 1.3#df_rank[f"rank_{score_1}"].max()
    x_end = 1#df_rank[f"rank_{score_1}"].min()

    y_start = 1.3#df_rank[f"rank_{score_2}"].max()
    y_end = 1#df_rank[f"rank_{score_2}"].min()

    # Add an arrow to indicate the direction of improvement
    fig.add_annotation(
        x=x_end,
        y=y_end,
        ax=x_start,
        ay=y_start,
        xref="x",
        yref="y",
        axref="x",
        ayref="y",
        showarrow=True,
        arrowhead=2,
        arrowsize=1,
        arrowwidth=2,
        arrowcolor="red",
        text="Better",
    )

    # Show the plot
    fig.show()

In [17]:
plot_two_scores(score_1 = "detection_detection_xgb_mean_mean", score_2 = "sanity_nearest_real_neighbor_distance_mean_mean"
)


The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.


The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



For detection_detection_xgb_mean_mean better is lower: True
For sanity_nearest_real_neighbor_distance_mean_mean better is lower: False


In [137]:
plot_two_scores(score_1 = "detection_detection_mlp_mean_mean", score_2 = "sanity_nearest_syn_neighbor_distance_mean_mean"
)

For detection_detection_mlp_mean_mean better is lower: True


KeyError: "['detection_detection_mlp_mean_mean'] not in index"

In [139]:
plot_two_scores(score_1 = "stats_wasserstein_dist_joint_mean", score_2 = "sanity_nearest_real_neighbor_distance_mean_mean"
)

For stats_wasserstein_dist_joint_mean better is lower: True
For sanity_nearest_real_neighbor_distance_mean_mean better is lower: False



The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.


The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



In [33]:
plot_two_scores(score_1 = "performance_xgb_syn_id_mean", score_2 = "privacy_delta-presence_score_mean")

For performance_xgb_syn_id_mean better is lower: False
For privacy_delta-presence_score_mean better is lower: True



The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.


The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



In [34]:
plot_two_scores(score_1 = "stats_wasserstein_dist_joint_mean", score_2 = "privacy_delta-presence_score_mean")

For stats_wasserstein_dist_joint_mean better is lower: True
For privacy_delta-presence_score_mean better is lower: True



The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.


The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



In [248]:
filename = "results/bank-marketing/ddpm.pkl"
with open(filename, 'rb') as f:
    data = pickle.load(f)
    print(data.keys())
    print(data['ddpm'].keys())

dict_keys(['ddpm'])
Index(['min', 'max', 'mean', 'stddev', 'median', 'iqr', 'rounds', 'errors',
       'durations', 'direction'],
      dtype='object')


In [34]:
result_dict

{'sanity_data_mismatch_score_mean': 0.5,
 'sanity_data_mismatch_score_stddev': 0.0,
 'sanity_common_rows_proportion_score_mean': 0.0,
 'sanity_common_rows_proportion_score_stddev': 0.0,
 'sanity_nearest_syn_neighbor_distance_mean_mean': 0.04967439833059386,
 'sanity_nearest_syn_neighbor_distance_mean_stddev': 0.008817575647896697,
 'sanity_close_values_probability_score_mean': 0.9680989583333334,
 'sanity_close_values_probability_score_stddev': 0.013932900103536913,
 'sanity_distant_values_probability_score_mean': 0.00390625,
 'sanity_distant_values_probability_score_stddev': 0.001594719884624465,
 'stats_jensenshannon_dist_marginal_mean': 0.010554966242995909,
 'stats_jensenshannon_dist_marginal_stddev': 0.00021185598149434876,
 'stats_chi_squared_test_marginal_mean': 0.33333333262044,
 'stats_chi_squared_test_marginal_stddev': 0.0673435029539392,
 'stats_inv_kl_divergence_marginal_mean': 0.928053986976059,
 'stats_inv_kl_divergence_marginal_stddev': 0.0034157737474699596,
 'stats_ks_