In [1]:
from sqlalchemy import create_engine, text
import pandas as pd
from ml_experiments.analyze import get_df_runs_from_mlflow_sql, get_missing_entries
from pathlib import Path
import os
from functools import partial

# Save Results

## Load mlflow runs

In [2]:
results_dir = Path.cwd().parent / "results" / "real"
os.makedirs(results_dir, exist_ok=True)

In [3]:
db_port = 5001
db_name = "cohirf"
url = f"postgresql://belucci@localhost:{db_port}/{db_name}"
# url = f"postgresql://beluccib@clust5:{db_port}/{db_name}"
engine = create_engine(url)
query = "SELECT experiments.name from experiments"
experiment_names = pd.read_sql(query, engine)["name"].tolist()
experiments_names = [exp for exp in experiment_names if (exp.startswith("real-"))]

In [4]:
experiments_names

['real-ari-BatchCoHiRF-1iter',
 'real-ari-AverageAgglomerativeClustering',
 'real-ari-BatchCoHiRF-DBSCAN-1iter',
 'real-ari-AffinityPropagation',
 'real-ari-BatchCoHiRF-SC-SRGF',
 'real-ari-CoHiRF-DBSCAN',
 'real-ari-CoHiRF-KernelRBF',
 'real-ari-CoHiRF',
 'real-ari-CompleteAgglomerativeClustering',
 'real-ari-DBSCAN',
 'real-ari-HDBSCAN',
 'real-ari-IRFLLRR',
 'real-ari-KMeans',
 'real-ari-MeanShift',
 'real-ari-OPTICS',
 'real-ari-Proclus',
 'real-ari-SingleAgglomerativeClustering',
 'real-ari-SpectralClustering',
 'real-ari-SpectralSubspaceRandomization',
 'real-ari-WardAgglomerativeClustering',
 'real-adjusted_mutual_info-BatchCoHiRF-DBSCAN-1iter',
 'real-adjusted_mutual_info-DBSCAN',
 'real-adjusted_mutual_info-BatchCoHiRF-SC-SRGF',
 'real-adjusted_mutual_info-CompleteAgglomerativeClustering',
 'real-adjusted_mutual_info-AverageAgglomerativeClustering',
 'real-adjusted_mutual_info-BatchCoHiRF-1iter',
 'real-adjusted_mutual_info-CoHiRF',
 'real-adjusted_mutual_info-CoHiRF-KernelRBF

In [5]:
query = "SELECT DISTINCT(key) FROM params WHERE key LIKE 'best/%%'"
best_params = pd.read_sql(query, engine)["key"].tolist()

In [6]:
params_columns = [
    "model",
    "dataset_id",
    "n_trials",
    "dataset_name",
    "standardize",
    "hpo_metric",
    "direction",
    "hpo_seed",
    "cohirf_kwargs/consensus_strategy",
    "consensus_strategy",
    "batch_sample_strategy",
    "cohirf_kwargs/n_samples_representative",
    "n_samples_representative",
] + best_params

In [7]:
latest_metrics_columns = [
    "fit_model_return_elapsed_time",
    "max_memory_used_after_fit",
    "max_memory_used",
	"best/n_clusters_",
    "best/rand_score",
    "best/adjusted_rand",
    "best/mutual_info",
    "best/adjusted_mutual_info",
    "best/normalized_mutual_info",
    "best/homogeneity_completeness_v_measure",
    "best/silhouette",
    "best/calinski_harabasz_score",
    "best/davies_bouldin_score",
    "best/inertia_score",
    "best/homogeneity",
    "best/completeness",
    "best/v_measure",
    "best/elapsed_time",
]

In [8]:
tags_columns = ["raised_exception", "EXCEPTION", "mlflow.parentRunId", "Last step finished"]

In [9]:
runs_columns = ['run_uuid', 'status', 'start_time', 'end_time']
experiments_columns = []
other_table = 'params'
other_table_keys = params_columns

In [10]:
df_params = get_df_runs_from_mlflow_sql(engine, runs_columns=runs_columns, experiments_columns=experiments_columns, experiments_names=experiments_names, other_table=other_table, other_table_keys=other_table_keys)
df_latest_metrics = get_df_runs_from_mlflow_sql(engine, runs_columns=['run_uuid'], experiments_columns=experiments_columns, experiments_names=experiments_names, other_table='latest_metrics', other_table_keys=latest_metrics_columns)
df_tags = get_df_runs_from_mlflow_sql(engine, runs_columns=['run_uuid'], experiments_columns=experiments_columns, experiments_names=experiments_names, other_table='tags', other_table_keys=tags_columns)

In [3]:
dataset_characteristics = pd.read_csv(results_dir / "datasets_characteristics.csv", index_col=0)
dataset_characteristics.index = dataset_characteristics["openml_id"].astype(str)

In [12]:
df_runs_raw = df_params.join(df_latest_metrics)
df_runs_raw = df_runs_raw.join(df_tags)
df_runs_raw["start_time"] = pd.to_datetime(df_runs_raw["start_time"], unit="ms")
df_runs_raw = df_runs_raw.loc[df_runs_raw["start_time"] > "2025-11-01"]  # to filter out old runs

In [13]:
df_runs_raw = df_runs_raw.join(dataset_characteristics, on="dataset_id", rsuffix="_dataset")
df_runs_raw.to_csv(results_dir / 'df_runs_raw_cer.csv', index=True)

In [14]:
df_runs_raw["n_trials"].unique()

array(['60', nan], dtype=object)

In [4]:
df_runs_raw = pd.read_csv(results_dir / "df_runs_raw_cer.csv", index_col=0, low_memory=False)
df_runs_raw = df_runs_raw.dropna(subset=["n_trials"])
df_runs_raw["model"] = df_runs_raw["model"] + "-" + df_runs_raw["n_trials"].astype(int).astype(str)
df_runs_raw_parents = df_runs_raw.copy()
df_runs_raw_parents = df_runs_raw_parents.loc[df_runs_raw_parents["mlflow.parentRunId"].isna()]

In [5]:
df_runs_raw_parents.head(5)

Unnamed: 0_level_0,status,start_time,end_time,batch_sample_strategy,best/alpha,best/avg_dims,best/base_model_kwargs/eps,best/base_model_kwargs/min_samples,best/base_model_kwargs/n_clusters,best/base_model_kwargs/n_similarities,...,EXCEPTION,Last step finished,mlflow.parentRunId,raised_exception,dataset,openml_id,n_instances,n_features,n_classes,n_categorical
run_uuid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0001e2df550247b7a3e752cefeeba0e7,FINISHED,2025-11-10 10:52:43.219,1762772000000.0,,,,,,2.0,,...,,_on_train_end,,False,ecoli,39,336,8,8,1
000b127fdf284d39897d1c063f4a2ff2,FINISHED,2025-11-14 20:15:50.100,1763152000000.0,,,,,,,,...,,_on_train_end,,False,bittner-2000,46776,38,2202,2,0
000d83f75ed1401280ced298349a48ab,FINISHED,2025-11-14 21:44:44.935,1763157000000.0,,,,,,,,...,,_on_train_end,,False,alizadeh-2000-v2,46773,62,2094,3,0
00142a924906462f87f78d0ba1adc309,FINISHED,2025-11-14 22:52:19.366,1763166000000.0,,,,,,,,...,,_on_train_end,,False,binary_alpha_digits,46782,1404,321,36,0
00197a66ddea4fe8a5ddcd324bfadcb0,FINISHED,2025-11-21 15:31:13.327,1763739000000.0,,,,,,,,...,,_on_train_end,,False,shuttle,40685,58000,10,7,1


## Delete duplicate runs (if any) and complete some models that cannot run with some datasets

In [6]:
non_duplicate_columns = [
    "model",
    "dataset_id",
	"standardize",
	"hpo_metric",
	"hpo_seed",
]
# df_runs_parents.loc[df_runs_parents["best/n_clusters_"]*0.5 > df_runs_parents["n_instances"], "best/adjusted_rand"] = 
df_runs_parents = df_runs_raw_parents.dropna(axis=0, how="all", subset=["best/adjusted_rand"]).copy()
# add back runs that were not evaluated because we judged too many clusters (but they run anyway)
# df_valid_runs = df_runs_raw_parents.loc[df_runs_raw_parents["best/n_clusters_"] > df_runs_raw_parents["n_instances"]*0.5].copy()
# df_runs_parents = pd.concat([df_runs_parents, df_valid_runs], axis=0)
df_runs_parents = df_runs_parents.loc[(~df_runs_parents.duplicated(non_duplicate_columns))]
# fill missing values with "None"
df_runs_parents = df_runs_parents.fillna("None")

In [7]:
# get number of children runs that raised exception for each parent run
children_exceptions = df_runs_raw.groupby("mlflow.parentRunId")["raised_exception"].sum()
df_runs_parents["n_children_raised_exception"] = df_runs_parents.index.map(children_exceptions).fillna(0)

In [8]:
df_runs_parents.loc[(df_runs_parents["n_children_raised_exception"] > 0) & (df_runs_parents["raised_exception"] == False), ["dataset_id", "model", "hpo_metric", "n_children_raised_exception"]]

Unnamed: 0_level_0,dataset_id,model,hpo_metric,n_children_raised_exception
run_uuid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
00e1b380855e48d797ebdeaafa8845aa,39,CoHiRF-SC-SRGF-1R-top-down-inv-60,silhouette,2
06cde4eca7c745c4bb56c4adec85bb84,554,CoHiRF-DBSCAN-top-down-60,calinski_harabasz_score,5
07293a4c1b304b2ba6a3486c0ad4c56f,1568,CoHiRF-1000-60,adjusted_rand,1
076d89a42c8441359e6f012b11ca3300,40685,CoHiRF-top-down-60,calinski_harabasz_score,1
0a6b8d26743c4647b5e784f268934344,554,CoHiRF-DBSCAN-60,silhouette,5
...,...,...,...,...
f6277fdb765441738ee4cb0205e4594d,40685,CoHiRF-top-down-60,silhouette,3
f64bfe426c7547eaa7c7df4eb6163193,39,CoHiRF-SC-SRGF-1R-top-down-inv-60,silhouette,1
f8c4350235744ee1acaadcbd4fcef496,554,CoHiRF-DBSCAN-top-down-60,silhouette,7
fce8ebd9765c47edb1c76abb4fb8d482,40685,CoHiRF-DBSCAN-top-down-60,calinski_harabasz_score,28


In [9]:
df_to_cat = []
hpo_metrics = [
    "adjusted_rand",
    "adjusted_mutual_info",
    "calinski_harabasz_score",
    "silhouette",
    "davies_bouldin_score",
    "normalized_mutual_info",
]
standardize = [True]
hpo_seed = [i for i in range(5)]
fill_value = pd.NA
fill_columns = ["best/adjusted_rand", "best/adjusted_mutual_info", "best/calinski_harabasz_score", "best/silhouette", "best/davies_bouldin_score", "best/normalized_mutual_info"]

In [10]:
# Too memory intensive
dataset_ids_to_complete = [182, 554, 1478, 1568, 40685]
model_names = [
    "CoHiRF-SC-SRGF-60",
    "CoHiRF-SC-SRGF-top-down-60",
    "CoHiRF-SC-SRGF-top-down-inv-60",
    "SpectralSubspaceRandomization-60",
    "CoHiRF-SC-SRGF-1R-60",
    "CoHiRF-SC-SRGF-top-down-1R-60",
    "CoHiRF-SC-SRGF-top-down-inv-1R-60",
    "CoHiRF-SC-SRGF-2R-60",
    "CoHiRF-SC-SRGF-top-down-2R-60",
	"CoHiRF-SC-SRGF-top-down-inv-2R-60",
]
for dataset_id in dataset_ids_to_complete:
    for model_name in model_names:
        for hpo_metric in hpo_metrics:
            for std in standardize:
                for seed in hpo_seed:
                    new_row = {
						"dataset_id": dataset_id,
						"model": model_name,
						"hpo_metric": hpo_metric,
						"standardize": std,
						"hpo_seed": seed
					}
                    for col in fill_columns:
                        new_row[col] = fill_value
                    df_to_cat.append(new_row)

In [11]:
# # Too few examples (<100) to run in batch
# dataset_ids_to_complete = [46773, 46774, 46775, 46776, 46777, 46779, 46780, 46781]
# model_names = [
#     "BatchCoHiRF-1iter-30",
#     "BatchCoHiRF-DBSCAN-1iter-30",
#     "BatchCoHiRF-SC-SRGF-30",
#     "BatchCoHiRF-KernelRBF-1iter-30",
#     "BatchCoHiRF-1iter-60",
# 	"BatchCoHiRF-DBSCAN-1iter-60",
# 	# "BatchCoHiRF-SC-SRGF-60",
# 	# "BatchCoHiRF-KernelRBF-1iter-60",
# ]
# for dataset_id in dataset_ids_to_complete:
# 	for model_name in model_names:
# 		for hpo_metric in hpo_metrics:
# 			for std in standardize:
# 				mask = (df_runs_parents["dataset_id"] == dataset_id) & (df_runs_parents["model"] == model_name) & (df_runs_parents["hpo_metric"] == hpo_metric) & (df_runs_parents["standardize"] == std)
# 				if not mask.any():
# 					new_row = {
# 						"dataset_id": dataset_id,
# 						"model": model_name,
# 						"hpo_metric": hpo_metric,
# 						"standardize": std,
# 					}
# 					for col in fill_columns:
# 						new_row[col] = fill_value
# 					df_to_cat.append(new_row)

In [12]:
# # Too many examples for IRFLLRR
# dataset_ids_to_complete = [40685]
# model_names = [
#     "IRFLLRR-30",
#     "IRFLLRR-60",
# ]
# hpo_metrics = ["adjusted_rand", "adjusted_mutual_info", "calinski_harabasz_score", "silhouette", "davies_bouldin_score", "normalized_mutual_info"]
# standardize = [True]
# fill_value = pd.NA
# fill_columns = ["best/adjusted_rand", "best/adjusted_mutual_info", "best/calinski_harabasz_score", "best/silhouette", "best/davies_bouldin_score", "best/normalized_mutual_info"]
# for dataset_id in dataset_ids_to_complete:
#     for model_name in model_names:
#         for hpo_metric in hpo_metrics:
#             for std in standardize:
#                 mask = (
#                     (df_runs_parents["dataset_id"] == dataset_id)
#                     & (df_runs_parents["model"] == model_name)
#                     & (df_runs_parents["hpo_metric"] == hpo_metric)
#                     & (df_runs_parents["standardize"] == std)
#                 )
#                 if not mask.any():
#                     new_row = {
#                         "dataset_id": dataset_id,
#                         "model": model_name,
#                         "hpo_metric": hpo_metric,
#                         "standardize": std,
#                     }
#                     for col in fill_columns:
#                         new_row[col] = fill_value
#                     df_to_cat.append(new_row)

In [13]:
df_runs_parents = pd.concat([df_runs_parents, pd.DataFrame(df_to_cat)], axis=0)

# Missing

In [14]:
model_nickname = df_runs_parents['model'].unique().tolist()
model_nickname.sort()
model_nickname

['BatchCoHiRF-1iter-random-60',
 'BatchCoHiRF-1iter-random-nolaststop-60',
 'BatchCoHiRF-1iter-random-top-down-60',
 'BatchCoHiRF-1iter-random-top-down-nolaststop-60',
 'BatchCoHiRF-DBSCAN-1iter-random-60',
 'BatchCoHiRF-DBSCAN-1iter-random-nolaststop-60',
 'BatchCoHiRF-DBSCAN-1iter-random-top-down-60',
 'BatchCoHiRF-DBSCAN-1iter-random-top-down-nolaststop-60',
 'BatchCoHiRF-KernelRBF-1iter-random-60',
 'BatchCoHiRF-KernelRBF-1iter-random-nolaststop-60',
 'BatchCoHiRF-KernelRBF-1iter-random-top-down-60',
 'BatchCoHiRF-KernelRBF-1iter-random-top-down-nolaststop-60',
 'BatchCoHiRF-SC-SRGF-1R-1iter-random-60',
 'BatchCoHiRF-SC-SRGF-2R-1iter-random-60',
 'CoHiRF-1000-60',
 'CoHiRF-60',
 'CoHiRF-DBSCAN-60',
 'CoHiRF-DBSCAN-top-down-60',
 'CoHiRF-DBSCAN-top-down-inv-60',
 'CoHiRF-KernelRBF-60',
 'CoHiRF-KernelRBF-top-down-60',
 'CoHiRF-KernelRBF-top-down-inv-60',
 'CoHiRF-SC-SRGF-1R-60',
 'CoHiRF-SC-SRGF-1R-top-down-60',
 'CoHiRF-SC-SRGF-1R-top-down-inv-60',
 'CoHiRF-SC-SRGF-2R-60',
 'CoHiRF

In [15]:
non_duplicate_columns = [
	"model",
	"dataset_id",
	"standardize",
	"hpo_metric",
	"hpo_seed",
]

In [16]:
model_nickname = [
    "CoHiRF-60",
    "CoHiRF-top-down-60",
    "CoHiRF-1000-60",
    "CoHiRF-DBSCAN-60",
    "CoHiRF-DBSCAN-top-down-60",
    "CoHiRF-KernelRBF-60",
	"CoHiRF-KernelRBF-top-down-60",
    "CoHiRF-SC-SRGF-1R-60",
	"CoHiRF-SC-SRGF-2R-60",
    "DBSCAN-60",
    "KMeans-60",
    "KernelRBFKMeans-60",
    "SpectralSubspaceRandomization-60",
]
dataset_id = [
    61,
    46773,
    46776,
    46778,
    46779,
    46782,
    46783,
]
standardize = [True]
hpo_metric = [
    "adjusted_rand",
    "calinski_harabasz_score",
    "silhouette",
]
hpo_seed = [i for i in range(5)]
columns_names = non_duplicate_columns
should_contain_values = [model_nickname, dataset_id, standardize, hpo_metric, hpo_seed]
df_missing = get_missing_entries(df_runs_parents, columns_names, should_contain_values)
df_missing

Unnamed: 0,model,dataset_id,standardize,hpo_metric,hpo_seed


In [17]:
model_nickname = [
    "CoHiRF-SC-SRGF-1R-60",
    "CoHiRF-SC-SRGF-2R-60",
    "SpectralSubspaceRandomization-60",
]
dataset_id = [
    163,
    477,
    10,
    61,
    48,
    46336,
    46331,
    46334,
    7,
    51,
    49,
    39,
    46335,
    42855,
    35,
    46333,
    40496,
    478,
    377,
    11,
    42,
    188,
    469,
    458,
    54,
    46332,
    307,
    40966,
    1468,
    40733,
    23,
    1501,
    1493,
    40975,
    40982,
    18,
    22,
    16,
    14,
    12,
    40979,
    1466,
    40984,
    23380,
    40670,
    46,
    1497,
    30,
    40499,
    28,
    1475,
    182,
    300,
    41164,
    4538,
    375,
]
standardize = [True]
hpo_metric = [
    "adjusted_rand",
    "calinski_harabasz_score",
    "silhouette",
]
hpo_seed = [i for i in range(5)]
columns_names = non_duplicate_columns
should_contain_values = [model_nickname, dataset_id, standardize, hpo_metric, hpo_seed]
df_missing = get_missing_entries(df_runs_parents, columns_names, should_contain_values)
df_missing

Unnamed: 0,model,dataset_id,standardize,hpo_metric,hpo_seed
0,CoHiRF-SC-SRGF-1R-60,163,True,adjusted_rand,0
1,CoHiRF-SC-SRGF-1R-60,163,True,adjusted_rand,1
2,CoHiRF-SC-SRGF-1R-60,163,True,adjusted_rand,2
3,CoHiRF-SC-SRGF-1R-60,163,True,adjusted_rand,3
4,CoHiRF-SC-SRGF-1R-60,163,True,adjusted_rand,4
...,...,...,...,...,...
2381,SpectralSubspaceRandomization-60,375,True,silhouette,0
2382,SpectralSubspaceRandomization-60,375,True,silhouette,1
2383,SpectralSubspaceRandomization-60,375,True,silhouette,2
2384,SpectralSubspaceRandomization-60,375,True,silhouette,3


In [17]:
model_nickname = [
    "BatchCoHiRF-1iter-random-60",
    "BatchCoHiRF-1iter-random-top-down-60",
    "BatchCoHiRF-DBSCAN-1iter-random-60",
    "BatchCoHiRF-DBSCAN-1iter-random-top-down-60",
    "BatchCoHiRF-KernelRBF-1iter-random-60",
    "BatchCoHiRF-KernelRBF-1iter-random-top-down-60",
    "BatchCoHiRF-SC-SRGF-1R-1iter-random-60",
    # "BatchCoHiRF-SC-SRGF-2R-1iter-random-60",
    "CoHiRF-60",
    "CoHiRF-top-down-60",
    "CoHiRF-1000-60",
    "CoHiRF-DBSCAN-60",
    "CoHiRF-DBSCAN-top-down-60",
    "CoHiRF-KernelRBF-60",
    "CoHiRF-KernelRBF-top-down-60",
    "CoHiRF-SC-SRGF-1R-60",
    "CoHiRF-SC-SRGF-2R-60",
    "DBSCAN-60",
    "KMeans-60",
    "KernelRBFKMeans-60",
    "SpectralSubspaceRandomization-60",
]
dataset_id = [
    554,
	40685,
	1568,
]
standardize = [True]
hpo_metric = [
    "adjusted_rand",
    "calinski_harabasz_score",
    "silhouette",
]
hpo_seed = [i for i in range(5)]
columns_names = non_duplicate_columns
should_contain_values = [model_nickname, dataset_id, standardize, hpo_metric, hpo_seed]
df_missing = get_missing_entries(df_runs_parents, columns_names, should_contain_values)
df_missing

Unnamed: 0,model,dataset_id,standardize,hpo_metric,hpo_seed


In [18]:
# model_nickname = [
#     "BatchCoHiRF-1iter-random-nolaststop-60",
#     "BatchCoHiRF-1iter-random-top-down-nolaststop-60",
#     "BatchCoHiRF-DBSCAN-1iter-random-nolaststop-60",
#     "BatchCoHiRF-DBSCAN-1iter-random-top-down-nolaststop-60",
#     "BatchCoHiRF-KernelRBF-1iter-random-nolaststop-60",
#     "BatchCoHiRF-KernelRBF-1iter-random-top-down-nolaststop-60",
#     "BatchCoHiRF-SC-SRGF-1R-1iter-random-nolaststop-60",
#     # "BatchCoHiRF-SC-SRGF-2R-1iter-random-nolaststop-60",
#     # "CoHiRF-60",
#     # "CoHiRF-top-down-60",
#     # "CoHiRF-1000-60",
#     # "CoHiRF-DBSCAN-60",
#     # "CoHiRF-DBSCAN-top-down-60",
#     # "CoHiRF-KernelRBF-60",
#     # "CoHiRF-KernelRBF-top-down-60",
#     # "CoHiRF-SC-SRGF-1R-60",
#     # "CoHiRF-SC-SRGF-2R-60",
#     # "DBSCAN-60",
#     # "KMeans-60",
#     # "KernelRBFKMeans-60",
#     # "SpectralSubspaceRandomization-60",
# ]
# dataset_id = [
#     554,
#     40685,
#     1568,
# ]
# standardize = [True]
# hpo_metric = [
#     "adjusted_rand",
#     "calinski_harabasz_score",
#     "silhouette",
# ]
# hpo_seed = [i for i in range(5)]
# columns_names = non_duplicate_columns
# should_contain_values = [model_nickname, dataset_id, standardize, hpo_metric, hpo_seed]
# df_missing = get_missing_entries(df_runs_parents, columns_names, should_contain_values)
# df_missing

In [19]:
# model_nickname = [
#     # "BatchCoHiRF-1iter-random-60",
#     # "BatchCoHiRF-1iter-random-top-down-60",
#     # "BatchCoHiRF-DBSCAN-1iter-random-60",
#     # "BatchCoHiRF-DBSCAN-1iter-random-top-down-60",
#     # "BatchCoHiRF-KernelRBF-1iter-random-60",
#     # "BatchCoHiRF-KernelRBF-1iter-random-top-down-60",
#     "BatchCoHiRF-SC-SRGF-1R-1iter-random-60",
#     "BatchCoHiRF-SC-SRGF-2R-1iter-random-60",
#     # "CoHiRF-60",
#     # "CoHiRF-top-down-60",
#     # "CoHiRF-1000-60",
#     # "CoHiRF-DBSCAN-60",
#     # "CoHiRF-DBSCAN-top-down-60",
#     # "CoHiRF-KernelRBF-60",
#     # "CoHiRF-KernelRBF-top-down-60",
#     # "CoHiRF-SC-SRGF-1R-60",
#     # "CoHiRF-SC-SRGF-2R-60",
#     # "DBSCAN-60",
#     # "KMeans-60",
#     # "KernelRBFKMeans-60",
#     # "SpectralSubspaceRandomization-60",
# ]
# dataset_id = [47039]
# standardize = [True]
# hpo_metric = [
#     "adjusted_rand",
#     # "calinski_harabasz_score",
#     # "silhouette",
# ]
# hpo_seed = [i for i in range(5)]
# columns_names = non_duplicate_columns
# should_contain_values = [model_nickname, dataset_id, standardize, hpo_metric, hpo_seed]
# df_missing = get_missing_entries(df_runs_parents, columns_names, should_contain_values)
# df_missing

In [20]:
# Join df_runs_raw_parents into df_missing using non_duplicate_columns to get the EXCEPTION column
df_missing_with_exception = df_missing.merge(
    df_runs_raw_parents[non_duplicate_columns + ["raised_exception", "EXCEPTION", "Last step finished"]],
    how="left",
    left_on=["model", "dataset_id", "standardize", "hpo_metric", "hpo_seed"],
    right_on=["model", "dataset_id", "standardize", "hpo_metric", "hpo_seed"],
)
df_missing_with_exception[
    [
        "model",
        "dataset_id",
        "standardize",
        "hpo_metric",
        "hpo_seed",
        "raised_exception",
        "EXCEPTION",
        "Last step finished",
    ]
]

Unnamed: 0,model,dataset_id,standardize,hpo_metric,hpo_seed,raised_exception,EXCEPTION,Last step finished


In [19]:
df_missing_dict = df_missing.copy()
# get only rows from high_mem_tuples
# df_missing_dict = df_missing_dict.merge(high_mem_tuples, on=["model", "dataset_id"], how="left", indicator=True)
# df_missing_dict = df_missing_dict[df_missing_dict["_merge"] == "both"].drop(columns="_merge")
# exclude rows that are in missing_ari_tuples
# df_missing_dict = df_missing_dict.merge(
# 	missing_ari_tuples, on=["model", "dataset_id"], how="left", indicator=True
# )|
# df_missing_dict = df_missing_dict[df_missing_dict["_merge"] == "left_only"].drop(columns="_merge")
# exclude rows that are in high_mem_tuples
# df_missing_dict = df_missing_dict.merge(
# 	high_mem_tuples, on=["model", "dataset_id"], how="left", indicator=True
# )
# df_missing_dict = df_missing_dict[df_missing_dict["_merge"] == "left_only"].drop(columns="_merge")
# to_drop = pd.concat([missing_ari_tuples, high_mem_tuples], ignore_index=True)
# df_missing_dict = df_missing_dict[df_missing_dict["_merge"] == "left_only"].drop(columns="_merge")

In [20]:
# get rid of -60
df_missing_dict["model"] = df_missing_dict["model"].str.replace("-60", "")
df_missing_dict["seed_dataset_order"] = df_missing_dict["hpo_seed"]
# df_missing_dict = df_missing_dict.loc[~df_missing_dict["dataset_id"].isin([40685, 554])]
df_missing_dict.to_csv(results_dir / "df_missing_dict.csv", index=False)

In [21]:
df_missing_dict

Unnamed: 0,model,dataset_id,standardize,hpo_metric,hpo_seed,seed_dataset_order
0,CoHiRF-SC-SRGF-1R,163,True,adjusted_rand,0,0
1,CoHiRF-SC-SRGF-1R,163,True,adjusted_rand,1,1
2,CoHiRF-SC-SRGF-1R,163,True,adjusted_rand,2,2
3,CoHiRF-SC-SRGF-1R,163,True,adjusted_rand,3,3
4,CoHiRF-SC-SRGF-1R,163,True,adjusted_rand,4,4
...,...,...,...,...,...,...
2381,SpectralSubspaceRandomization,375,True,silhouette,0,0
2382,SpectralSubspaceRandomization,375,True,silhouette,1,1
2383,SpectralSubspaceRandomization,375,True,silhouette,2,2
2384,SpectralSubspaceRandomization,375,True,silhouette,3,3


# Tables

In [24]:
def get_parameters_string(row):
    parameter_names = {
		"best/alpha": "\\alpha",
		"best/avg_dims": "d",
		"best/base_model_kwargs/eps": "\\epsilon",
		"best/base_model_kwargs/min_samples": "n_{\\text{min}}",
		"best/base_model_kwargs/n_clusters": "C",
		"best/c": "c",
		"best/cohirf_kwargs/base_model_kwargs/eps": "\\epsilon",
		"best/cohirf_kwargs/base_model_kwargs/min_samples": "n_{\\text{min}}",
		"best/cohirf_kwargs/kmeans_n_clusters": "C",
		"best/cohirf_kwargs/n_features": "q",
		"best/cohirf_kwargs/repetitions": "R",
		"best/damping": "\\lambda",
		# "best/density_threshold": "\\tau",
		"best/eps": "\\epsilon",
		"best/kmeans_n_clusters": "C",
		"best/lambda_": "\\lambda",
		"best/min_bin_freq": "bin_{\\text{min}}",
		"best/min_cluster_size": "C_{\\text{min}}",
		"best/min_samples": "n_{\\text{min}}",
		"best/n_clusters": "C",
		"best/n_features": "q",
		# "best/n_partitions": "P",
		"best/n_similarities": "m",
		"best/p": "p",
		"best/repetitions": "R",
		"best/sampling_ratio": "r",
		"best/sc_n_clusters": "C",
		"best/transform_kwargs/gamma": "\\gamma",
	}
    first = True
    str = ""
    for p in parameter_names.keys():
        if not pd.isna(row[p]) and row[p] != "None":
            if not first:
                str += "; "
            else:
                first = False
            value = float(row[p])
            if value.is_integer():
                value = int(value)
                str += f"${parameter_names[p]}={value}$"
            else:
                str += f"${parameter_names[p]}={value:0.2f}$"
    return str

In [25]:
def highlight_max(df, column_name, level=0):
    df_column = df[column_name]
    max_values = df_column.groupby(level=level).transform('max')
    is_highlighted = df_column.round(3) == max_values.round(3)
    df_css = df.copy().astype(str)
    df_css.loc[:, :] = ''
    df_css[is_highlighted] = 'font-weight: bold'
    return df_css

In [26]:
def highlight_min(df, column_name, level=0):
    df_column = df[column_name]
    min_values = df_column.groupby(level=level).transform("min")
    is_highlighted = df_column.round(3) == min_values.round(3)
    df_css = df.copy().astype(str)
    df_css.loc[:, :] = ""
    df_css[is_highlighted] = "font-weight: bold"
    return df_css

In [27]:
def highlight_max_index(series_index, df_column, level=0):
    max_values = df_column.groupby(level=level).transform('max')
    is_highlighted = df_column.round(3) == max_values.round(3)
    series_css = series_index.copy().astype(str)
    series_css[:] = ''
    series_css[is_highlighted.values] = 'font-weight: bold'
    return series_css

In [28]:
def underline_2nd_max(df, column_name, level=0):
    df_column = df[column_name]
    # get the second max value
    second_max_values = df_column.groupby(level=level).transform(lambda x: x.round(3).drop_duplicates().nlargest(2).iloc[-1])
    is_underlined = df_column.round(3) == second_max_values.round(3)
    df_css = df.copy().astype(str)
    df_css.loc[:, :] = ''
    df_css[is_underlined] = 'underline: --latex--rwrap'
    return df_css

In [29]:
def underline_2nd_min(df, column_name, level=0):
    df_column = df[column_name]
    # get the second min value
    second_min_values = df_column.groupby(level=level).transform(
        lambda x: x.round(3).drop_duplicates().nsmallest(2).iloc[-1]
    )
    is_underlined = df_column.round(3) == second_min_values.round(3)
    df_css = df.copy().astype(str)
    df_css.loc[:, :] = ""
    df_css[is_underlined] = "underline: --latex--rwrap"
    return df_css

In [30]:
def underline_2nd_max_index(series_index, df_column, level=0):
    # get the second max value
    second_max_values = df_column.groupby(level=level).transform(lambda x: x.nlargest(2).iloc[-1])
    is_underlined = df_column.round(3) == second_max_values.round(3)
    series_css = series_index.copy().astype(str)
    series_css.loc[:] = ''
    series_css[is_underlined.values] = 'underline: --latex--rwrap'
    return series_css

In [31]:
def get_df_metrics(df, hpo_metrics, hpo_metrics_rename):
    dfs_metrics = {}

    for hpo_metric, hpo_metric_rename in zip(hpo_metrics, hpo_metrics_rename):
        if hpo_metric.find("_rescaled") != -1:
            original_metric = hpo_metric.replace("_rescaled", "")
        else:
            original_metric = hpo_metric
        df_metric = df.loc[df["hpo_metric"] == original_metric][
            ["dataset_name", "model", "hpo_seed", f"best/{hpo_metric}"]
        ].rename(columns={f"best/{hpo_metric}": hpo_metric_rename})
        df_metric = df_metric.dropna(subset=[hpo_metric_rename])
        df_metric = df_metric.set_index(["dataset_name", "model", "hpo_seed"])
        df_metric = df_metric.astype({hpo_metric_rename: float})
        dfs_metrics[hpo_metric_rename] = df_metric

    df_metrics = pd.concat(dfs_metrics.values(), axis=1, join="outer")
    df_metrics = df_metrics.reset_index()

    # calculate mean and std
    df_metrics = df_metrics.groupby(["dataset_name", "model"]).agg(["mean", "std"])
    # flatten multiindex columns
    df_metrics.columns = [" ".join(col).strip() for col in df_metrics.columns.values]
    # drop hpo_seed level
    df_metrics = df_metrics.drop(columns=["hpo_seed mean", "hpo_seed std"])
    # Rename index levels
    df_metrics.index.names = ["Dataset", "Model"]

    # create a composite metric as the average of the metrics
    df_metrics["Composite Metric mean"] = df_metrics[
        [f"{metric} mean" for metric in hpo_metrics_rename if "Rescaled" in metric]
    ].mean(axis=1)
    df_metrics["Composite Metric std"] = (
        1
        / len(hpo_metrics_rename)
        * (df_metrics[[f"{metric} std" for metric in hpo_metrics_rename if "Rescaled" in metric]] ** 2).sum(axis=1) ** 0.5
    )
    hpo_metrics_rename.append("Composite Metric")

    for metric in hpo_metrics_rename:
        df_metrics[f"{metric}"] = (
            df_metrics[f"{metric} mean"].apply(lambda x: f"{x:.3f}" if not pd.isna(x) else "No Run")
            + " $\\pm$ "
            + df_metrics[f"{metric} std"].apply(lambda x: f"{x:.3f}" if not pd.isna(x) else "No Run")
        )
    # Calculate mean and std times for each dataset-model combination across all metrics
    df_times = (
        df.groupby(["dataset_name", "model"])
        .agg({"best/elapsed_time": ["mean", "std"], "fit_model_return_elapsed_time": ["mean", "std"]})
        .rename(columns={"best/elapsed_time": "Best Time", "fit_model_return_elapsed_time": "HPO Time"})
    )

    # Flatten multiindex columns
    df_times.columns = [" ".join(col).strip() for col in df_times.columns.values]
    # Set the same index structure as df_metrics
    df_times.index.names = ["Dataset", "Model"]

    df_times["Best Time"] = (
        df_times["Best Time mean"].apply(lambda x: f"{x:4.3f}" if not pd.isna(x) else "No Run")
        + " $\\pm$ "
        + df_times["Best Time std"].apply(lambda x: f"{x:4.3f}" if not pd.isna(x) else "No Run")
    )
    df_times["HPO Time"] = (
        df_times["HPO Time mean"].apply(lambda x: f"{x:4.3f}" if not pd.isna(x) else "No Run")
        + " $\\pm$ "
        + df_times["HPO Time std"].apply(lambda x: f"{x:4.3f}" if not pd.isna(x) else "No Run")
    )

    # Join with the existing df_metrics (verify we have the same number of rows!)
    df_metrics = df_metrics.join(df_times, how="outer")
    return df_metrics

In [33]:
def print_one_table_per_dataset(df_metrics, hpo_metrics_rename, model_groups, datasets=None):
    df_latex = df_metrics.copy()
    df_latex = df_latex.rename(
        columns={"Best Time": "Time (s)", "Best Time mean": "Time (s) mean", "Best Time std": "Time (s) std"}
    )
    df_latex = df_latex.reset_index()
    if datasets is not None:
        df_latex = df_latex.loc[df_latex["Dataset"].isin(datasets)]
    # reapply model groups
    df_latex["Base Model"] = df_latex["Model"].apply(
        lambda x: next((group for group, models in model_groups.items() if x in models), "Other")
    )
    # redefine index with model_group
    df_latex = df_latex.set_index(["Dataset", "Base Model", "Model"])
    # sort by dataset, model_group, model
    df_latex = df_latex.sort_index(level=["Dataset", "Base Model", "Model"])

    # print per dataset
    for dataset in df_latex.index.get_level_values("Dataset").unique():
        df_print = df_latex.copy()
        df_print = df_print.loc[dataset]
        hpo_metrics_to_hide = [metric for metric in hpo_metrics_rename if metric.find("Rescaled") != -1]
        columns_to_hide = [
            col for col in df_latex.columns if col not in (hpo_metrics_rename + ["Time (s)"])
        ]
        columns_to_hide += hpo_metrics_to_hide
        df_print = df_print.style.hide(columns_to_hide, axis=1)
        for col in hpo_metrics_rename + ["Time (s)"]:
            highlight_metric = partial(highlight_max, column_name=f"{col} mean")
            underline_2nd_metric = partial(underline_2nd_max, column_name=f"{col} mean")
            if col in ["Davies-Bouldin", "Time (s)"]:
                highlight_metric = partial(highlight_min, column_name=f"{col} mean")
                underline_2nd_metric = partial(underline_2nd_min, column_name=f"{col} mean")
            (
                df_print.apply(highlight_metric, subset=[col, f"{col} mean"], axis=None).apply(
                    underline_2nd_metric, subset=[col, f"{col} mean"], axis=None
                )
            )

        latex_output = df_print.to_latex(
            hrules=True,
            clines="skip-last;data",
            convert_css=True,
            column_format="ll" + "l" * (len(df_print.columns) - len(columns_to_hide)),
            # environment="longtable",
            caption=f"Clustering results on dataset {dataset}",
        )

        # fix header
        columns = df_print.index.names + [col for col in df_print.columns if col not in columns_to_hide]
        header_line = " & ".join(columns) + r" \\"

        # split into lines
        latex_output = latex_output.splitlines()
        # remove 5th and 6th line and replace with header_line
        latex_output = latex_output[:4] + [header_line] + latex_output[6:]
        # remove last cline
        latex_output = latex_output[:-4] + latex_output[-3:]

        latex_output = "\n".join(latex_output)

        print(latex_output)
        print("\n\n")

In [65]:
def print_single_table(df_metrics, hpo_metrics_rename, datasets=None):
    # def print_single_table(df_metrics, hpo_metrics_rename, datasets=None):
    df_latex = df_metrics.copy()
    df_latex = df_latex.rename(
        columns={"Best Time": "Time (s)", "Best Time mean": "Time (s) mean", "Best Time std": "Time (s) std"}
    )
    df_latex = df_latex.reset_index()
    if datasets is not None:
        df_latex = df_latex.loc[df_latex["Dataset"].isin(datasets)]
    df_latex = df_latex.set_index(["Dataset", "Model"])
    hpo_metrics_to_hide = [metric for metric in hpo_metrics_rename if metric.find("Rescaled") != -1]
    columns_to_hide = [col for col in df_latex.columns if col not in (hpo_metrics_rename + ["Time (s)"])]
    columns_to_hide += hpo_metrics_to_hide
    df_latex = df_latex.style.hide(columns_to_hide, axis=1)
    for col in hpo_metrics_rename + ["Time (s)"]:
        highlight_metric = partial(highlight_max, column_name=f"{col} mean")
        underline_2nd_metric = partial(underline_2nd_max, column_name=f"{col} mean")
        if col in ["Davies-Bouldin", "Time (s)"]:
            highlight_metric = partial(highlight_min, column_name=f"{col} mean")
            underline_2nd_metric = partial(underline_2nd_min, column_name=f"{col} mean")
        (
            df_latex.apply(highlight_metric, subset=[col, f"{col} mean"], axis=None).apply(
                underline_2nd_metric, subset=[col, f"{col} mean"], axis=None
            )
        )

    environment = "longtable"
    latex_output = df_latex.to_latex(
        hrules=True,
        clines="skip-last;data",
        convert_css=True,
        column_format="ll" + "l" * (len(df_latex.columns) - len(columns_to_hide)),
        environment=environment,
    )

    # fix header
    columns = df_latex.index.names + [col for col in df_latex.columns if col not in columns_to_hide]
    header_line = " & ".join(columns) + r" \\"
    latex_output = latex_output.splitlines()
    if environment is None:
        # remove 3th and 4th line and replace with header_line
        latex_output = latex_output[:2] + [header_line] + latex_output[4:]
    else:
        # remove 3rd and 4th line and 8th and 9th line and replace with header_line
        latex_output = latex_output[:2] + [header_line] + latex_output[4:7] + [header_line] + latex_output[9:]

    latex_output = "\n".join(latex_output)
    print(latex_output)

# Composite per dataset and model family

In [47]:
model_names = {
    "BatchCoHiRF-1iter-random-60": "BatchCoHiRF",
	# "BatchCoHiRF-1iter-random-nolaststop-60": "BatchCoHiRF-nolaststop",
    "BatchCoHiRF-1iter-random-top-down-60": "R-BatchCoHiRF",
    # "BatchCoHiRF-1iter-random-top-down-nolaststop-60": "R-BatchCoHiRF-nolaststop",
    "BatchCoHiRF-DBSCAN-1iter-random-60": "BatchCoHiRF-DBSCAN",
	# "BatchCoHiRF-DBSCAN-1iter-random-nolaststop-60": "BatchCoHiRF-DBSCAN-nolaststop",
    "BatchCoHiRF-DBSCAN-1iter-random-top-down-60": "R-BatchCoHiRF-DBSCAN",
	# "BatchCoHiRF-DBSCAN-1iter-random-top-down-nolaststop-60": "R-BatchCoHiRF-DBSCAN-nolaststop",
    "BatchCoHiRF-KernelRBF-1iter-random-60": "BatchCoHiRF-KernelRBF",
	# "BatchCoHiRF-KernelRBF-1iter-random-nolaststop-60": "BatchCoHiRF-KernelRBF-nolaststop",
    "BatchCoHiRF-KernelRBF-1iter-random-top-down-60": "R-BatchCoHiRF-KernelRBF",
	# "BatchCoHiRF-KernelRBF-1iter-random-top-down-nolaststop-60": "R-BatchCoHiRF-KernelRBF-nolaststop",
    "BatchCoHiRF-SC-SRGF-1R-1iter-random-60": "BatchCoHiRF-SC-SRGF",
	# "BatchCoHiRF-SC-SRGF-1R-1iter-random-nolaststop-60": "BatchCoHiRF-SC-SRGF-nolaststop",
    # "BatchCoHiRF-SC-SRGF-2R-1iter-random-60": "BatchCoHiRF-SC-SRGF-2R",
	# "BatchCoHiRF-SC-SRGF-2R-1iter-random-nolaststop-60": "BatchCoHiRF-SC-SRGF-2R-nolaststop",
    # "CoHiRF-1000-60": "CoHiRF-1000",
    "CoHiRF-60": "CoHiRF",
    "CoHiRF-top-down-60": "R-CoHiRF",
    "CoHiRF-DBSCAN-60": "CoHiRF-DBSCAN",
    "CoHiRF-DBSCAN-top-down-60": "R-CoHiRF-DBSCAN",
    "CoHiRF-KernelRBF-60": "CoHiRF-KernelRBF",
    "CoHiRF-KernelRBF-top-down-60": "R-CoHiRF-KernelRBF",
    "CoHiRF-SC-SRGF-1R-60": "CoHiRF-SC-SRGF-1R",
    "CoHiRF-SC-SRGF-2R-60": "CoHiRF-SC-SRGF-2R",
    "DBSCAN-60": "DBSCAN",
    "KMeans-60": "KMeans",
    "KernelRBFKMeans-60": "KernelRBFKMeans",
    "SpectralSubspaceRandomization-60": "SC-SRGF",
}

dataset_names = {
    "binary_alpha_digits": "binary-alpha-digits",
	"mnist_784": "mnist",
}  # otherwise we get an error in latex

dataset_id = [
    61,
    46773,
    46776,
    46778,
    46779,
    46782,
    46783,
    554,
    40685,
    1568,
	47039,
]

hpo_metrics = [
	"adjusted_rand",
    "calinski_harabasz_score",
	"silhouette",
]

# Filter to only standardized runs
df = df_runs_parents.copy()
df = df.loc[df['standardize'] == True]
df = df.loc[df['model'].isin(model_names.keys())]
df = df.loc[df["dataset_id"].isin(dataset_id)]
df = df.loc[df['hpo_metric'].isin(hpo_metrics)]
df = df.replace({"model": model_names})
df = df.replace({"dataset_name": dataset_names})

# Filter to only runs with hpo_seed in range(5)
df = df.loc[df['hpo_seed'].isin(range(5))]

# Filter to only show batch methods for datasets with more than 1000 instances
df = df.loc[~((df['n_instances'] < 1000) & (df['model'].str.find('Batch') != -1))]

# define group of models
model_groups = {
	"KMeans": ["KMeans", "CoHiRF", "R-CoHiRF", "CoHiRF-1000", "BatchCoHiRF", "R-BatchCoHiRF", "BatchCoHiRF-nolaststop", "R-BatchCoHiRF-nolaststop"],
	"KernelKMeans": ["KernelRBFKMeans", "CoHiRF-KernelRBF", "R-CoHiRF-KernelRBF", "BatchCoHiRF-KernelRBF", "R-BatchCoHiRF-KernelRBF", "BatchCoHiRF-KernelRBF-nolaststop", "R-BatchCoHiRF-KernelRBF-nolaststop"],
    "DBSCAN": ["DBSCAN", "CoHiRF-DBSCAN", "R-CoHiRF-DBSCAN", "BatchCoHiRF-DBSCAN", "R-BatchCoHiRF-DBSCAN", "BatchCoHiRF-DBSCAN-nolaststop", "R-BatchCoHiRF-DBSCAN-nolaststop"],
    "SC-SRGF": ["SC-SRGF", "CoHiRF-SC-SRGF", "CoHiRF-SC-SRGF-1R", "CoHiRF-SC-SRGF-2R", "BatchCoHiRF-SC-SRGF", "BatchCoHiRF-SC-SRGF-2R", "BatchCoHiRF-SC-SRGF-nolaststop", "BatchCoHiRF-SC-SRGF-2R-nolaststop"],
}
df['model_group'] = df['model'].apply(lambda x: next((group for group, models in model_groups.items() if x in models), 'Other'))

# re-scale some metrics and build composite metric
# re-scale ari to be between 0 and 1 (originally between -0.5 and 1), by considering everything below 0 as 0
df["best/adjusted_rand_rescaled"] = df["best/adjusted_rand"].apply(lambda x: 0.0 if x < 0 else x)

# re-scale silhouette to be between 0 and 1 (originally between -1 and 1)
df["best/silhouette_rescaled"] = (df["best/silhouette"] - (-1)) / (1 - (-1)) 

# re-scale calinski to be between 0 and 1 normalized by dataset, model_group and hpo_metric
# replace calinksi -1.0 with 0.0
df["best/calinski_harabasz_score"] = df["best/calinski_harabasz_score"].replace(-1.0, 0.0)
# df["best/calinski_harabasz_score_rescaled"] = df["best/calinski_harabasz_score"].replace(-1.0, 0.0)
df["best/calinski_harabasz_score_rescaled"] = df.groupby(["dataset_id", "model_group", "hpo_metric"])[
    "best/calinski_harabasz_score"
].transform(lambda x: (x - x.min()) / (x.max() - x.min()) if x.max() != x.min() else (0.0 if x.max() == 0 else 1.0))

hpo_metrics = [
    "adjusted_rand",
    "adjusted_rand_rescaled",
    "calinski_harabasz_score",
    "calinski_harabasz_score_rescaled",
    "silhouette",
    "silhouette_rescaled",
]

hpo_metrics_rename = [
    "ARI",
    "Rescaled ARI",
    "Calinski",
    "Rescaled Calinski",
    "Silhouette",
    "Rescaled Silhouette",
]
df_metrics = get_df_metrics(df, hpo_metrics, hpo_metrics_rename)

# One table per dataset

In [48]:
print_one_table_per_dataset(df_metrics, hpo_metrics_rename, model_groups)

\begin{table}
\caption{Clustering results on dataset alizadeh-2000-v2}
\begin{tabular}{lllllll}
\toprule
Base Model & Model & ARI & Calinski & Silhouette & Composite Metric & Time (s) \\
\midrule
\multirow[c]{3}{*}{DBSCAN} & CoHiRF-DBSCAN & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{-1.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \underline{1.891 $\pm$ 0.121} \\
 & DBSCAN & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{-1.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries 0.072 $\pm$ 0.008 \\
 & R-CoHiRF-DBSCAN & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{-1.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & 3.367 $\pm$ 0.243 \\
\cline{1-7}
\multirow[c]{3}{*}{KMeans} & CoHiRF & \underline{0.871 $\pm$ 0.014} & \underline{15.094 $\pm$ 0.126} & 0.186 $\pm$ 0.006 &

In [49]:
datasets = [
    "alizadeh-2000-v2",
    "garber-2001",
    "bittner-2000",
    "nursery",
    "shuttle",
    "mnist",
    "coil-20",
    "chowdary-2006",
]
print_one_table_per_dataset(df_metrics, hpo_metrics_rename, model_groups, datasets=datasets)

\begin{table}
\caption{Clustering results on dataset alizadeh-2000-v2}
\begin{tabular}{lllllll}
\toprule
Base Model & Model & ARI & Calinski & Silhouette & Composite Metric & Time (s) \\
\midrule
\multirow[c]{3}{*}{DBSCAN} & CoHiRF-DBSCAN & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{-1.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \underline{1.891 $\pm$ 0.121} \\
 & DBSCAN & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{-1.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries 0.072 $\pm$ 0.008 \\
 & R-CoHiRF-DBSCAN & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{-1.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & 3.367 $\pm$ 0.243 \\
\cline{1-7}
\multirow[c]{3}{*}{KMeans} & CoHiRF & \underline{0.871 $\pm$ 0.014} & \underline{15.094 $\pm$ 0.126} & 0.186 $\pm$ 0.006 &

# Composite per dataset

In [50]:
model_names = {
    "BatchCoHiRF-1iter-random-60": "BatchCoHiRF",
    # "BatchCoHiRF-1iter-random-nolaststop-60": "BatchCoHiRF-nolaststop",
    "BatchCoHiRF-1iter-random-top-down-60": "R-BatchCoHiRF",
    # "BatchCoHiRF-1iter-random-top-down-nolaststop-60": "R-BatchCoHiRF-nolaststop",
    "BatchCoHiRF-DBSCAN-1iter-random-60": "BatchCoHiRF-DBSCAN",
    # "BatchCoHiRF-DBSCAN-1iter-random-nolaststop-60": "BatchCoHiRF-DBSCAN-nolaststop",
    "BatchCoHiRF-DBSCAN-1iter-random-top-down-60": "R-BatchCoHiRF-DBSCAN",
    # "BatchCoHiRF-DBSCAN-1iter-random-top-down-nolaststop-60": "R-BatchCoHiRF-DBSCAN-nolaststop",
    "BatchCoHiRF-KernelRBF-1iter-random-60": "BatchCoHiRF-KernelRBF",
    # "BatchCoHiRF-KernelRBF-1iter-random-nolaststop-60": "BatchCoHiRF-KernelRBF-nolaststop",
    "BatchCoHiRF-KernelRBF-1iter-random-top-down-60": "R-BatchCoHiRF-KernelRBF",
    # "BatchCoHiRF-KernelRBF-1iter-random-top-down-nolaststop-60": "R-BatchCoHiRF-KernelRBF-nolaststop",
    "BatchCoHiRF-SC-SRGF-1R-1iter-random-60": "BatchCoHiRF-SC-SRGF",
    # "BatchCoHiRF-SC-SRGF-1R-1iter-random-nolaststop-60": "BatchCoHiRF-SC-SRGF-nolaststop",
    # "BatchCoHiRF-SC-SRGF-2R-1iter-random-60": "BatchCoHiRF-SC-SRGF-2R",
    # "BatchCoHiRF-SC-SRGF-2R-1iter-random-nolaststop-60": "BatchCoHiRF-SC-SRGF-2R-nolaststop",
    # "CoHiRF-1000-60": "CoHiRF-1000",
    "CoHiRF-60": "CoHiRF",
    "CoHiRF-top-down-60": "R-CoHiRF",
    "CoHiRF-DBSCAN-60": "CoHiRF-DBSCAN",
    "CoHiRF-DBSCAN-top-down-60": "R-CoHiRF-DBSCAN",
    "CoHiRF-KernelRBF-60": "CoHiRF-KernelRBF",
    "CoHiRF-KernelRBF-top-down-60": "R-CoHiRF-KernelRBF",
    "CoHiRF-SC-SRGF-1R-60": "CoHiRF-SC-SRGF-1R",
    "CoHiRF-SC-SRGF-2R-60": "CoHiRF-SC-SRGF-2R",
    "DBSCAN-60": "DBSCAN",
    "KMeans-60": "KMeans",
    "KernelRBFKMeans-60": "KernelRBFKMeans",
    "SpectralSubspaceRandomization-60": "SC-SRGF",
}

dataset_names = {
    "binary_alpha_digits": "binary-alpha-digits",
    "mnist_784": "mnist",
}  # otherwise we get an error in latex

dataset_id = [
    61,
    46773,
    46776,
    46778,
    46779,
    46782,
    46783,
    554,
    40685,
    1568,
    47039,
]

hpo_metrics = [
    "adjusted_rand",
    "calinski_harabasz_score",
    "silhouette",
]

# Filter to only standardized runs
df = df_runs_parents.copy()
df = df.loc[df["standardize"] == True]
df = df.loc[df["model"].isin(model_names.keys())]
df = df.loc[df["dataset_id"].isin(dataset_id)]
df = df.loc[df["hpo_metric"].isin(hpo_metrics)]
df = df.replace({"model": model_names})
df = df.replace({"dataset_name": dataset_names})

# Filter to only runs with hpo_seed in range(5)
df = df.loc[df["hpo_seed"].isin(range(5))]

# Filter to only show batch methods for datasets with more than 1000 instances
df = df.loc[~((df["n_instances"] < 1000) & (df["model"].str.find("Batch") != -1))]

# define group of models
model_groups = {
    "KMeans": [
        "KMeans",
        "CoHiRF",
        "R-CoHiRF",
        "CoHiRF-1000",
        "BatchCoHiRF",
        "R-BatchCoHiRF",
        "BatchCoHiRF-nolaststop",
        "R-BatchCoHiRF-nolaststop",
    ],
    "KernelKMeans": [
        "KernelRBFKMeans",
        "CoHiRF-KernelRBF",
        "R-CoHiRF-KernelRBF",
        "BatchCoHiRF-KernelRBF",
        "R-BatchCoHiRF-KernelRBF",
        "BatchCoHiRF-KernelRBF-nolaststop",
        "R-BatchCoHiRF-KernelRBF-nolaststop",
    ],
    "DBSCAN": [
        "DBSCAN",
        "CoHiRF-DBSCAN",
        "R-CoHiRF-DBSCAN",
        "BatchCoHiRF-DBSCAN",
        "R-BatchCoHiRF-DBSCAN",
        "BatchCoHiRF-DBSCAN-nolaststop",
        "R-BatchCoHiRF-DBSCAN-nolaststop",
    ],
    "SC-SRGF": [
        "SC-SRGF",
        "CoHiRF-SC-SRGF",
        "CoHiRF-SC-SRGF-1R",
        "CoHiRF-SC-SRGF-2R",
        "BatchCoHiRF-SC-SRGF",
        "BatchCoHiRF-SC-SRGF-2R",
        "BatchCoHiRF-SC-SRGF-nolaststop",
        "BatchCoHiRF-SC-SRGF-2R-nolaststop",
    ],
}
df["model_group"] = df["model"].apply(
    lambda x: next((group for group, models in model_groups.items() if x in models), "Other")
)

# re-scale some metrics and build composite metric
# re-scale ari to be between 0 and 1 (originally between -0.5 and 1), by considering everything below 0 as 0
df["best/adjusted_rand_rescaled"] = df["best/adjusted_rand"].apply(lambda x: 0.0 if x < 0 else x)

# re-scale silhouette to be between 0 and 1 (originally between -1 and 1)
df["best/silhouette_rescaled"] = (df["best/silhouette"] - (-1)) / (1 - (-1))

# re-scale calinski to be between 0 and 1 normalized by dataset, model_group and hpo_metric
# replace calinksi -1.0 with 0.0
df["best/calinski_harabasz_score"] = df["best/calinski_harabasz_score"].replace(-1.0, 0.0)
# df["best/calinski_harabasz_score_rescaled"] = df["best/calinski_harabasz_score"].replace(-1.0, 0.0)
df["best/calinski_harabasz_score_rescaled"] = df.groupby(["dataset_id", "hpo_metric"])[
    "best/calinski_harabasz_score"
].transform(lambda x: (x - x.min()) / (x.max() - x.min()) if x.max() != x.min() else (0.0 if x.max() == 0 else 1.0))

hpo_metrics = [
    "adjusted_rand",
    "adjusted_rand_rescaled",
    "calinski_harabasz_score",
    "calinski_harabasz_score_rescaled",
    "silhouette",
    "silhouette_rescaled",
]

hpo_metrics_rename = [
    "ARI",
    "Rescaled ARI",
    "Calinski",
    "Rescaled Calinski",
    "Silhouette",
    "Rescaled Silhouette",
]
df_metrics = get_df_metrics(df, hpo_metrics, hpo_metrics_rename)

In [51]:
print_one_table_per_dataset(df_metrics, hpo_metrics_rename, model_groups, datasets=None)

\begin{table}
\caption{Clustering results on dataset alizadeh-2000-v2}
\begin{tabular}{lllllll}
\toprule
Base Model & Model & ARI & Calinski & Silhouette & Composite Metric & Time (s) \\
\midrule
\multirow[c]{3}{*}{DBSCAN} & CoHiRF-DBSCAN & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{-1.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \underline{1.891 $\pm$ 0.121} \\
 & DBSCAN & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{-1.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries 0.072 $\pm$ 0.008 \\
 & R-CoHiRF-DBSCAN & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{-1.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & 3.367 $\pm$ 0.243 \\
\cline{1-7}
\multirow[c]{3}{*}{KMeans} & CoHiRF & \underline{0.871 $\pm$ 0.014} & \underline{15.094 $\pm$ 0.126} & 0.186 $\pm$ 0.006 &

In [52]:
datasets = [
    "alizadeh-2000-v2",
    "garber-2001",
    "bittner-2000",
    "nursery",
    "shuttle",
    "mnist",
    "coil-20",
    "chowdary-2006",
]
print_one_table_per_dataset(df_metrics, hpo_metrics_rename, model_groups, datasets=datasets)

\begin{table}
\caption{Clustering results on dataset alizadeh-2000-v2}
\begin{tabular}{lllllll}
\toprule
Base Model & Model & ARI & Calinski & Silhouette & Composite Metric & Time (s) \\
\midrule
\multirow[c]{3}{*}{DBSCAN} & CoHiRF-DBSCAN & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{-1.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \underline{1.891 $\pm$ 0.121} \\
 & DBSCAN & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{-1.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries 0.072 $\pm$ 0.008 \\
 & R-CoHiRF-DBSCAN & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{-1.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & 3.367 $\pm$ 0.243 \\
\cline{1-7}
\multirow[c]{3}{*}{KMeans} & CoHiRF & \underline{0.871 $\pm$ 0.014} & \underline{15.094 $\pm$ 0.126} & 0.186 $\pm$ 0.006 &

# Single table

In [66]:
print_single_table(df_metrics, hpo_metrics_rename, datasets=None)

\begin{longtable}{lllllll}
\toprule
Dataset & Model & ARI & Calinski & Silhouette & Composite Metric & Time (s) \\
\midrule
\endfirsthead
\toprule
Dataset & Model & ARI & Calinski & Silhouette & Composite Metric & Time (s) \\
\midrule
\endhead
\midrule
\multicolumn{7}{r}{Continued on next page} \\
\midrule
\endfoot
\bottomrule
\endlastfoot
\multirow[c]{12}{*}{alizadeh-2000-v2} & CoHiRF & 0.871 $\pm$ 0.014 & \underline{15.094 $\pm$ 0.126} & 0.186 $\pm$ 0.006 & \underline{0.820 $\pm$ 0.003} & 0.078 $\pm$ 0.025 \\
 & CoHiRF-DBSCAN & 0.000 $\pm$ 0.000 & 0.000 $\pm$ 0.000 & -1.000 $\pm$ 0.000 & 0.000 $\pm$ 0.000 & 1.891 $\pm$ 0.121 \\
 & CoHiRF-KernelRBF & 0.083 $\pm$ 0.034 & 1.569 $\pm$ 0.143 & 0.001 $\pm$ 0.012 & 0.229 $\pm$ 0.006 & 0.388 $\pm$ 0.318 \\
 & CoHiRF-SC-SRGF-1R & \bfseries 0.947 $\pm$ 0.000 & 12.338 $\pm$ 0.000 & 0.194 $\pm$ 0.000 & 0.786 $\pm$ 0.000 & 2.913 $\pm$ 1.498 \\
 & CoHiRF-SC-SRGF-2R & \bfseries 0.947 $\pm$ 0.000 & 12.338 $\pm$ 0.000 & 0.194 $\pm$ 0.000 & 0.786 $\pm

In [68]:
datasets = [
    "alizadeh-2000-v2",
    "garber-2001",
    "bittner-2000",
    "nursery",
    "shuttle",
    "mnist",
    "coil-20",
    "chowdary-2006",
]
print_single_table(df_metrics, hpo_metrics_rename, datasets=datasets)

\begin{longtable}{lllllll}
\toprule
Dataset & Model & ARI & Calinski & Silhouette & Composite Metric & Time (s) \\
\midrule
\endfirsthead
\toprule
Dataset & Model & ARI & Calinski & Silhouette & Composite Metric & Time (s) \\
\midrule
\endhead
\midrule
\multicolumn{7}{r}{Continued on next page} \\
\midrule
\endfoot
\bottomrule
\endlastfoot
\multirow[c]{12}{*}{alizadeh-2000-v2} & CoHiRF & 0.871 $\pm$ 0.014 & \underline{15.094 $\pm$ 0.126} & 0.186 $\pm$ 0.006 & \underline{0.820 $\pm$ 0.003} & 0.078 $\pm$ 0.025 \\
 & CoHiRF-DBSCAN & 0.000 $\pm$ 0.000 & 0.000 $\pm$ 0.000 & -1.000 $\pm$ 0.000 & 0.000 $\pm$ 0.000 & 1.891 $\pm$ 0.121 \\
 & CoHiRF-KernelRBF & 0.083 $\pm$ 0.034 & 1.569 $\pm$ 0.143 & 0.001 $\pm$ 0.012 & 0.229 $\pm$ 0.006 & 0.388 $\pm$ 0.318 \\
 & CoHiRF-SC-SRGF-1R & \bfseries 0.947 $\pm$ 0.000 & 12.338 $\pm$ 0.000 & 0.194 $\pm$ 0.000 & 0.786 $\pm$ 0.000 & 2.913 $\pm$ 1.498 \\
 & CoHiRF-SC-SRGF-2R & \bfseries 0.947 $\pm$ 0.000 & 12.338 $\pm$ 0.000 & 0.194 $\pm$ 0.000 & 0.786 $\pm

# per dataset per model group

In [52]:
df_latex = df_metrics.copy()
df_latex = df_latex.reset_index()
# reapply model groups
df_latex["Base Model"] = df_latex["Model"].apply(
    lambda x: next((group for group, models in model_groups.items() if x in models), "Other")
)
# redefine index with model_group
df_latex = df_latex.set_index(["Dataset", "Base Model", "Model"])
# sort by dataset, model_group, model
df_latex = df_latex.sort_index(level=["Dataset", "Base Model", "Model"])


# print per dataset
for dataset in df_latex.index.get_level_values("Dataset").unique():
    df_print = df_latex.copy()
    df_print = df_print.loc[dataset]
    hpo_metrics_to_hide = [metric for metric in hpo_metrics_rename if metric.find("Rescaled") != -1]
    columns_to_hide = [col for col in df_latex.columns if col not in (hpo_metrics_rename + ["Best Time", "HPO Time"])]
    columns_to_hide += hpo_metrics_to_hide
    df_print = df_print.style.hide(columns_to_hide, axis=1)
    for col in hpo_metrics_rename + ["Best Time", "HPO Time"]:
        highlight_metric = partial(highlight_max, column_name=f"{col} mean")
        underline_2nd_metric = partial(underline_2nd_max, column_name=f"{col} mean")
        if col in ["Davies-Bouldin", "Best Time", "HPO Time"]:
            highlight_metric = partial(highlight_min, column_name=f"{col} mean")
            underline_2nd_metric = partial(underline_2nd_min, column_name=f"{col} mean")
        (
            df_print.apply(highlight_metric, subset=[col, f"{col} mean"], axis=None).apply(
                underline_2nd_metric, subset=[col, f"{col} mean"], axis=None
            )
        )

    latex_output = df_print.to_latex(
        hrules=True,
        clines="skip-last;data",
        convert_css=True,
        column_format="ll" + "l" * (len(df_print.columns) - len(columns_to_hide)),
        # environment="longtable",
        caption=f"Clustering results on dataset {dataset}",
    )

    # fix header
    columns = df_print.index.names + [col for col in df_print.columns if col not in columns_to_hide]
    header_line = " & ".join(columns) + r" \\"

    # split into lines
    latex_output = latex_output.splitlines()
    # remove 5th and 6th line and replace with header_line
    latex_output = latex_output[:4] + [header_line] + latex_output[6:]
    # remove last cline
    latex_output = latex_output[:-4] + latex_output[-3:]

    latex_output = "\n".join(latex_output)

    print(latex_output)
    print("\n\n")

\begin{table}
\caption{Clustering results on dataset alizadeh-2000-v2}
\begin{tabular}{llllllll}
\toprule
Base Model & Model & ARI & Calinski & Silhouette & Composite Metric & Best Time & HPO Time \\
\midrule
\multirow[c]{3}{*}{DBSCAN} & CoHiRF-DBSCAN & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{-1.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \underline{1.891 $\pm$ 0.121} & \bfseries 358.777 $\pm$ 10.698 \\
 & DBSCAN & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{-1.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries 0.072 $\pm$ 0.008 & \underline{364.587 $\pm$ 12.037} \\
 & R-CoHiRF-DBSCAN & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{-1.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & 3.367 $\pm$ 0.243 & 438.810 $\pm$ 19.708 \\
\cline{1-8}
\multirow[c]{3}{*}

In [53]:
df_latex

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ARI mean,ARI std,Rescaled ARI mean,Rescaled ARI std,Calinski mean,Calinski std,Rescaled Calinski mean,Rescaled Calinski std,Silhouette mean,Silhouette std,...,Rescaled Calinski,Silhouette,Rescaled Silhouette,Composite Metric,Best Time mean,Best Time std,HPO Time mean,HPO Time std,Best Time,HPO Time
Dataset,Base Model,Model,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
alizadeh-2000-v2,DBSCAN,CoHiRF-DBSCAN,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000e+00,0.000000,0.000000e+00,-1.000000,0.000000,...,0.000 $\pm$ 0.000,-1.000 $\pm$ 0.000,0.000 $\pm$ 0.000,0.000 $\pm$ 0.000,1.891236,0.121433,358.777156,10.697893,1.891 $\pm$ 0.121,358.777 $\pm$ 10.698
alizadeh-2000-v2,DBSCAN,DBSCAN,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000e+00,0.000000,0.000000e+00,-1.000000,0.000000,...,0.000 $\pm$ 0.000,-1.000 $\pm$ 0.000,0.000 $\pm$ 0.000,0.000 $\pm$ 0.000,0.071820,0.007800,364.587429,12.037019,0.072 $\pm$ 0.008,364.587 $\pm$ 12.037
alizadeh-2000-v2,DBSCAN,R-CoHiRF-DBSCAN,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000e+00,0.000000,0.000000e+00,-1.000000,0.000000,...,0.000 $\pm$ 0.000,-1.000 $\pm$ 0.000,0.000 $\pm$ 0.000,0.000 $\pm$ 0.000,3.367055,0.242905,438.809734,19.707713,3.367 $\pm$ 0.243,438.810 $\pm$ 19.708
alizadeh-2000-v2,KMeans,CoHiRF,0.870727,0.013688,0.870727,0.013688,15.094428,1.258956e-01,0.800000,4.472136e-01,0.185935,0.006072,...,0.800 $\pm$ 0.447,0.186 $\pm$ 0.006,0.593 $\pm$ 0.003,0.755 $\pm$ 0.075,0.077761,0.025291,360.449912,12.657163,0.078 $\pm$ 0.025,360.450 $\pm$ 12.657
alizadeh-2000-v2,KMeans,KMeans,0.837874,0.013844,0.837874,0.013844,15.150730,3.323259e-15,1.000000,1.041955e-14,0.204196,0.035676,...,1.000 $\pm$ 0.000,0.204 $\pm$ 0.036,0.602 $\pm$ 0.018,0.813 $\pm$ 0.004,0.027972,0.003558,356.250373,8.688019,0.028 $\pm$ 0.004,356.250 $\pm$ 8.688
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
shuttle,KernelKMeans,CoHiRF-KernelRBF,0.581087,0.114397,0.581087,0.114397,11265.563356,3.001593e+03,0.469216,2.471653e-01,0.689508,0.241302,...,0.469 $\pm$ 0.247,0.690 $\pm$ 0.241,0.845 $\pm$ 0.121,0.632 $\pm$ 0.050,19.815364,12.709478,1954.295042,1229.403456,19.815 $\pm$ 12.709,1954.295 $\pm$ 1229.403
shuttle,KernelKMeans,KernelRBFKMeans,0.250069,0.083577,0.250069,0.083577,11742.147770,2.434781e+03,0.508461,2.004913e-01,0.389934,0.010019,...,0.508 $\pm$ 0.200,0.390 $\pm$ 0.010,0.695 $\pm$ 0.005,0.484 $\pm$ 0.036,1.701472,0.241054,1085.777835,1159.365950,1.701 $\pm$ 0.241,1085.778 $\pm$ 1159.366
shuttle,KernelKMeans,R-BatchCoHiRF-KernelRBF,0.372898,0.054661,0.372898,0.054661,8989.754481,1.375662e+03,0.281816,1.132785e-01,0.641444,0.244014,...,0.282 $\pm$ 0.113,0.641 $\pm$ 0.244,0.821 $\pm$ 0.122,0.492 $\pm$ 0.029,2.993204,0.945362,1257.914199,1231.635099,2.993 $\pm$ 0.945,1257.914 $\pm$ 1231.635
shuttle,KernelKMeans,R-CoHiRF-KernelRBF,0.552653,0.073841,0.552653,0.073841,13994.981173,2.466253e+03,0.693969,2.030829e-01,0.856269,0.079403,...,0.694 $\pm$ 0.203,0.856 $\pm$ 0.079,0.928 $\pm$ 0.040,0.725 $\pm$ 0.037,32.912177,17.803011,2791.128631,1315.003222,32.912 $\pm$ 17.803,2791.129 $\pm$ 1315.003


In [55]:
df_latex = df_metrics.copy()
df_latex = df_latex.reset_index()
datasets = ["alizadeh-2000-v2", "garber-2001", "bittner-2000", "nursery", "shuttle", "mnist", "coil-20", "chowdary-2006"]
df_latex = df_latex.loc[df_latex["Dataset"].isin(datasets)]
# reapply model groups
df_latex["Base Model"] = df_latex["Model"].apply(
    lambda x: next((group for group, models in model_groups.items() if x in models), "Other")
)
# redefine index with model_group
df_latex = df_latex.set_index(["Dataset", "Base Model", "Model"])
# sort by dataset, model_group, model
df_latex = df_latex.sort_index(level=["Dataset", "Base Model", "Model"])


# print per dataset
for dataset in df_latex.index.get_level_values("Dataset").unique():
    df_print = df_latex.copy()
    df_print = df_print.loc[dataset]
    hpo_metrics_to_hide = [metric for metric in hpo_metrics_rename if metric.find("Rescaled") != -1]
    columns_to_hide = [col for col in df_latex.columns if col not in (hpo_metrics_rename + ["Best Time", "HPO Time"])]
    columns_to_hide += hpo_metrics_to_hide
    df_print = df_print.style.hide(columns_to_hide, axis=1)
    for col in hpo_metrics_rename + ["Best Time", "HPO Time"]:
        highlight_metric = partial(highlight_max, column_name=f"{col} mean")
        underline_2nd_metric = partial(underline_2nd_max, column_name=f"{col} mean")
        if col in ["Davies-Bouldin", "Best Time", "HPO Time"]:
            highlight_metric = partial(highlight_min, column_name=f"{col} mean")
            underline_2nd_metric = partial(underline_2nd_min, column_name=f"{col} mean")
        (
            df_print.apply(highlight_metric, subset=[col, f"{col} mean"], axis=None).apply(
                underline_2nd_metric, subset=[col, f"{col} mean"], axis=None
            )
        )

    latex_output = df_print.to_latex(
        hrules=True,
        clines="skip-last;data",
        convert_css=True,
        column_format="ll" + "l" * (len(df_print.columns) - len(columns_to_hide)),
        # environment="longtable",
        caption=f"Clustering results on dataset {dataset}",
    )

    # fix header
    columns = df_print.index.names + [col for col in df_print.columns if col not in columns_to_hide]
    header_line = " & ".join(columns) + r" \\"

    # split into lines
    latex_output = latex_output.splitlines()
    # remove 5th and 6th line and replace with header_line
    latex_output = latex_output[:4] + [header_line] + latex_output[6:]
    # remove last cline
    latex_output = latex_output[:-4] + latex_output[-3:]

    latex_output = "\n".join(latex_output)

    print(latex_output)
    print("\n\n")

\begin{table}
\caption{Clustering results on dataset alizadeh-2000-v2}
\begin{tabular}{llllllll}
\toprule
Base Model & Model & ARI & Calinski & Silhouette & Composite Metric & Best Time & HPO Time \\
\midrule
\multirow[c]{3}{*}{DBSCAN} & CoHiRF-DBSCAN & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{-1.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \underline{1.891 $\pm$ 0.121} & \bfseries 358.777 $\pm$ 10.698 \\
 & DBSCAN & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{-1.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries 0.072 $\pm$ 0.008 & \underline{364.587 $\pm$ 12.037} \\
 & R-CoHiRF-DBSCAN & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{-1.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & 3.367 $\pm$ 0.243 & 438.810 $\pm$ 19.708 \\
\cline{1-8}
\multirow[c]{3}{*}

In [36]:
df_metrics

Unnamed: 0_level_0,Unnamed: 1_level_0,ARI mean,ARI std,Rescaled ARI mean,Rescaled ARI std,Calinski mean,Calinski std,Rescaled Calinski mean,Rescaled Calinski std,Silhouette mean,Silhouette std,...,Rescaled Calinski,Silhouette,Rescaled Silhouette,Composite Metric,Best Time mean,Best Time std,HPO Time mean,HPO Time std,Best Time,HPO Time
Dataset,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
alizadeh-2000-v2,CoHiRF,0.870727,0.013688,0.870727,0.013688,15.094428,1.258956e-01,0.800000,0.447214,0.185935,0.006072,...,0.800 $\pm$ 0.447,0.186 $\pm$ 0.006,0.593 $\pm$ 0.003,0.755 $\pm$ 0.075,0.077761,0.025291,360.449912,12.657163,0.078 $\pm$ 0.025,360.450 $\pm$ 12.657
alizadeh-2000-v2,CoHiRF-1000,0.870727,0.013688,0.870727,0.013688,15.094428,1.258956e-01,0.800000,0.447214,0.185935,0.006072,...,0.800 $\pm$ 0.447,0.186 $\pm$ 0.006,0.593 $\pm$ 0.003,0.755 $\pm$ 0.075,0.079105,0.024731,376.338298,29.726852,0.079 $\pm$ 0.025,376.338 $\pm$ 29.727
alizadeh-2000-v2,CoHiRF-DBSCAN,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000e+00,0.000000,0.000000,-1.000000,0.000000,...,0.000 $\pm$ 0.000,-1.000 $\pm$ 0.000,0.000 $\pm$ 0.000,0.000 $\pm$ 0.000,1.891236,0.121433,358.777156,10.697893,1.891 $\pm$ 0.121,358.777 $\pm$ 10.698
alizadeh-2000-v2,CoHiRF-KernelRBF,0.083405,0.034230,0.083405,0.034230,1.568841,1.426340e-01,0.168354,0.073935,0.001354,0.012104,...,0.168 $\pm$ 0.074,0.001 $\pm$ 0.012,0.501 $\pm$ 0.006,0.251 $\pm$ 0.014,0.388461,0.317894,409.038416,20.935751,0.388 $\pm$ 0.318,409.038 $\pm$ 20.936
alizadeh-2000-v2,CoHiRF-SC-SRGF,0.947128,0.000000,0.947128,0.000000,12.338328,1.986027e-15,0.800000,0.447214,0.194047,0.000000,...,0.800 $\pm$ 0.447,0.194 $\pm$ 0.000,0.597 $\pm$ 0.000,0.781 $\pm$ 0.075,2.913118,1.498119,463.230301,13.566887,2.913 $\pm$ 1.498,463.230 $\pm$ 13.567
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
shuttle,R-BatchCoHiRF-DBSCAN,0.710417,0.055361,0.710417,0.055361,1191.659904,1.367356e+02,0.030470,0.006230,0.915851,0.029745,...,0.030 $\pm$ 0.006,0.916 $\pm$ 0.030,0.958 $\pm$ 0.015,0.566 $\pm$ 0.010,13.462567,7.998126,1879.495971,1222.850442,13.463 $\pm$ 7.998,1879.496 $\pm$ 1222.850
shuttle,R-BatchCoHiRF-KernelRBF,0.372898,0.054661,0.372898,0.054661,8989.754481,1.375662e+03,0.281816,0.113278,0.641444,0.244014,...,0.282 $\pm$ 0.113,0.641 $\pm$ 0.244,0.821 $\pm$ 0.122,0.492 $\pm$ 0.029,2.993204,0.945362,1257.914199,1231.635099,2.993 $\pm$ 0.945,1257.914 $\pm$ 1231.635
shuttle,R-CoHiRF,0.645521,0.024557,0.645521,0.024557,20315.310691,1.808820e+02,0.548221,0.007472,0.969336,0.004637,...,0.548 $\pm$ 0.007,0.969 $\pm$ 0.005,0.985 $\pm$ 0.002,0.726 $\pm$ 0.004,20.328187,3.064537,2034.077139,1262.851535,20.328 $\pm$ 3.065,2034.077 $\pm$ 1262.852
shuttle,R-CoHiRF-DBSCAN,0.696130,0.014815,0.696130,0.014815,1291.725980,5.836486e+02,0.035029,0.026594,0.937522,0.031919,...,0.035 $\pm$ 0.027,0.938 $\pm$ 0.032,0.969 $\pm$ 0.016,0.567 $\pm$ 0.006,63.722125,28.394231,5543.052255,2328.999420,63.722 $\pm$ 28.394,5543.052 $\pm$ 2328.999


In [37]:
df_latex = df_metrics.copy()
df_latex = df_latex.reset_index()
# reapply model groups
df_latex['Base Model'] = df_latex['Model'].apply(lambda x: next((group for group, models in model_groups.items() if x in models), 'Other'))
# redefine index with model_group
df_latex = df_latex.set_index(['Dataset', 'Base Model', 'Model'])
# sort by dataset, model_group, model
df_latex = df_latex.sort_index(level=['Dataset', 'Base Model', 'Model'])


# print per dataset
for dataset in df_latex.index.get_level_values('Dataset').unique():
    df_print = df_latex.copy()
    df_print = df_print.loc[dataset]
    hpo_metrics_to_hide = [metric for metric in hpo_metrics_rename if metric.find("Rescaled") != -1] 
    columns_to_hide = [col for col in df_latex.columns if col not in (hpo_metrics_rename + ["Best Time", "HPO Time"])]
    columns_to_hide += hpo_metrics_to_hide
    df_print = df_print.style.hide(columns_to_hide, axis=1)
    for col in hpo_metrics_rename + ["Best Time", "HPO Time"]:
        highlight_metric = partial(highlight_max, column_name=f"{col} mean")
        underline_2nd_metric = partial(underline_2nd_max, column_name=f"{col} mean")
        if col in ["Davies-Bouldin", "Best Time", "HPO Time"]:
            highlight_metric = partial(highlight_min, column_name=f"{col} mean")
            underline_2nd_metric = partial(underline_2nd_min, column_name=f"{col} mean")
        (
            df_print.apply(highlight_metric, subset=[col, f"{col} mean"], axis=None).apply(
                underline_2nd_metric, subset=[col, f"{col} mean"], axis=None
            )
        )

    df_print = df_print.hide(level=0, axis=0)
    latex_output = (
        df_print.to_latex(
            hrules=True,
            clines="skip-last;data",
            convert_css=True,
            column_format="ll" + "l" * (len(df_print.columns) - len(columns_to_hide)),
            # environment="longtable",
            caption=f"Clustering results on dataset {dataset}",
        )
    )

    # fix header
    columns = df_print.index.names[:1] + [col for col in df_print.columns if col not in columns_to_hide]
    header_line = ' & '.join(columns) + r' \\'

    # split into lines
    latex_output = latex_output.splitlines()
    # remove 5th and 6th line and replace with header_line
    latex_output = latex_output[:4] + [header_line] + latex_output[6:]
    latex_output = '\n'.join(latex_output)

    # manually add clines after model groups
    model_groups_in_data = df_print.index.get_level_values('Base Model').unique().tolist()
    lines = latex_output.splitlines()
    new_lines = []
    last_line = ""
    for i, line in enumerate(lines[6:-3]): # skip first 6 lines and last 3 lines
        model_last_line = last_line.split('&')[0].strip()
        model_current_line = line.split('&')[0].strip()
        model_group_last_line = next((group for group, models in model_groups.items() if model_last_line in models), 'Other')
        model_group_current_line = next((group for group, models in model_groups.items() if model_current_line in models), 'Other')
        if model_group_last_line != model_group_current_line and i != 0:
            new_lines.append(r'\cline{' + f'1-{len(columns)}' + r'}')
        new_lines.append(line)
        last_line = line

    latex_output = '\n'.join(lines[:6] + new_lines + lines[-3:])

    print(latex_output)
    print("\n\n")
    print("\pagebreak")

\begin{table}
\caption{Clustering results on dataset alizadeh-2000-v2}
\begin{tabular}{llllllll}
\toprule
Base Model & ARI & Calinski & Silhouette & Composite Metric & Best Time & HPO Time \\
\midrule
CoHiRF-DBSCAN & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{-1.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \underline{1.891 $\pm$ 0.121} & \bfseries 358.777 $\pm$ 10.698 \\
DBSCAN & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{-1.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries 0.072 $\pm$ 0.008 & \underline{364.587 $\pm$ 12.037} \\
R-CoHiRF-DBSCAN & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{-1.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & 3.367 $\pm$ 0.243 & 438.810 $\pm$ 19.708 \\
\cline{1-7}
CoHiRF & \underline{0.871 $\pm$ 0.014} & \underline{15.094 $\

# Composite per dataset

In [56]:
model_names = {
    "BatchCoHiRF-1iter-random-60": "BatchCoHiRF",
	# "BatchCoHiRF-1iter-random-nolaststop-60": "BatchCoHiRF-nolaststop",
    "BatchCoHiRF-1iter-random-top-down-60": "R-BatchCoHiRF",
    # "BatchCoHiRF-1iter-random-top-down-nolaststop-60": "R-BatchCoHiRF-nolaststop",
    "BatchCoHiRF-DBSCAN-1iter-random-60": "BatchCoHiRF-DBSCAN",
	# "BatchCoHiRF-DBSCAN-1iter-random-nolaststop-60": "BatchCoHiRF-DBSCAN-nolaststop",
    "BatchCoHiRF-DBSCAN-1iter-random-top-down-60": "R-BatchCoHiRF-DBSCAN",
	# "BatchCoHiRF-DBSCAN-1iter-random-top-down-nolaststop-60": "R-BatchCoHiRF-DBSCAN-nolaststop",
    "BatchCoHiRF-KernelRBF-1iter-random-60": "BatchCoHiRF-KernelRBF",
	# "BatchCoHiRF-KernelRBF-1iter-random-nolaststop-60": "BatchCoHiRF-KernelRBF-nolaststop",
    "BatchCoHiRF-KernelRBF-1iter-random-top-down-60": "R-BatchCoHiRF-KernelRBF",
	# "BatchCoHiRF-KernelRBF-1iter-random-top-down-nolaststop-60": "R-BatchCoHiRF-KernelRBF-nolaststop",
    "BatchCoHiRF-SC-SRGF-1R-1iter-random-60": "BatchCoHiRF-SC-SRGF",
	# "BatchCoHiRF-SC-SRGF-1R-1iter-random-nolaststop-60": "BatchCoHiRF-SC-SRGF-nolaststop",
    # "BatchCoHiRF-SC-SRGF-2R-1iter-random-60": "BatchCoHiRF-SC-SRGF-2R",
	# "BatchCoHiRF-SC-SRGF-2R-1iter-random-nolaststop-60": "BatchCoHiRF-SC-SRGF-2R-nolaststop",
    # "CoHiRF-1000-60": "CoHiRF-1000",
    "CoHiRF-60": "CoHiRF",
    "CoHiRF-top-down-60": "R-CoHiRF",
    "CoHiRF-DBSCAN-60": "CoHiRF-DBSCAN",
    "CoHiRF-DBSCAN-top-down-60": "R-CoHiRF-DBSCAN",
    "CoHiRF-KernelRBF-60": "CoHiRF-KernelRBF",
    "CoHiRF-KernelRBF-top-down-60": "R-CoHiRF-KernelRBF",
    "CoHiRF-SC-SRGF-1R-60": "CoHiRF-SC-SRGF-1R",
    "CoHiRF-SC-SRGF-2R-60": "CoHiRF-SC-SRGF-2R",
    "DBSCAN-60": "DBSCAN",
    "KMeans-60": "KMeans",
    "KernelRBFKMeans-60": "KernelRBFKMeans",
    "SpectralSubspaceRandomization-60": "SC-SRGF",
}

dataset_names = {
    "binary_alpha_digits": "binary-alpha-digits",
	"mnist_784": "mnist",
}  # otherwise we get an error in latex

dataset_id = [
    61,
    46773,
    46776,
    46778,
    46779,
    46782,
    46783,
    554,
    40685,
    1568,
	47039,
]

hpo_metrics = [
	"adjusted_rand",
    "calinski_harabasz_score",
	"silhouette",
]

# Filter to only standardized runs
df = df_runs_parents.copy()
df = df.loc[df['standardize'] == True]
df = df.loc[df['model'].isin(model_names.keys())]
df = df.loc[df["dataset_id"].isin(dataset_id)]
df = df.loc[df['hpo_metric'].isin(hpo_metrics)]
df = df.replace({"model": model_names})
df = df.replace({"dataset_name": dataset_names})

# Filter to only runs with hpo_seed in range(5)
df = df.loc[df['hpo_seed'].isin(range(5))]

# Filter to only show batch methods for datasets with more than 1000 instances
df = df.loc[~((df['n_instances'] < 1000) & (df['model'].str.find('Batch') != -1))]

# define group of models
model_groups = {
	"KMeans": ["KMeans", "CoHiRF", "R-CoHiRF", "CoHiRF-1000", "BatchCoHiRF", "R-BatchCoHiRF", "BatchCoHiRF-nolaststop", "R-BatchCoHiRF-nolaststop"],
	"KernelKMeans": ["KernelRBFKMeans", "CoHiRF-KernelRBF", "R-CoHiRF-KernelRBF", "BatchCoHiRF-KernelRBF", "R-BatchCoHiRF-KernelRBF", "BatchCoHiRF-KernelRBF-nolaststop", "R-BatchCoHiRF-KernelRBF-nolaststop"],
    "DBSCAN": ["DBSCAN", "CoHiRF-DBSCAN", "R-CoHiRF-DBSCAN", "BatchCoHiRF-DBSCAN", "R-BatchCoHiRF-DBSCAN", "BatchCoHiRF-DBSCAN-nolaststop", "R-BatchCoHiRF-DBSCAN-nolaststop"],
    "SC-SRGF": ["SC-SRGF", "CoHiRF-SC-SRGF", "CoHiRF-SC-SRGF-1R", "CoHiRF-SC-SRGF-2R", "BatchCoHiRF-SC-SRGF", "BatchCoHiRF-SC-SRGF-2R", "BatchCoHiRF-SC-SRGF-nolaststop", "BatchCoHiRF-SC-SRGF-2R-nolaststop"],
}
df['model_group'] = df['model'].apply(lambda x: next((group for group, models in model_groups.items() if x in models), 'Other'))


# re-scale some metrics and build composite metric
# re-scale ari to be between 0 and 1 (originally between -0.5 and 1), by considering everything below 0 as 0
df["best/adjusted_rand_rescaled"] = df["best/adjusted_rand"].apply(lambda x: 0.0 if x < 0 else x)

# re-scale silhouette to be between 0 and 1 (originally between -1 and 1)
df["best/silhouette_rescaled"] = (df["best/silhouette"] - (-1)) / (1 - (-1)) 

# re-scale calinski to be between 0 and 1 normalized by dataset, and hpo_metric
# replace calinksi -1.0 with 0.0
df["best/calinski_harabasz_score"] = df["best/calinski_harabasz_score"].replace(-1.0, 0.0)
# df["best/calinski_harabasz_score_rescaled"] = df["best/calinski_harabasz_score"].replace(-1.0, 0.0)
df["best/calinski_harabasz_score_rescaled"] = df.groupby(["dataset_id", "hpo_metric"])[
    "best/calinski_harabasz_score"
].transform(lambda x: (x - x.min()) / (x.max() - x.min()) if x.max() != x.min() else (0.0 if x.max() == 0 else 1.0))

In [57]:
hpo_metrics = [
    # "adjusted_rand",
    # "adjusted_mutual_info",
    # "calinski_harabasz_score",
    # "silhouette",
    # "davies_bouldin_score",
    # "normalized_mutual_info",
    "adjusted_rand",
    "adjusted_rand_rescaled",
    "calinski_harabasz_score",
    "calinski_harabasz_score_rescaled",
    "silhouette",
    "silhouette_rescaled",
]

hpo_metrics_rename = [
    # "ARI",
    # "AMI",
    # "Calinski",
    # "Silhouette",
    # "Davies-Bouldin",
    # "NMI",
    "ARI",
    "Rescaled ARI",
    "Calinski",
    "Rescaled Calinski",
    "Silhouette",
    "Rescaled Silhouette",
]

dfs_metrics = {}

for hpo_metric, hpo_metric_rename in zip(hpo_metrics, hpo_metrics_rename):
    if hpo_metric.find("_rescaled") != -1:
        original_metric = hpo_metric.replace("_rescaled", "")
    else:
        original_metric = hpo_metric
    df_metric = df.loc[df["hpo_metric"] == original_metric][
        ["dataset_name", "model", "hpo_seed", f"best/{hpo_metric}"]
    ].rename(columns={f"best/{hpo_metric}": hpo_metric_rename})
    df_metric = df_metric.dropna(subset=[hpo_metric_rename])
    df_metric = df_metric.set_index(['dataset_name', 'model', 'hpo_seed'])
    df_metric = df_metric.astype({hpo_metric_rename: float})
    dfs_metrics[hpo_metric_rename] = df_metric

df_metrics = pd.concat(dfs_metrics.values(), axis=1, join="outer")
df_metrics = df_metrics.reset_index()

# calculate mean and std
df_metrics = df_metrics.groupby(['dataset_name', 'model']).agg(['mean', 'std'])
# flatten multiindex columns
df_metrics.columns = [' '.join(col).strip() for col in df_metrics.columns.values]
# drop hpo_seed level
df_metrics = df_metrics.drop(columns=['hpo_seed mean', 'hpo_seed std'])
# Rename index levels
df_metrics.index.names = ["Dataset", "Model"]

# create a composite metric as the average of the metrics
df_metrics["Composite Metric mean"] = df_metrics[[f"{metric} mean" for metric in hpo_metrics_rename if "Rescaled" in metric]].mean(axis=1)
df_metrics["Composite Metric std"] = 1/len(hpo_metrics_rename) * (df_metrics[[f"{metric} std" for metric in hpo_metrics_rename if "Rescaled" in metric]]**2).sum(axis=1)**0.5
hpo_metrics_rename.append("Composite Metric")


for metric in hpo_metrics_rename:
    df_metrics[f"{metric}"] = (
        df_metrics[f"{metric} mean"].apply(lambda x: f"{x:.3f}" if not pd.isna(x) else "No Run")
        + " $\\pm$ "
        + df_metrics[f"{metric} std"].apply(lambda x: f"{x:.3f}" if not pd.isna(x) else "No Run")
    )

In [58]:
# Calculate mean and std times for each dataset-model combination across all metrics
df_times = (
    df.groupby(["dataset_name", "model"])
    .agg({"best/elapsed_time": ["mean", "std"], "fit_model_return_elapsed_time": ["mean", "std"]})
    .rename(columns={"best/elapsed_time": "Best Time", "fit_model_return_elapsed_time": "HPO Time"})
)

# Flatten multiindex columns
df_times.columns = [' '.join(col).strip() for col in df_times.columns.values]
# Set the same index structure as df_metrics
df_times.index.names = ["Dataset", "Model"]

df_times["Best Time"] = (
	df_times["Best Time mean"].apply(lambda x: f"{x:4.3f}" if not pd.isna(x) else "No Run")
	+ " $\\pm$ " 
	+ df_times["Best Time std"].apply(lambda x: f"{x:4.3f}" if not pd.isna(x) else "No Run")
)
df_times["HPO Time"] = (
	df_times["HPO Time mean"].apply(lambda x: f"{x:4.3f}" if not pd.isna(x) else "No Run")
	+ " $\\pm$ "
	+ df_times["HPO Time std"].apply(lambda x: f"{x:4.3f}" if not pd.isna(x) else "No Run")
)

# Join with the existing df_metrics (verify we have the same number of rows!)
df_metrics = df_metrics.join(df_times, how="outer")

The following will provide the latex code for a clean table, we only need to make a little adjustement in the first line to delete the "key" and have only one header. For the longtable environment (full data) we need to add the "\*" at the end of lines we dont want to have a page break. We also should replace the entire begin{table} ... end{table} by begin{longtable} ... end{longtable} in the latex file, if you want to put caption and labels you should break the line after with '\\' (put both on the same line!)


In [59]:
df_latex = df_metrics.copy()
hpo_metrics_to_hide = [metric for metric in hpo_metrics_rename if metric.find("Rescaled") != -1] 
columns_to_hide = [col for col in df_latex.columns if col not in (hpo_metrics_rename + ["Best Time", "HPO Time"])]
columns_to_hide += hpo_metrics_to_hide
df_latex = df_latex.style.hide(columns_to_hide, axis=1)
for col in hpo_metrics_rename + ["Best Time", "HPO Time"]:
    highlight_metric = partial(highlight_max, column_name=f"{col} mean")
    underline_2nd_metric = partial(underline_2nd_max, column_name=f"{col} mean")
    if col in ["Davies-Bouldin", "Best Time", "HPO Time"]:
        highlight_metric = partial(highlight_min, column_name=f"{col} mean")
        underline_2nd_metric = partial(underline_2nd_min, column_name=f"{col} mean")
    (df_latex.apply(highlight_metric, subset=[col, f"{col} mean"], axis=None)
    .apply(underline_2nd_metric, subset=[col, f"{col} mean"], axis=None))

environment = 'longtable'
latex_output = df_latex.to_latex(
    hrules=True,
    clines="skip-last;data",
    convert_css=True,
    column_format="ll" + "l" * (len(df_latex.columns) - len(columns_to_hide)),
    environment=environment,
)

# fix header
columns = df_latex.index.names + [col for col in df_latex.columns if col not in columns_to_hide]
header_line = ' & '.join(columns) + r' \\'
latex_output = latex_output.splitlines()
if environment is None:
    # remove 3th and 4th line and replace with header_line
    latex_output = latex_output[:2] + [header_line] + latex_output[4:]
else:
    # remove 3rd and 4th line and 8th and 9th line and replace with header_line
    latex_output = latex_output[:2] + [header_line] + latex_output[4:7] + [header_line] + latex_output[9:]

latex_output = "\n".join(latex_output)
print(latex_output)

\begin{longtable}{llllllll}
\toprule
Dataset & Model & ARI & Calinski & Silhouette & Composite Metric & Best Time & HPO Time \\
\midrule
\endfirsthead
\toprule
Dataset & Model & ARI & Calinski & Silhouette & Composite Metric & Best Time & HPO Time \\
\midrule
\endhead
\midrule
\multicolumn{8}{r}{Continued on next page} \\
\midrule
\endfoot
\bottomrule
\endlastfoot
\multirow[c]{12}{*}{alizadeh-2000-v2} & CoHiRF & 0.871 $\pm$ 0.014 & \underline{15.094 $\pm$ 0.126} & 0.186 $\pm$ 0.006 & \underline{0.820 $\pm$ 0.003} & 0.078 $\pm$ 0.025 & 360.450 $\pm$ 12.657 \\
 & CoHiRF-DBSCAN & 0.000 $\pm$ 0.000 & 0.000 $\pm$ 0.000 & -1.000 $\pm$ 0.000 & 0.000 $\pm$ 0.000 & 1.891 $\pm$ 0.121 & \underline{358.777 $\pm$ 10.698} \\
 & CoHiRF-KernelRBF & 0.083 $\pm$ 0.034 & 1.569 $\pm$ 0.143 & 0.001 $\pm$ 0.012 & 0.229 $\pm$ 0.006 & 0.388 $\pm$ 0.318 & 409.038 $\pm$ 20.936 \\
 & CoHiRF-SC-SRGF-1R & \bfseries 0.947 $\pm$ 0.000 & 12.338 $\pm$ 0.000 & 0.194 $\pm$ 0.000 & 0.786 $\pm$ 0.000 & 2.913 $\pm$ 1.498 &

# per dataset per model group

In [60]:
df_latex = df_metrics.copy()
df_latex = df_latex.reset_index()
# reapply model groups
df_latex["Base Model"] = df_latex["Model"].apply(
    lambda x: next((group for group, models in model_groups.items() if x in models), "Other")
)
# redefine index with model_group
df_latex = df_latex.set_index(["Dataset", "Base Model", "Model"])
# sort by dataset, model_group, model
df_latex = df_latex.sort_index(level=["Dataset", "Base Model", "Model"])


# print per dataset
for dataset in df_latex.index.get_level_values("Dataset").unique():
    df_print = df_latex.copy()
    df_print = df_print.loc[dataset]
    hpo_metrics_to_hide = [metric for metric in hpo_metrics_rename if metric.find("Rescaled") != -1]
    columns_to_hide = [col for col in df_latex.columns if col not in (hpo_metrics_rename + ["Best Time", "HPO Time"])]
    columns_to_hide += hpo_metrics_to_hide
    df_print = df_print.style.hide(columns_to_hide, axis=1)
    for col in hpo_metrics_rename + ["Best Time", "HPO Time"]:
        highlight_metric = partial(highlight_max, column_name=f"{col} mean")
        underline_2nd_metric = partial(underline_2nd_max, column_name=f"{col} mean")
        if col in ["Davies-Bouldin", "Best Time", "HPO Time"]:
            highlight_metric = partial(highlight_min, column_name=f"{col} mean")
            underline_2nd_metric = partial(underline_2nd_min, column_name=f"{col} mean")
        (
            df_print.apply(highlight_metric, subset=[col, f"{col} mean"], axis=None).apply(
                underline_2nd_metric, subset=[col, f"{col} mean"], axis=None
            )
        )

    latex_output = df_print.to_latex(
        hrules=True,
        clines="skip-last;data",
        convert_css=True,
        column_format="ll" + "l" * (len(df_print.columns) - len(columns_to_hide)),
        # environment="longtable",
        caption=f"Clustering results on dataset {dataset}",
    )

    # fix header
    columns = df_print.index.names + [col for col in df_print.columns if col not in columns_to_hide]
    header_line = " & ".join(columns) + r" \\"

    # split into lines
    latex_output = latex_output.splitlines()
    # remove 5th and 6th line and replace with header_line
    latex_output = latex_output[:4] + [header_line] + latex_output[6:]
    # remove last cline
    latex_output = latex_output[:-4] + latex_output[-3:]

    latex_output = "\n".join(latex_output)

    print(latex_output)
    print("\n\n")

\begin{table}
\caption{Clustering results on dataset alizadeh-2000-v2}
\begin{tabular}{llllllll}
\toprule
Base Model & Model & ARI & Calinski & Silhouette & Composite Metric & Best Time & HPO Time \\
\midrule
\multirow[c]{3}{*}{DBSCAN} & CoHiRF-DBSCAN & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{-1.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \underline{1.891 $\pm$ 0.121} & \bfseries 358.777 $\pm$ 10.698 \\
 & DBSCAN & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{-1.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries 0.072 $\pm$ 0.008 & \underline{364.587 $\pm$ 12.037} \\
 & R-CoHiRF-DBSCAN & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{-1.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & 3.367 $\pm$ 0.243 & 438.810 $\pm$ 19.708 \\
\cline{1-8}
\multirow[c]{3}{*}

In [None]:
df_latex

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ARI mean,ARI std,Rescaled ARI mean,Rescaled ARI std,Calinski mean,Calinski std,Rescaled Calinski mean,Rescaled Calinski std,Silhouette mean,Silhouette std,...,Rescaled Calinski,Silhouette,Rescaled Silhouette,Composite Metric,Best Time mean,Best Time std,HPO Time mean,HPO Time std,Best Time,HPO Time
Dataset,Base Model,Model,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
alizadeh-2000-v2,DBSCAN,CoHiRF-DBSCAN,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000e+00,0.000000,0.000000e+00,-1.000000,0.000000,...,0.000 $\pm$ 0.000,-1.000 $\pm$ 0.000,0.000 $\pm$ 0.000,0.000 $\pm$ 0.000,1.891236,0.121433,358.777156,10.697893,1.891 $\pm$ 0.121,358.777 $\pm$ 10.698
alizadeh-2000-v2,DBSCAN,DBSCAN,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000e+00,0.000000,0.000000e+00,-1.000000,0.000000,...,0.000 $\pm$ 0.000,-1.000 $\pm$ 0.000,0.000 $\pm$ 0.000,0.000 $\pm$ 0.000,0.071820,0.007800,364.587429,12.037019,0.072 $\pm$ 0.008,364.587 $\pm$ 12.037
alizadeh-2000-v2,DBSCAN,R-CoHiRF-DBSCAN,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000e+00,0.000000,0.000000e+00,-1.000000,0.000000,...,0.000 $\pm$ 0.000,-1.000 $\pm$ 0.000,0.000 $\pm$ 0.000,0.000 $\pm$ 0.000,3.367055,0.242905,438.809734,19.707713,3.367 $\pm$ 0.243,438.810 $\pm$ 19.708
alizadeh-2000-v2,KMeans,CoHiRF,0.870727,0.013688,0.870727,0.013688,15.094428,1.258956e-01,0.800000,4.472136e-01,0.185935,0.006072,...,0.800 $\pm$ 0.447,0.186 $\pm$ 0.006,0.593 $\pm$ 0.003,0.755 $\pm$ 0.075,0.077761,0.025291,360.449912,12.657163,0.078 $\pm$ 0.025,360.450 $\pm$ 12.657
alizadeh-2000-v2,KMeans,KMeans,0.837874,0.013844,0.837874,0.013844,15.150730,3.323259e-15,1.000000,1.041955e-14,0.204196,0.035676,...,1.000 $\pm$ 0.000,0.204 $\pm$ 0.036,0.602 $\pm$ 0.018,0.813 $\pm$ 0.004,0.027972,0.003558,356.250373,8.688019,0.028 $\pm$ 0.004,356.250 $\pm$ 8.688
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
shuttle,KernelKMeans,CoHiRF-KernelRBF,0.581087,0.114397,0.581087,0.114397,11265.563356,3.001593e+03,0.469216,2.471653e-01,0.689508,0.241302,...,0.469 $\pm$ 0.247,0.690 $\pm$ 0.241,0.845 $\pm$ 0.121,0.632 $\pm$ 0.050,19.815364,12.709478,1954.295042,1229.403456,19.815 $\pm$ 12.709,1954.295 $\pm$ 1229.403
shuttle,KernelKMeans,KernelRBFKMeans,0.250069,0.083577,0.250069,0.083577,11742.147770,2.434781e+03,0.508461,2.004913e-01,0.389934,0.010019,...,0.508 $\pm$ 0.200,0.390 $\pm$ 0.010,0.695 $\pm$ 0.005,0.484 $\pm$ 0.036,1.701472,0.241054,1085.777835,1159.365950,1.701 $\pm$ 0.241,1085.778 $\pm$ 1159.366
shuttle,KernelKMeans,R-BatchCoHiRF-KernelRBF,0.372898,0.054661,0.372898,0.054661,8989.754481,1.375662e+03,0.281816,1.132785e-01,0.641444,0.244014,...,0.282 $\pm$ 0.113,0.641 $\pm$ 0.244,0.821 $\pm$ 0.122,0.492 $\pm$ 0.029,2.993204,0.945362,1257.914199,1231.635099,2.993 $\pm$ 0.945,1257.914 $\pm$ 1231.635
shuttle,KernelKMeans,R-CoHiRF-KernelRBF,0.552653,0.073841,0.552653,0.073841,13994.981173,2.466253e+03,0.693969,2.030829e-01,0.856269,0.079403,...,0.694 $\pm$ 0.203,0.856 $\pm$ 0.079,0.928 $\pm$ 0.040,0.725 $\pm$ 0.037,32.912177,17.803011,2791.128631,1315.003222,32.912 $\pm$ 17.803,2791.129 $\pm$ 1315.003


In [61]:
df_latex = df_metrics.copy()
df_latex = df_latex.reset_index()
datasets = ["alizadeh-2000-v2", "garber-2001", "bittner-2000", "nursery", "shuttle", "mnist", "coil-20", "chowdary-2006"]
df_latex = df_latex.loc[df_latex["Dataset"].isin(datasets)]
# reapply model groups
df_latex["Base Model"] = df_latex["Model"].apply(
    lambda x: next((group for group, models in model_groups.items() if x in models), "Other")
)
# redefine index with model_group
df_latex = df_latex.set_index(["Dataset", "Base Model", "Model"])
# sort by dataset, model_group, model
df_latex = df_latex.sort_index(level=["Dataset", "Base Model", "Model"])


# print per dataset
for dataset in df_latex.index.get_level_values("Dataset").unique():
    df_print = df_latex.copy()
    df_print = df_print.loc[dataset]
    hpo_metrics_to_hide = [metric for metric in hpo_metrics_rename if metric.find("Rescaled") != -1]
    columns_to_hide = [col for col in df_latex.columns if col not in (hpo_metrics_rename + ["Best Time", "HPO Time"])]
    columns_to_hide += hpo_metrics_to_hide
    df_print = df_print.style.hide(columns_to_hide, axis=1)
    for col in hpo_metrics_rename + ["Best Time", "HPO Time"]:
        highlight_metric = partial(highlight_max, column_name=f"{col} mean")
        underline_2nd_metric = partial(underline_2nd_max, column_name=f"{col} mean")
        if col in ["Davies-Bouldin", "Best Time", "HPO Time"]:
            highlight_metric = partial(highlight_min, column_name=f"{col} mean")
            underline_2nd_metric = partial(underline_2nd_min, column_name=f"{col} mean")
        (
            df_print.apply(highlight_metric, subset=[col, f"{col} mean"], axis=None).apply(
                underline_2nd_metric, subset=[col, f"{col} mean"], axis=None
            )
        )

    latex_output = df_print.to_latex(
        hrules=True,
        clines="skip-last;data",
        convert_css=True,
        column_format="ll" + "l" * (len(df_print.columns) - len(columns_to_hide)),
        # environment="longtable",
        caption=f"Clustering results on dataset {dataset}",
    )

    # fix header
    columns = df_print.index.names + [col for col in df_print.columns if col not in columns_to_hide]
    header_line = " & ".join(columns) + r" \\"

    # split into lines
    latex_output = latex_output.splitlines()
    # remove 5th and 6th line and replace with header_line
    latex_output = latex_output[:4] + [header_line] + latex_output[6:]
    # remove last cline
    latex_output = latex_output[:-4] + latex_output[-3:]

    latex_output = "\n".join(latex_output)

    print(latex_output)
    print("\n\n")

\begin{table}
\caption{Clustering results on dataset alizadeh-2000-v2}
\begin{tabular}{llllllll}
\toprule
Base Model & Model & ARI & Calinski & Silhouette & Composite Metric & Best Time & HPO Time \\
\midrule
\multirow[c]{3}{*}{DBSCAN} & CoHiRF-DBSCAN & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{-1.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \underline{1.891 $\pm$ 0.121} & \bfseries 358.777 $\pm$ 10.698 \\
 & DBSCAN & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{-1.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries 0.072 $\pm$ 0.008 & \underline{364.587 $\pm$ 12.037} \\
 & R-CoHiRF-DBSCAN & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{-1.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & 3.367 $\pm$ 0.243 & 438.810 $\pm$ 19.708 \\
\cline{1-8}
\multirow[c]{3}{*}

In [None]:
df_metrics

Unnamed: 0_level_0,Unnamed: 1_level_0,ARI mean,ARI std,Rescaled ARI mean,Rescaled ARI std,Calinski mean,Calinski std,Rescaled Calinski mean,Rescaled Calinski std,Silhouette mean,Silhouette std,...,Rescaled Calinski,Silhouette,Rescaled Silhouette,Composite Metric,Best Time mean,Best Time std,HPO Time mean,HPO Time std,Best Time,HPO Time
Dataset,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
alizadeh-2000-v2,CoHiRF,0.870727,0.013688,0.870727,0.013688,15.094428,1.258956e-01,0.800000,0.447214,0.185935,0.006072,...,0.800 $\pm$ 0.447,0.186 $\pm$ 0.006,0.593 $\pm$ 0.003,0.755 $\pm$ 0.075,0.077761,0.025291,360.449912,12.657163,0.078 $\pm$ 0.025,360.450 $\pm$ 12.657
alizadeh-2000-v2,CoHiRF-1000,0.870727,0.013688,0.870727,0.013688,15.094428,1.258956e-01,0.800000,0.447214,0.185935,0.006072,...,0.800 $\pm$ 0.447,0.186 $\pm$ 0.006,0.593 $\pm$ 0.003,0.755 $\pm$ 0.075,0.079105,0.024731,376.338298,29.726852,0.079 $\pm$ 0.025,376.338 $\pm$ 29.727
alizadeh-2000-v2,CoHiRF-DBSCAN,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000e+00,0.000000,0.000000,-1.000000,0.000000,...,0.000 $\pm$ 0.000,-1.000 $\pm$ 0.000,0.000 $\pm$ 0.000,0.000 $\pm$ 0.000,1.891236,0.121433,358.777156,10.697893,1.891 $\pm$ 0.121,358.777 $\pm$ 10.698
alizadeh-2000-v2,CoHiRF-KernelRBF,0.083405,0.034230,0.083405,0.034230,1.568841,1.426340e-01,0.168354,0.073935,0.001354,0.012104,...,0.168 $\pm$ 0.074,0.001 $\pm$ 0.012,0.501 $\pm$ 0.006,0.251 $\pm$ 0.014,0.388461,0.317894,409.038416,20.935751,0.388 $\pm$ 0.318,409.038 $\pm$ 20.936
alizadeh-2000-v2,CoHiRF-SC-SRGF,0.947128,0.000000,0.947128,0.000000,12.338328,1.986027e-15,0.800000,0.447214,0.194047,0.000000,...,0.800 $\pm$ 0.447,0.194 $\pm$ 0.000,0.597 $\pm$ 0.000,0.781 $\pm$ 0.075,2.913118,1.498119,463.230301,13.566887,2.913 $\pm$ 1.498,463.230 $\pm$ 13.567
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
shuttle,R-BatchCoHiRF-DBSCAN,0.710417,0.055361,0.710417,0.055361,1191.659904,1.367356e+02,0.030470,0.006230,0.915851,0.029745,...,0.030 $\pm$ 0.006,0.916 $\pm$ 0.030,0.958 $\pm$ 0.015,0.566 $\pm$ 0.010,13.462567,7.998126,1879.495971,1222.850442,13.463 $\pm$ 7.998,1879.496 $\pm$ 1222.850
shuttle,R-BatchCoHiRF-KernelRBF,0.372898,0.054661,0.372898,0.054661,8989.754481,1.375662e+03,0.281816,0.113278,0.641444,0.244014,...,0.282 $\pm$ 0.113,0.641 $\pm$ 0.244,0.821 $\pm$ 0.122,0.492 $\pm$ 0.029,2.993204,0.945362,1257.914199,1231.635099,2.993 $\pm$ 0.945,1257.914 $\pm$ 1231.635
shuttle,R-CoHiRF,0.645521,0.024557,0.645521,0.024557,20315.310691,1.808820e+02,0.548221,0.007472,0.969336,0.004637,...,0.548 $\pm$ 0.007,0.969 $\pm$ 0.005,0.985 $\pm$ 0.002,0.726 $\pm$ 0.004,20.328187,3.064537,2034.077139,1262.851535,20.328 $\pm$ 3.065,2034.077 $\pm$ 1262.852
shuttle,R-CoHiRF-DBSCAN,0.696130,0.014815,0.696130,0.014815,1291.725980,5.836486e+02,0.035029,0.026594,0.937522,0.031919,...,0.035 $\pm$ 0.027,0.938 $\pm$ 0.032,0.969 $\pm$ 0.016,0.567 $\pm$ 0.006,63.722125,28.394231,5543.052255,2328.999420,63.722 $\pm$ 28.394,5543.052 $\pm$ 2328.999


In [None]:
df_latex = df_metrics.copy()
df_latex = df_latex.reset_index()
# reapply model groups
df_latex['Base Model'] = df_latex['Model'].apply(lambda x: next((group for group, models in model_groups.items() if x in models), 'Other'))
# redefine index with model_group
df_latex = df_latex.set_index(['Dataset', 'Base Model', 'Model'])
# sort by dataset, model_group, model
df_latex = df_latex.sort_index(level=['Dataset', 'Base Model', 'Model'])


# print per dataset
for dataset in df_latex.index.get_level_values('Dataset').unique():
    df_print = df_latex.copy()
    df_print = df_print.loc[dataset]
    hpo_metrics_to_hide = [metric for metric in hpo_metrics_rename if metric.find("Rescaled") != -1] 
    columns_to_hide = [col for col in df_latex.columns if col not in (hpo_metrics_rename + ["Best Time", "HPO Time"])]
    columns_to_hide += hpo_metrics_to_hide
    df_print = df_print.style.hide(columns_to_hide, axis=1)
    for col in hpo_metrics_rename + ["Best Time", "HPO Time"]:
        highlight_metric = partial(highlight_max, column_name=f"{col} mean")
        underline_2nd_metric = partial(underline_2nd_max, column_name=f"{col} mean")
        if col in ["Davies-Bouldin", "Best Time", "HPO Time"]:
            highlight_metric = partial(highlight_min, column_name=f"{col} mean")
            underline_2nd_metric = partial(underline_2nd_min, column_name=f"{col} mean")
        (
            df_print.apply(highlight_metric, subset=[col, f"{col} mean"], axis=None).apply(
                underline_2nd_metric, subset=[col, f"{col} mean"], axis=None
            )
        )

    df_print = df_print.hide(level=0, axis=0)
    latex_output = (
        df_print.to_latex(
            hrules=True,
            clines="skip-last;data",
            convert_css=True,
            column_format="ll" + "l" * (len(df_print.columns) - len(columns_to_hide)),
            # environment="longtable",
            caption=f"Clustering results on dataset {dataset}",
        )
    )

    # fix header
    columns = df_print.index.names[:1] + [col for col in df_print.columns if col not in columns_to_hide]
    header_line = ' & '.join(columns) + r' \\'

    # split into lines
    latex_output = latex_output.splitlines()
    # remove 5th and 6th line and replace with header_line
    latex_output = latex_output[:4] + [header_line] + latex_output[6:]
    latex_output = '\n'.join(latex_output)

    # manually add clines after model groups
    model_groups_in_data = df_print.index.get_level_values('Base Model').unique().tolist()
    lines = latex_output.splitlines()
    new_lines = []
    last_line = ""
    for i, line in enumerate(lines[6:-3]): # skip first 6 lines and last 3 lines
        model_last_line = last_line.split('&')[0].strip()
        model_current_line = line.split('&')[0].strip()
        model_group_last_line = next((group for group, models in model_groups.items() if model_last_line in models), 'Other')
        model_group_current_line = next((group for group, models in model_groups.items() if model_current_line in models), 'Other')
        if model_group_last_line != model_group_current_line and i != 0:
            new_lines.append(r'\cline{' + f'1-{len(columns)}' + r'}')
        new_lines.append(line)
        last_line = line

    latex_output = '\n'.join(lines[:6] + new_lines + lines[-3:])

    print(latex_output)
    print("\n\n")
    print("\pagebreak")

\begin{table}
\caption{Clustering results on dataset alizadeh-2000-v2}
\begin{tabular}{llllllll}
\toprule
Base Model & ARI & Calinski & Silhouette & Composite Metric & Best Time & HPO Time \\
\midrule
CoHiRF-DBSCAN & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{-1.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \underline{1.891 $\pm$ 0.121} & \bfseries 358.777 $\pm$ 10.698 \\
DBSCAN & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{-1.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries 0.072 $\pm$ 0.008 & \underline{364.587 $\pm$ 12.037} \\
R-CoHiRF-DBSCAN & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & \bfseries \underline{-1.000 $\pm$ 0.000} & \bfseries \underline{0.000 $\pm$ 0.000} & 3.367 $\pm$ 0.243 & 438.810 $\pm$ 19.708 \\
\cline{1-7}
CoHiRF & \underline{0.871 $\pm$ 0.014} & \underline{15.094 $\

# Dataset selection in one table

In [38]:
model_names = {
    "BatchCoHiRF-1iter-random-60": "BatchCoHiRF",
    # "BatchCoHiRF-1iter-random-nolaststop-60": "BatchCoHiRF-nolaststop",
    "BatchCoHiRF-1iter-random-top-down-60": "R-BatchCoHiRF",
    # "BatchCoHiRF-1iter-random-top-down-nolaststop-60": "R-BatchCoHiRF-nolaststop",
    "BatchCoHiRF-DBSCAN-1iter-random-60": "BatchCoHiRF-DBSCAN",
    # "BatchCoHiRF-DBSCAN-1iter-random-nolaststop-60": "BatchCoHiRF-DBSCAN-nolaststop",
    "BatchCoHiRF-DBSCAN-1iter-random-top-down-60": "R-BatchCoHiRF-DBSCAN",
    # "BatchCoHiRF-DBSCAN-1iter-random-top-down-nolaststop-60": "R-BatchCoHiRF-DBSCAN-nolaststop",
    "BatchCoHiRF-KernelRBF-1iter-random-60": "BatchCoHiRF-KernelRBF",
    # "BatchCoHiRF-KernelRBF-1iter-random-nolaststop-60": "BatchCoHiRF-KernelRBF-nolaststop",
    "BatchCoHiRF-KernelRBF-1iter-random-top-down-60": "R-BatchCoHiRF-KernelRBF",
    # "BatchCoHiRF-KernelRBF-1iter-random-top-down-nolaststop-60": "R-BatchCoHiRF-KernelRBF-nolaststop",
    "BatchCoHiRF-SC-SRGF-1R-1iter-random-60": "BatchCoHiRF-SC-SRGF",
    # "BatchCoHiRF-SC-SRGF-1R-1iter-random-nolaststop-60": "BatchCoHiRF-SC-SRGF-nolaststop",
    # "BatchCoHiRF-SC-SRGF-2R-1iter-random-60": "BatchCoHiRF-SC-SRGF-2R",
    # "BatchCoHiRF-SC-SRGF-2R-1iter-random-nolaststop-60": "BatchCoHiRF-SC-SRGF-2R-nolaststop",
    "CoHiRF-1000-60": "CoHiRF-1000",
    "CoHiRF-60": "CoHiRF",
    "CoHiRF-top-down-60": "R-CoHiRF",
    "CoHiRF-DBSCAN-60": "CoHiRF-DBSCAN",
    "CoHiRF-DBSCAN-top-down-60": "R-CoHiRF-DBSCAN",
    "CoHiRF-KernelRBF-60": "CoHiRF-KernelRBF",
    "CoHiRF-KernelRBF-top-down-60": "R-CoHiRF-KernelRBF",
    "CoHiRF-SC-SRGF-1R-60": "CoHiRF-SC-SRGF",
    # "CoHiRF-SC-SRGF-2R-60": "CoHiRF-SC-SRGF-2R",
    "DBSCAN-60": "DBSCAN",
    "KMeans-60": "KMeans",
    "KernelRBFKMeans-60": "KernelRBFKMeans",
    "SpectralSubspaceRandomization-60": "SC-SRGF",
}

dataset_names = {
    "binary_alpha_digits": "binary-alpha-digits",
    "mnist_784": "mnist",
}  # otherwise we get an error in latex

dataset_id = [
    61,
    46773,
    46776,
    46778,
    46779,
    46782,
    46783,
    554,
    40685,
    1568,
    47039,
]

hpo_metrics = [
    "adjusted_rand",
    "calinski_harabasz_score",
    "silhouette",
]

# Filter to only standardized runs
df = df_runs_parents.copy()
df = df.loc[df["standardize"] == True]
df = df.loc[df["model"].isin(model_names.keys())]
df = df.loc[df["dataset_id"].isin(dataset_id)]
df = df.loc[df["hpo_metric"].isin(hpo_metrics)]
df = df.replace({"model": model_names})
df = df.replace({"dataset_name": dataset_names})

# Filter to only runs with hpo_seed in range(5)
df = df.loc[df["hpo_seed"].isin(range(5))]

# Filter to only show batch methods for datasets with more than 1000 instances
df = df.loc[~((df["n_instances"] < 1000) & (df["model"].str.find("Batch") != -1))]

# define group of models
model_groups = {
    "KMeans": [
        "KMeans",
        "CoHiRF",
        "R-CoHiRF",
        "CoHiRF-1000",
        "BatchCoHiRF",
        "R-BatchCoHiRF",
        "BatchCoHiRF-nolaststop",
        "R-BatchCoHiRF-nolaststop",
    ],
    "KernelKMeans": [
        "KernelRBFKMeans",
        "CoHiRF-KernelRBF",
        "R-CoHiRF-KernelRBF",
        "BatchCoHiRF-KernelRBF",
        "R-BatchCoHiRF-KernelRBF",
        "BatchCoHiRF-KernelRBF-nolaststop",
        "R-BatchCoHiRF-KernelRBF-nolaststop",
    ],
    "DBSCAN": [
        "DBSCAN",
        "CoHiRF-DBSCAN",
        "R-CoHiRF-DBSCAN",
        "BatchCoHiRF-DBSCAN",
        "R-BatchCoHiRF-DBSCAN",
        "BatchCoHiRF-DBSCAN-nolaststop",
        "R-BatchCoHiRF-DBSCAN-nolaststop",
    ],
    "SC-SRGF": [
        "SC-SRGF",
        "CoHiRF-SC-SRGF",
        "CoHiRF-SC-SRGF-1R",
        "CoHiRF-SC-SRGF-2R",
        "BatchCoHiRF-SC-SRGF",
        "BatchCoHiRF-SC-SRGF-2R",
        "BatchCoHiRF-SC-SRGF-nolaststop",
        "BatchCoHiRF-SC-SRGF-2R-nolaststop",
    ],
}
df["model_group"] = df["model"].apply(
    lambda x: next((group for group, models in model_groups.items() if x in models), "Other")
)


# re-scale some metrics and build composite metric
# re-scale ari to be between 0 and 1 (originally between -0.5 and 1), by considering everything below 0 as 0
df["best/adjusted_rand_rescaled"] = df["best/adjusted_rand"].apply(lambda x: 0.0 if x < 0 else x)

# re-scale silhouette to be between 0 and 1 (originally between -1 and 1)
df["best/silhouette_rescaled"] = (df["best/silhouette"] - (-1)) / (1 - (-1))

# re-scale calinski to be between 0 and 1 normalized by dataset, model_group and hpo_metric
# replace calinksi -1.0 with 0.0
df["best/calinski_harabasz_score"] = df["best/calinski_harabasz_score"].replace(-1.0, 0.0)
# df["best/calinski_harabasz_score_rescaled"] = df["best/calinski_harabasz_score"].replace(-1.0, 0.0)
df["best/calinski_harabasz_score_rescaled"] = df.groupby(["dataset_id", "model_group", "hpo_metric"])[
    "best/calinski_harabasz_score"
].transform(lambda x: (x - x.min()) / (x.max() - x.min()) if x.max() != x.min() else (0.0 if x.max() == 0 else 1.0))

In [39]:
hpo_metrics = [
    # "adjusted_rand",
    # "adjusted_mutual_info",
    # "calinski_harabasz_score",
    # "silhouette",
    # "davies_bouldin_score",
    # "normalized_mutual_info",
    "adjusted_rand",
    "adjusted_rand_rescaled",
    "calinski_harabasz_score",
    "calinski_harabasz_score_rescaled",
    "silhouette",
    "silhouette_rescaled",
]

hpo_metrics_rename = [
    # "ARI",
    # "AMI",
    # "Calinski",
    # "Silhouette",
    # "Davies-Bouldin",
    # "NMI",
    "ARI",
    "Rescaled ARI",
    "Calinski",
    "Rescaled Calinski",
    "Silhouette",
    "Rescaled Silhouette",
]

dfs_metrics = {}

for hpo_metric, hpo_metric_rename in zip(hpo_metrics, hpo_metrics_rename):
    if hpo_metric.find("_rescaled") != -1:
        original_metric = hpo_metric.replace("_rescaled", "")
    else:
        original_metric = hpo_metric
    df_metric = df.loc[df["hpo_metric"] == original_metric][
        ["dataset_name", "model", "hpo_seed", f"best/{hpo_metric}"]
    ].rename(columns={f"best/{hpo_metric}": hpo_metric_rename})
    df_metric = df_metric.dropna(subset=[hpo_metric_rename])
    df_metric = df_metric.set_index(["dataset_name", "model", "hpo_seed"])
    df_metric = df_metric.astype({hpo_metric_rename: float})
    dfs_metrics[hpo_metric_rename] = df_metric

df_metrics = pd.concat(dfs_metrics.values(), axis=1, join="outer")
df_metrics = df_metrics.reset_index()

# calculate mean and std
df_metrics = df_metrics.groupby(["dataset_name", "model"]).agg(["mean", "std"])
# flatten multiindex columns
df_metrics.columns = [" ".join(col).strip() for col in df_metrics.columns.values]
# drop hpo_seed level
df_metrics = df_metrics.drop(columns=["hpo_seed mean", "hpo_seed std"])
# Rename index levels
df_metrics.index.names = ["Dataset", "Model"]

# create a composite metric as the average of the metrics
df_metrics["Composite mean"] = df_metrics[
    [f"{metric} mean" for metric in hpo_metrics_rename if "Rescaled" in metric]
].mean(axis=1)
df_metrics["Composite std"] = (
    1
    / len(hpo_metrics_rename)
    * (df_metrics[[f"{metric} std" for metric in hpo_metrics_rename if "Rescaled" in metric]] ** 2).sum(axis=1) ** 0.5
)
hpo_metrics_rename.append("Composite")


for metric in hpo_metrics_rename:
    df_metrics[f"{metric}"] = (
        df_metrics[f"{metric} mean"].apply(lambda x: f"{x:.2f}" if not pd.isna(x) else "No Run")
        + " $\\pm$ "
        + df_metrics[f"{metric} std"].apply(lambda x: f"{x:.2f}" if not pd.isna(x) else "No Run")
    )

In [40]:
# Calculate mean and std times for each dataset-model combination across all metrics
df_times = (
    df.groupby(["dataset_name", "model"])
    .agg({"best/elapsed_time": ["mean", "std"], "fit_model_return_elapsed_time": ["mean", "std"]})
    .rename(columns={"best/elapsed_time": "Time (s)", "fit_model_return_elapsed_time": "HPO Time"})
)

# Flatten multiindex columns
df_times.columns = [' '.join(col).strip() for col in df_times.columns.values]
# Set the same index structure as df_metrics
df_times.index.names = ["Dataset", "Model"]

df_times["Time (s)"] = (
	df_times["Time (s) mean"].apply(lambda x: f"{x:4.2f}" if not pd.isna(x) else "No Run")
	+ " $\\pm$ " 
	+ df_times["Time (s) std"].apply(lambda x: f"{x:4.2f}" if not pd.isna(x) else "No Run")
)
df_times["HPO Time"] = (
	df_times["HPO Time mean"].apply(lambda x: f"{x:4.2f}" if not pd.isna(x) else "No Run")
	+ " $\\pm$ "
	+ df_times["HPO Time std"].apply(lambda x: f"{x:4.2f}" if not pd.isna(x) else "No Run")
)

# Join with the existing df_metrics (verify we have the same number of rows!)
df_metrics = df_metrics.join(df_times, how="outer")

In [42]:
dataset_models = {
	"mnist": {"KMeans" : ["KMeans", "CoHiRF", "R-CoHiRF", "BatchCoHiRF", "R-BatchCoHiRF"]},
	"shuttle": {"DBSCAN" : ["DBSCAN", "CoHiRF-DBSCAN", "R-CoHiRF-DBSCAN", "BatchCoHiRF-DBSCAN", "R-BatchCoHiRF-DBSCAN"]},
	"binary-alpha-digits": {"SC-SRGF" : ["SC-SRGF", "CoHiRF-SC-SRGF"]},
	"chowdary-2006": {"SC-SRGF" : ["SC-SRGF", "CoHiRF-SC-SRGF"]},
}
df = df_metrics.copy()
df = df.reset_index()
# reapply model groups
df['Base Model'] = df['Model'].apply(lambda x: next((group for group, models in model_groups.items() if x in models), 'Other'))
df_table = []
for dataset, models_dict in dataset_models.items():
    for base_model, models in models_dict.items():
        df_subset = df.loc[(df['Dataset'] == dataset) & (df['Model'].isin(models))]
        df_table.append(df_subset)
df_table = pd.concat(df_table, axis=0)
# create column with Dataset - Model Group
df_table["Dataset / Base Model"] = (
    "\parbox{2cm}{\centering " + df_table["Dataset"] + "\\\\" + df_table["Base Model"] + "}"
)
df_table = df_table.set_index(["Dataset / Base Model", "Model"])
df_table = df_table.sort_index(level=["Dataset / Base Model", "Model"])
hpo_metrics_to_hide = [metric for metric in hpo_metrics_rename if metric.find("Rescaled") != -1]
columns_to_hide = [col for col in df_table.columns if col not in (hpo_metrics_rename + ["Time (s)"])]
columns_to_hide += hpo_metrics_to_hide
df_table = df_table.style.hide(columns_to_hide, axis=1)
for col in hpo_metrics_rename + ["Time (s)"]:
    highlight_metric = partial(highlight_max, column_name=f"{col} mean")
    underline_2nd_metric = partial(underline_2nd_max, column_name=f"{col} mean")
    if col in ["Davies-Bouldin", "Time (s)"]:
        highlight_metric = partial(highlight_min, column_name=f"{col} mean")
        underline_2nd_metric = partial(underline_2nd_min, column_name=f"{col} mean")
    (df_table.apply(highlight_metric, subset=[col, f"{col} mean"], axis=None)
    .apply(underline_2nd_metric, subset=[col, f"{col} mean"], axis=None))

latex_output = df_table.to_latex(
    hrules=True,
    clines="skip-last;data",
    convert_css=True,
    column_format="ll" + "l" * (len(df_table.columns) - len(columns_to_hide)),
    # environment="longtable",
    caption=f"Clustering results on real-world datasets.",
    # label="tab:clustering_real_world_datasets",
)

# fix header
columns = df_table.index.names + [col for col in df_table.columns if col not in columns_to_hide]
header_line = ' & '.join(columns) + r' \\'

# split into lines
latex_output = latex_output.splitlines()
# remove 5th and 6th line and replace with header_line
latex_output = latex_output[:4] + [header_line] + latex_output[6:]
# remove last cline
latex_output = latex_output[:-4] + latex_output[-3:]
# add \fontsize{5}{10}\selectfont to table
latex_output.insert(2, r'\fontsize{5}{10}\selectfont')
latex_output = "\n".join(latex_output)


print(latex_output)

\begin{table}
\caption{Clustering results on real-world datasets.}
\fontsize{5}{10}\selectfont
\begin{tabular}{lllllll}
\toprule
Dataset / Base Model & Model & ARI & Calinski & Silhouette & Composite & Time (s) \\
\midrule
\multirow[c]{2}{*}{\parbox{2cm}{\centering binary-alpha-digits\\SC-SRGF}} & CoHiRF-SC-SRGF & \underline{0.09 $\pm$ 0.00} & \bfseries 64.00 $\pm$ 3.24 & \bfseries 0.09 $\pm$ 0.00 & \bfseries 0.40 $\pm$ 0.06 & \bfseries 93.67 $\pm$ 72.52 \\
 & SC-SRGF & \bfseries 0.31 $\pm$ 0.01 & \underline{60.84 $\pm$ 2.53} & \underline{0.09 $\pm$ 0.00} & \underline{0.37 $\pm$ 0.04} & \underline{97.15 $\pm$ 65.47} \\
\cline{1-7}
\multirow[c]{2}{*}{\parbox{2cm}{\centering chowdary-2006\\SC-SRGF}} & CoHiRF-SC-SRGF & \bfseries \underline{0.92 $\pm$ 0.00} & \bfseries 27.74 $\pm$ 0.03 & \underline{0.17 $\pm$ 0.00} & \bfseries 0.84 $\pm$ 0.00 & \underline{1.43 $\pm$ 1.25} \\
 & SC-SRGF & \bfseries \underline{0.92 $\pm$ 0.00} & \underline{23.01 $\pm$ 4.97} & \bfseries 0.22 $\pm$ 0.01 & \und

# Iris with KMeans, KernelRBFKMeans

In [145]:
model_names = {
    "BatchCoHiRF-1iter-random-60": "BatchCoHiRF",
    # "BatchCoHiRF-1iter-random-nolaststop-60": "BatchCoHiRF-nolaststop",
    "BatchCoHiRF-1iter-random-top-down-60": "R-BatchCoHiRF",
    # "BatchCoHiRF-1iter-random-top-down-nolaststop-60": "R-BatchCoHiRF-nolaststop",
    "BatchCoHiRF-DBSCAN-1iter-random-60": "BatchCoHiRF-DBSCAN",
    # "BatchCoHiRF-DBSCAN-1iter-random-nolaststop-60": "BatchCoHiRF-DBSCAN-nolaststop",
    "BatchCoHiRF-DBSCAN-1iter-random-top-down-60": "R-BatchCoHiRF-DBSCAN",
    # "BatchCoHiRF-DBSCAN-1iter-random-top-down-nolaststop-60": "R-BatchCoHiRF-DBSCAN-nolaststop",
    "BatchCoHiRF-KernelRBF-1iter-random-60": "BatchCoHiRF-KernelRBF",
    # "BatchCoHiRF-KernelRBF-1iter-random-nolaststop-60": "BatchCoHiRF-KernelRBF-nolaststop",
    "BatchCoHiRF-KernelRBF-1iter-random-top-down-60": "R-BatchCoHiRF-KernelRBF",
    # "BatchCoHiRF-KernelRBF-1iter-random-top-down-nolaststop-60": "R-BatchCoHiRF-KernelRBF-nolaststop",
    "BatchCoHiRF-SC-SRGF-1R-1iter-random-60": "BatchCoHiRF-SC-SRGF",
    # "BatchCoHiRF-SC-SRGF-1R-1iter-random-nolaststop-60": "BatchCoHiRF-SC-SRGF-nolaststop",
    # "BatchCoHiRF-SC-SRGF-2R-1iter-random-60": "BatchCoHiRF-SC-SRGF-2R",
    # "BatchCoHiRF-SC-SRGF-2R-1iter-random-nolaststop-60": "BatchCoHiRF-SC-SRGF-2R-nolaststop",
    "CoHiRF-1000-60": "CoHiRF-1000",
    "CoHiRF-60": "CoHiRF",
    "CoHiRF-top-down-60": "R-CoHiRF",
    "CoHiRF-DBSCAN-60": "CoHiRF-DBSCAN",
    "CoHiRF-DBSCAN-top-down-60": "R-CoHiRF-DBSCAN",
    "CoHiRF-KernelRBF-60": "CoHiRF-KernelRBF",
    "CoHiRF-KernelRBF-top-down-60": "R-CoHiRF-KernelRBF",
    "CoHiRF-SC-SRGF-1R-60": "CoHiRF-SC-SRGF",
    # "CoHiRF-SC-SRGF-2R-60": "CoHiRF-SC-SRGF-2R",
    "DBSCAN-60": "DBSCAN",
    "KMeans-60": "KMeans",
    "KernelRBFKMeans-60": "KernelRBFKMeans",
    "SpectralSubspaceRandomization-60": "SC-SRGF",
}

dataset_names = {
    "binary_alpha_digits": "binary-alpha-digits",
    "mnist_784": "mnist",
}  # otherwise we get an error in latex

dataset_id = [
    61,
    46773,
    46776,
    46778,
    46779,
    46782,
    46783,
    554,
    40685,
    1568,
    47039,
]

hpo_metrics = [
    "adjusted_rand",
    "calinski_harabasz_score",
    "silhouette",
]

# Filter to only standardized runs
df = df_runs_parents.copy()
df = df.loc[df["standardize"] == True]
df = df.loc[df["model"].isin(model_names.keys())]
df = df.loc[df["dataset_id"].isin(dataset_id)]
df = df.loc[df["hpo_metric"].isin(hpo_metrics)]
df = df.replace({"model": model_names})
df = df.replace({"dataset_name": dataset_names})

# Filter to only runs with hpo_seed in range(5)
df = df.loc[df["hpo_seed"].isin(range(5))]

# Filter to only show batch methods for datasets with more than 1000 instances
df = df.loc[~((df["n_instances"] < 1000) & (df["model"].str.find("Batch") != -1))]

# define group of models
model_groups = {
    "KMeans": [
        "KMeans",
        "CoHiRF",
        "R-CoHiRF",
        "CoHiRF-1000",
        "BatchCoHiRF",
        "R-BatchCoHiRF",
        "BatchCoHiRF-nolaststop",
        "R-BatchCoHiRF-nolaststop",
        "KernelRBFKMeans",
        "CoHiRF-KernelRBF",
        "R-CoHiRF-KernelRBF",
        "BatchCoHiRF-KernelRBF",
        "R-BatchCoHiRF-KernelRBF",
        "BatchCoHiRF-KernelRBF-nolaststop",
        "R-BatchCoHiRF-KernelRBF-nolaststop",
    ],
    "DBSCAN": [
        "DBSCAN",
        "CoHiRF-DBSCAN",
        "R-CoHiRF-DBSCAN",
        "BatchCoHiRF-DBSCAN",
        "R-BatchCoHiRF-DBSCAN",
        "BatchCoHiRF-DBSCAN-nolaststop",
        "R-BatchCoHiRF-DBSCAN-nolaststop",
    ],
    "SC-SRGF": [
        "SC-SRGF",
        "CoHiRF-SC-SRGF",
        "CoHiRF-SC-SRGF-1R",
        "CoHiRF-SC-SRGF-2R",
        "BatchCoHiRF-SC-SRGF",
        "BatchCoHiRF-SC-SRGF-2R",
        "BatchCoHiRF-SC-SRGF-nolaststop",
        "BatchCoHiRF-SC-SRGF-2R-nolaststop",
    ],
}
df["model_group"] = df["model"].apply(
    lambda x: next((group for group, models in model_groups.items() if x in models), "Other")
)


# re-scale some metrics and build composite metric
# re-scale ari to be between 0 and 1 (originally between -0.5 and 1), by considering everything below 0 as 0
df["best/adjusted_rand_rescaled"] = df["best/adjusted_rand"].apply(lambda x: 0.0 if x < 0 else x)

# re-scale silhouette to be between 0 and 1 (originally between -1 and 1)
df["best/silhouette_rescaled"] = (df["best/silhouette"] - (-1)) / (1 - (-1))

# re-scale calinski to be between 0 and 1 normalized by dataset, model_group and hpo_metric
# replace calinksi -1.0 with 0.0
df["best/calinski_harabasz_score"] = df["best/calinski_harabasz_score"].replace(-1.0, 0.0)
# df["best/calinski_harabasz_score_rescaled"] = df["best/calinski_harabasz_score"].replace(-1.0, 0.0)
df["best/calinski_harabasz_score_rescaled"] = df.groupby(["dataset_id", "model_group", "hpo_metric"])[
    "best/calinski_harabasz_score"
].transform(lambda x: (x - x.min()) / (x.max() - x.min()) if x.max() != x.min() else (0.0 if x.max() == 0 else 1.0))

In [146]:
hpo_metrics = [
    # "adjusted_rand",
    # "adjusted_mutual_info",
    # "calinski_harabasz_score",
    # "silhouette",
    # "davies_bouldin_score",
    # "normalized_mutual_info",
    "adjusted_rand",
    "adjusted_rand_rescaled",
    "calinski_harabasz_score",
    "calinski_harabasz_score_rescaled",
    "silhouette",
    "silhouette_rescaled",
]

hpo_metrics_rename = [
    # "ARI",
    # "AMI",
    # "Calinski",
    # "Silhouette",
    # "Davies-Bouldin",
    # "NMI",
    "ARI",
    "Rescaled ARI",
    "Calinski",
    "Rescaled Calinski",
    "Silhouette",
    "Rescaled Silhouette",
]

dfs_metrics = {}

for hpo_metric, hpo_metric_rename in zip(hpo_metrics, hpo_metrics_rename):
    if hpo_metric.find("_rescaled") != -1:
        original_metric = hpo_metric.replace("_rescaled", "")
    else:
        original_metric = hpo_metric
    df_metric = df.loc[df["hpo_metric"] == original_metric][
        ["dataset_name", "model", "hpo_seed", f"best/{hpo_metric}"]
    ].rename(columns={f"best/{hpo_metric}": hpo_metric_rename})
    df_metric = df_metric.dropna(subset=[hpo_metric_rename])
    df_metric = df_metric.set_index(["dataset_name", "model", "hpo_seed"])
    df_metric = df_metric.astype({hpo_metric_rename: float})
    dfs_metrics[hpo_metric_rename] = df_metric

df_metrics = pd.concat(dfs_metrics.values(), axis=1, join="outer")
df_metrics = df_metrics.reset_index()

# calculate mean and std
df_metrics = df_metrics.groupby(["dataset_name", "model"]).agg(["mean", "std"])
# flatten multiindex columns
df_metrics.columns = [" ".join(col).strip() for col in df_metrics.columns.values]
# drop hpo_seed level
df_metrics = df_metrics.drop(columns=["hpo_seed mean", "hpo_seed std"])
# Rename index levels
df_metrics.index.names = ["Dataset", "Model"]

# create a composite metric as the average of the metrics
df_metrics["Composite Metric mean"] = df_metrics[
    [f"{metric} mean" for metric in hpo_metrics_rename if "Rescaled" in metric]
].mean(axis=1)
df_metrics["Composite Metric std"] = (
    1
    / len(hpo_metrics_rename)
    * (df_metrics[[f"{metric} std" for metric in hpo_metrics_rename if "Rescaled" in metric]] ** 2).sum(axis=1) ** 0.5
)
hpo_metrics_rename.append("Composite Metric")


for metric in hpo_metrics_rename:
    df_metrics[f"{metric}"] = (
        df_metrics[f"{metric} mean"].apply(lambda x: f"{x:.3f}" if not pd.isna(x) else "No Run")
        + " $\\pm$ "
        + df_metrics[f"{metric} std"].apply(lambda x: f"{x:.3f}" if not pd.isna(x) else "No Run")
    )

In [147]:
df_times = (
# Calculate mean and std times for each dataset-model combination across all metrics
    df.groupby(["dataset_name", "model"])
    .agg({"best/elapsed_time": ["mean", "std"], "fit_model_return_elapsed_time": ["mean", "std"]})
    .rename(columns={"best/elapsed_time": "Best Time", "fit_model_return_elapsed_time": "HPO Time"})
)

# Flatten multiindex columns
df_times.columns = [" ".join(col).strip() for col in df_times.columns.values]
# Set the same index structure as df_metrics
df_times.index.names = ["Dataset", "Model"]

df_times["Best Time"] = (
    df_times["Best Time mean"].apply(lambda x: f"{x:4.3f}" if not pd.isna(x) else "No Run")
    + " $\\pm$ "
    + df_times["Best Time std"].apply(lambda x: f"{x:4.3f}" if not pd.isna(x) else "No Run")
)
df_times["HPO Time"] = (
    df_times["HPO Time mean"].apply(lambda x: f"{x:4.3f}" if not pd.isna(x) else "No Run")
    + " $\\pm$ "
    + df_times["HPO Time std"].apply(lambda x: f"{x:4.3f}" if not pd.isna(x) else "No Run")
)

# Join with the existing df_metrics (verify we have the same number of rows!)
df_metrics = df_metrics.join(df_times, how="outer")

In [148]:
df_latex = df_metrics.copy()
df_latex = df_latex.reset_index()
# reapply model groups
df_latex["Base Model"] = df_latex["Model"].apply(
    lambda x: next((group for group, models in model_groups.items() if x in models), "Other")
)
df_latex = df_latex.loc[df_latex["Base Model"] == "KMeans"]
df_latex = df_latex.loc[df_latex["Dataset"] == "iris"]
df_latex = df_latex.loc[df_latex["Model"] != "CoHiRF-1000"]
# redefine index with model_group
df_latex = df_latex.set_index(["Dataset", "Base Model", "Model"])
# sort by dataset, model_group, model
df_latex = df_latex.sort_index(level=["Dataset", "Base Model", "Model"])


# print per dataset
for dataset in df_latex.index.get_level_values("Dataset").unique():
    df_print = df_latex.copy()
    df_print = df_print.loc[dataset]
    hpo_metrics_to_hide = [metric for metric in hpo_metrics_rename if metric.find("Rescaled") != -1]
    columns_to_hide = [col for col in df_latex.columns if col not in (hpo_metrics_rename)]
    columns_to_hide += hpo_metrics_to_hide
    df_print = df_print.style.hide(columns_to_hide, axis=1)
    for col in hpo_metrics_rename:
        highlight_metric = partial(highlight_max, column_name=f"{col} mean")
        underline_2nd_metric = partial(underline_2nd_max, column_name=f"{col} mean")
        if col in ["Davies-Bouldin", "Best Time", "HPO Time"]:
            highlight_metric = partial(highlight_min, column_name=f"{col} mean")
            underline_2nd_metric = partial(underline_2nd_min, column_name=f"{col} mean")
        (
            df_print.apply(highlight_metric, subset=[col, f"{col} mean"], axis=None).apply(
                underline_2nd_metric, subset=[col, f"{col} mean"], axis=None
            )
        )

    df_print = df_print.hide(level=0, axis=0)
    latex_output = df_print.to_latex(
        hrules=True,
        clines="skip-last;data",
        convert_css=True,
        column_format="ll" + "l" * (len(df_print.columns) - len(columns_to_hide)),
        # environment="longtable",
        caption=f"Clustering results on dataset {dataset}",
    )

    # fix header
    columns = df_print.index.names[1:] + [col for col in df_print.columns if col not in columns_to_hide]
    header_line = " & ".join(columns) + r" \\"

    # split into lines
    latex_output = latex_output.splitlines()
    # remove 5th and 6th line and replace with header_line
    latex_output = latex_output[:4] + [header_line] + latex_output[6:]
    latex_output = "\n".join(latex_output)

    # manually add clines after model groups
    model_groups_in_data = df_print.index.get_level_values("Base Model").unique().tolist()
    lines = latex_output.splitlines()
    new_lines = []
    last_line = ""
    for i, line in enumerate(lines[6:-3]):  # skip first 6 lines and last 3 lines
        model_last_line = last_line.split("&")[0].strip()
        model_current_line = line.split("&")[0].strip()
        model_group_last_line = next(
            (group for group, models in model_groups.items() if model_last_line in models), "Other"
        )
        model_group_current_line = next(
            (group for group, models in model_groups.items() if model_current_line in models), "Other"
        )
        if model_group_last_line != model_group_current_line and i != 0:
            new_lines.append(r"\cline{" + f"1-{len(columns)}" + r"}")
        new_lines.append(line)
        last_line = line

    latex_output = "\n".join(lines[:6] + new_lines + lines[-3:])

    print(latex_output)
    print("\n\n")

\begin{table}
\caption{Clustering results on dataset iris}
\begin{tabular}{llllll}
\toprule
Model & ARI & Calinski & Silhouette & Composite Metric \\
\midrule
CoHiRF & 0.786 $\pm$ 0.132 & \underline{238.928 $\pm$ 4.719} & \bfseries 0.579 $\pm$ 0.000 & 0.848 $\pm$ 0.025 \\
CoHiRF-KernelRBF & 0.798 $\pm$ 0.094 & 231.460 $\pm$ 21.417 & \bfseries 0.579 $\pm$ 0.000 & 0.813 $\pm$ 0.057 \\
KMeans & 0.618 $\pm$ 0.035 & \bfseries 241.038 $\pm$ 0.000 & \bfseries 0.579 $\pm$ 0.000 & 0.803 $\pm$ 0.006 \\
KernelRBFKMeans & 0.641 $\pm$ 0.022 & 228.040 $\pm$ 29.064 & \underline{0.578 $\pm$ 0.002} & 0.743 $\pm$ 0.075 \\
R-CoHiRF & \underline{0.868 $\pm$ 0.079} & \bfseries 241.038 $\pm$ 0.000 & \bfseries 0.579 $\pm$ 0.000 & \bfseries 0.886 $\pm$ 0.013 \\
R-CoHiRF-KernelRBF & \bfseries 0.900 $\pm$ 0.018 & 233.757 $\pm$ 16.280 & \bfseries 0.579 $\pm$ 0.000 & \underline{0.859 $\pm$ 0.042} \\
\bottomrule
\end{tabular}
\end{table}





# 

# Debug and explore

In [44]:
df = df_runs_raw.copy()
df = df.loc[
    (df["model"].str.find("CoHiRF") != -1)
    & (df["model"].str.find("top-down-inv") == -1)
    & (df["cohirf_kwargs/consensus_strategy"] == "top-down-inv")
]

['f6e14ba716da42ebb7b7a9e7fec57812',
 '906cb296da3e4d95a431e463f35b82c7',
 'aa39bb82a4aa4ae694ec948399870581',
 '1b214acf6d5e40c6aaa25b4a8d4e61e2',
 '1dc95b66946d42bbb64f626534639322',
 '39dc0d8ff27a448785cef11a34d45fef',
 'ff04d4bab9c54ba0baea1b372fc40cc8',
 '4804bd6d0b7d4bf5b8237f1ad409c476',
 '4ca519dd7e2048979358f5c7c8629aff',
 '64610fdadc3b4e98ab77bb452a5e1f9a',
 '65909a631d5a45acbb3cd78be648ccf9',
 '2b9ca36c00574e11a9fe5a620bb084fe',
 '32c61e2275094463ab9f60b839be1097',
 'f5df094f9fff43e1bf70cf196d54072a',
 'f6221d19debc45f894996be00ec03a36',
 'e140fe1339914acdb856469c61daf4d3',
 '078b9134ac354febb72fc984e9395647',
 '450f40fb5b0945f598ada81997e91e08',
 '1a44ec678c4b481f8e34406d35ded643',
 'ab0b0c898ae044bf8c7be9ff98217c35',
 '3d8b49aa558c454494dc615c548cce6b',
 '5879697a95ab4225ac4b9b6075df1121',
 '83ac70c1b33746a8ba08b885c6b63e4b',
 '6ee4926753a647938bd32fdad6af5e4d',
 'e759fc59902649fbb018cff6019f52a7',
 'cf3456b9ad804fcbb91ac96c07dda66a',
 '288f3a7e1cc14291ade7e60681e0dfe7',
 

In [49]:
runs_to_delete_parents = df["mlflow.parentRunId"].unique().tolist()
df = df_runs_raw.copy()
df = df.loc[df["mlflow.parentRunId"].isin(runs_to_delete_parents)]
runs_to_delete_children = list(df.index)
runs_to_delete = runs_to_delete_children + runs_to_delete_parents
run_uuid_query = [f"'{run_id}'" for run_id in runs_to_delete]
run_uuid_query = ", ".join(run_uuid_query)

In [50]:
query = f"""
UPDATE runs
SET lifecycle_stage = 'deleted'
WHERE run_uuid IN ({run_uuid_query}) 
"""
with engine.begin() as conn:
    conn.execute(text(query))

In [51]:
query = f"""
DELETE
FROM
	experiment_tags
WHERE
	experiment_id = ANY(
	SELECT
		experiment_id
	FROM
		experiments
	WHERE
		lifecycle_stage = 'deleted');

DELETE
FROM
	latest_metrics
WHERE
	run_uuid = ANY(
	SELECT
		run_uuid
	FROM
		runs
	WHERE
		lifecycle_stage = 'deleted');
	
DELETE
FROM
	metrics
WHERE
	run_uuid = ANY(
	SELECT
		run_uuid
	FROM
		runs
	WHERE
		lifecycle_stage = 'deleted');
	
DELETE
FROM
	params
WHERE
	run_uuid = ANY(
	SELECT
		run_uuid
	FROM
		runs
	WHERE
		lifecycle_stage = 'deleted');

DELETE
FROM
	tags
WHERE
	run_uuid = ANY(
	SELECT
		run_uuid
	FROM
		runs
	WHERE
		lifecycle_stage = 'deleted');
	
DELETE 
FROM 
	runs
WHERE 
	lifecycle_stage = 'deleted';

DELETE 
FROM 
	experiments
WHERE 
	lifecycle_stage = 'deleted';
"""
with engine.begin() as conn:
    conn.execute(text(query))