In [1]:
from sqlalchemy import create_engine, text
import pandas as pd
from ml_experiments.analyze import get_df_runs_from_mlflow_sql, get_missing_entries, get_common_combinations, get_df_with_combinations
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from pathlib import Path
import os
import pickle
from functools import partial

# Save Results

## Load mlflow runs

In [2]:
results_dir = Path.cwd().parent / "results" / "csv"
os.makedirs(results_dir, exist_ok=True)

In [None]:
db_port = 5001
db_name = 'cohirf'
url = f'postgresql://belucci@localhost:{db_port}/{db_name}'
# url = f"postgresql://belucci@clust9:{db_port}/{db_name}"
engine = create_engine(url)
query = 'SELECT experiments.name from experiments'
experiment_names = pd.read_sql(query, engine)['name'].tolist()

In [3]:
experiment_names

['Default',
 'sfni-SpectralSubspaceRandomization',
 'sfni-KMeans',
 'sfni-BatchCoHiRF-SC-SRGF',
 'sfni-BatchCoHiRF-1iter',
 'sfni-CoHiRF',
 'sphere-BatchCoHiRF-DBSCAN-1iter',
 'sphere-DBSCAN',
 'sphere-CoHiRF-DBSCAN',
 'real-ari-BatchCoHiRF-1iter',
 'real-ari-AverageAgglomerativeClustering',
 'real-ari-BatchCoHiRF-DBSCAN-1iter',
 'real-ari-AffinityPropagation',
 'real-ari-BatchCoHiRF-SC-SRGF',
 'real-ari-CoHiRF-DBSCAN',
 'real-ari-CoHiRF-KernelRBF',
 'real-ari-CoHiRF',
 'real-ari-CompleteAgglomerativeClustering',
 'real-ari-DBSCAN',
 'real-ari-HDBSCAN',
 'real-ari-IRFLLRR',
 'real-ari-KMeans',
 'real-ari-MeanShift',
 'real-ari-OPTICS',
 'real-ari-Proclus',
 'real-ari-SingleAgglomerativeClustering',
 'real-ari-SpectralClustering',
 'real-ari-SpectralSubspaceRandomization',
 'real-ari-WardAgglomerativeClustering',
 'real-adjusted_mutual_info-BatchCoHiRF-DBSCAN-1iter',
 'real-adjusted_mutual_info-DBSCAN',
 'real-adjusted_mutual_info-BatchCoHiRF-SC-SRGF',
 'real-adjusted_mutual_info-Comple

In [4]:
experiments_names = [exp for exp in experiment_names if exp.startswith("csv-")]

In [5]:
experiments_names

['csv-adjusted_rand-BatchCoHiRF-SC-SRGF',
 'csv-adjusted_rand-BatchCoHiRF-1iter',
 'csv-adjusted_rand-BatchCoHiRF-DBSCAN-1iter',
 'csv-adjusted_rand-BatchCoHiRF-KernelRBF-1iter',
 'csv-adjusted_rand-CoHiRF-KernelRBF',
 'csv-adjusted_rand-CoHiRF-DBSCAN',
 'csv-KMeans',
 'csv-BatchCoHiRF-1iter-random',
 'csv-BatchCoHiRF-DBSCAN-1iter-random',
 'csv-BatchCoHiRF-KernelRBF-1iter-random',
 'csv-BatchCoHiRF-SC-SRGF-1iter-random']

In [6]:
query = "SELECT DISTINCT(key) FROM params WHERE key LIKE 'best/%%'"
best_params = pd.read_sql(query, engine)["key"].tolist()

In [7]:
params_columns = [
    "model",
	"n_trials",
	"dataset_name",
	"standardize",
	"hpo_metric",
	"direction",
	"seed_dataset_order",
	"hpo_seed",
] + best_params

In [8]:
latest_metrics_columns = [
    "fit_model_return_elapsed_time",
    "max_memory_used_after_fit",
    "max_memory_used",
	"best/n_clusters_",
    "best/rand_score",
    "best/adjusted_rand",
    "best/mutual_info",
    "best/adjusted_mutual_info",
    "best/normalized_mutual_info",
    "best/homogeneity_completeness_v_measure",
    "best/silhouette",
    "best/calinski_harabasz_score",
    "best/davies_bouldin_score",
    "best/inertia_score",
    "best/homogeneity",
    "best/completeness",
    "best/v_measure",
    "best/elapsed_time",
]

In [9]:
tags_columns = [
    'raised_exception',
    'EXCEPTION',
    'mlflow.parentRunId',
]

In [10]:
runs_columns = ['run_uuid', 'status', 'start_time', 'end_time']
experiments_columns = []
other_table = 'params'
other_table_keys = params_columns
df_params = get_df_runs_from_mlflow_sql(engine, runs_columns=runs_columns, experiments_columns=experiments_columns, experiments_names=experiments_names, other_table=other_table, other_table_keys=other_table_keys)
df_latest_metrics = get_df_runs_from_mlflow_sql(engine, runs_columns=['run_uuid'], experiments_columns=experiments_columns, experiments_names=experiments_names, other_table='latest_metrics', other_table_keys=latest_metrics_columns)
df_tags = get_df_runs_from_mlflow_sql(engine, runs_columns=['run_uuid'], experiments_columns=experiments_columns, experiments_names=experiments_names, other_table='tags', other_table_keys=tags_columns)

In [11]:
df_runs_raw = df_params.join(df_latest_metrics)
df_runs_raw = df_runs_raw.join(df_tags)
df_runs_raw.to_csv(results_dir / 'df_runs_raw.csv', index=True)

In [3]:
df_runs_raw = pd.read_csv(results_dir / "df_runs_raw.csv", index_col=0)
df_runs_raw["model"] = df_runs_raw["model"] + "-" + df_runs_raw["n_trials"].astype(str)
# mask = df_runs_raw["model"].str.contains("CoHiRF")
# df_runs_raw.loc[mask, "model"] = df_runs_raw.loc[mask].apply(lambda row: f"{row['model']}-{row['n_trials']}", axis=1)
df_runs_raw_parents = df_runs_raw.copy()
df_runs_raw_parents = df_runs_raw_parents.loc[df_runs_raw_parents["mlflow.parentRunId"].isna()]

In [4]:
df_runs_raw_parents.head(5)

Unnamed: 0_level_0,status,start_time,end_time,best/base_model_kwargs/n_clusters,best/child_run_id,best/cohirf_kwargs/base_model_kwargs/eps,best/cohirf_kwargs/base_model_kwargs/min_samples,best/cohirf_kwargs/base_model_kwargs/n_clusters,best/cohirf_kwargs/base_model_kwargs/n_similarities,best/cohirf_kwargs/base_model_kwargs/sampling_ratio,...,best/normalized_mutual_info,best/rand_score,best/silhouette,best/v_measure,fit_model_return_elapsed_time,max_memory_used,max_memory_used_after_fit,EXCEPTION,mlflow.parentRunId,raised_exception
run_uuid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0d71bf1e60f846e3861a3f3bdb50799c,FINISHED,1761767709884,1761782000000.0,,5b1f13fc2260467287bcc0b2d4d30aae,,,5.0,,,...,2.6e-05,0.673106,-0.01184,2.6e-05,13963.713777,3209.604,3209.604,,,False
26c1796354be43c4be54e863aa8f46da,RUNNING,1764258671249,,,,,,,,,...,,,,,,,,,,
36fb731e570042958be2a877ccc49d24,FINISHED,1761766690314,1761784000000.0,,811879002e444becb982582ad6c1e551,4.432113,45.0,,,,...,0.467344,0.838804,-0.205324,0.467344,16871.268177,3227.392,3227.392,,,False
418782031a264581a9b9fdc78c8633e9,FINISHED,1761766690342,1761774000000.0,,cbc261a39c1c43c2a4ccc7d17f4eeba9,1.825958,27.0,,,,...,0.35712,0.641368,-0.2568,0.35712,7262.801147,3208.348,3208.348,,,False
467cbf4461b34e1bbc4fbfe38d7d5819,FAILED,1761770733287,1761785000000.0,,,,,,,,...,,,,,14676.635826,3926.996,3926.996,Best metric adjusted_rand not found in the bes...,,True


## Delete duplicate runs (if any) and complete some models that cannot run with some datasets

In [5]:
non_duplicate_columns = [
    "model",
    "dataset_name",
	"standardize",
	"hpo_metric",
	"hpo_seed",
	"seed_dataset_order"
]
# df_runs_parents.loc[df_runs_parents["best/n_clusters_"]*0.5 > df_runs_parents["n_instances"], "best/adjusted_rand"] = 
df_runs_parents = df_runs_raw_parents.dropna(axis=0, how="all", subset=["best/adjusted_rand"]).copy()
# add back runs that were not evaluated because we judged too many clusters (but they run anyway)
# df_valid_runs = df_runs_raw_parents.loc[df_runs_raw_parents["best/n_clusters_"] > df_runs_raw_parents["n_instances"]*0.5].copy()
# df_runs_parents = pd.concat([df_runs_parents, df_valid_runs], axis=0)
df_runs_parents = df_runs_parents.loc[(~df_runs_parents.duplicated(non_duplicate_columns))]
# fill missing values with "None"
df_runs_parents = df_runs_parents.fillna("None")

In [6]:
df_to_cat = []
hpo_metrics = [
    "adjusted_rand",
    "adjusted_mutual_info",
    "calinski_harabasz_score",
    "silhouette",
    "davies_bouldin_score",
    "normalized_mutual_info",
]
standardize = [True]
fill_value = pd.NA
fill_columns = ["best/adjusted_rand", "best/adjusted_mutual_info", "best/calinski_harabasz_score", "best/silhouette", "best/davies_bouldin_score", "best/normalized_mutual_info"]

# Missing

In [7]:
model_nickname = df_runs_parents['model'].unique().tolist()
model_nickname.sort()
model_nickname

['BatchCoHiRF-1iter-100',
 'BatchCoHiRF-1iter-random-20',
 'BatchCoHiRF-DBSCAN-1iter-random-20',
 'BatchCoHiRF-KernelRBF-1iter-100',
 'BatchCoHiRF-KernelRBF-1iter-random-20',
 'BatchCoHiRF-SC-SRGF-100',
 'CoHiRF-KernelRBF-100',
 'KMeans-20']

In [8]:
model_nickname = ["KMeans-20"]

In [9]:
models_names = [
    "KMeans",
    "BatchCoHiRF-1iter-random",
    "BatchCoHiRF-DBSCAN-1iter-random",
    "BatchCoHiRF-SC-SRGF-1R-1iter-random",
    "BatchCoHiRF-KernelRBF-1iter-random",
]
models_names = [f"{name}-20" for name in models_names]
dataset_names = ["crop-mapping"]
hpo_metrics = [
    "adjusted_rand",
    # "adjusted_mutual_info",
    # "calinski_harabasz_score",
    # "normalized_mutual_info",
    # "davies_bouldin_score",
    # "silhouette",
]
seeds = [0, 1, 2]
combinations = []
for model in models_names:
    for dataset in dataset_names:
        for hpo_metric in hpo_metrics:
            for seed in seeds:
                combination = {
                    "model": model,
                    "dataset_name": dataset,
                    "hpo_metric": hpo_metric,
                    "hpo_seed": seed,
                    "seed_dataset_order": seed,
                    "standardize": True,
                }
                combinations.append(combination)
combinations_df = pd.DataFrame(combinations)
unique_columns = [
    "model",
    "dataset_name",
    "hpo_metric",
    "hpo_seed",
    # "seed_dataset_order",
    "standardize",
]
# compare combinations_df with df_runs_parents to get missing combinations
df_missing = combinations_df.merge(
    df_runs_parents,
    on=unique_columns,
    how="left",
    indicator=True,
)
df_missing = df_missing.loc[df_missing["_merge"] == "left_only"]
df_missing

Unnamed: 0,model,dataset_name,hpo_metric,hpo_seed,seed_dataset_order_x,standardize,status,start_time,end_time,best/base_model_kwargs/n_clusters,...,best/rand_score,best/silhouette,best/v_measure,fit_model_return_elapsed_time,max_memory_used,max_memory_used_after_fit,EXCEPTION,mlflow.parentRunId,raised_exception,_merge
9,BatchCoHiRF-SC-SRGF-1R-1iter-random-20,crop-mapping,adjusted_rand,0,0,True,,,,,...,,,,,,,,,,left_only
10,BatchCoHiRF-SC-SRGF-1R-1iter-random-20,crop-mapping,adjusted_rand,1,1,True,,,,,...,,,,,,,,,,left_only
11,BatchCoHiRF-SC-SRGF-1R-1iter-random-20,crop-mapping,adjusted_rand,2,2,True,,,,,...,,,,,,,,,,left_only


# Tables

In [10]:
def get_parameters_string(row):
    parameter_names = {
		"best/alpha": "\\alpha",
		"best/avg_dims": "d",
		"best/base_model_kwargs/eps": "\\epsilon",
		"best/base_model_kwargs/min_samples": "n_{\\text{min}}",
		"best/base_model_kwargs/n_clusters": "C",
		"best/c": "c",
		"best/cohirf_kwargs/base_model_kwargs/eps": "\\epsilon",
		"best/cohirf_kwargs/base_model_kwargs/min_samples": "n_{\\text{min}}",
		"best/cohirf_kwargs/kmeans_n_clusters": "C",
		"best/cohirf_kwargs/n_features": "q",
		"best/cohirf_kwargs/repetitions": "R",
		"best/damping": "\\lambda",
		# "best/density_threshold": "\\tau",
		"best/eps": "\\epsilon",
		"best/kmeans_n_clusters": "C",
		"best/lambda_": "\\lambda",
		"best/min_bin_freq": "bin_{\\text{min}}",
		"best/min_cluster_size": "C_{\\text{min}}",
		"best/min_samples": "n_{\\text{min}}",
		"best/n_clusters": "C",
		"best/n_features": "q",
		# "best/n_partitions": "P",
		"best/n_similarities": "m",
		"best/p": "p",
		"best/repetitions": "R",
		"best/sampling_ratio": "r",
		"best/sc_n_clusters": "C",
		"best/transform_kwargs/gamma": "\\gamma",
	}
    first = True
    str = ""
    for p in parameter_names.keys():
        if not pd.isna(row[p]) and row[p] != "None":
            if not first:
                str += "; "
            else:
                first = False
            value = float(row[p])
            if value.is_integer():
                value = int(value)
                str += f"${parameter_names[p]}={value}$"
            else:
                str += f"${parameter_names[p]}={value:0.2f}$"
    return str

In [11]:
def highlight_max(df, column_name, level=0):
    df_column = df[column_name]
    max_values = df_column.groupby(level=level).transform('max')
    is_highlighted = df_column.round(3) == max_values.round(3)
    df_css = df.copy().astype(str)
    df_css.loc[:, :] = ''
    df_css[is_highlighted] = 'font-weight: bold'
    return df_css

In [12]:
def highlight_min(df, column_name, level=0):
    df_column = df[column_name]
    min_values = df_column.groupby(level=level).transform("min")
    is_highlighted = df_column.round(3) == min_values.round(3)
    df_css = df.copy().astype(str)
    df_css.loc[:, :] = ""
    df_css[is_highlighted] = "font-weight: bold"
    return df_css

In [13]:
def highlight_max_index(series_index, df_column, level=0):
    max_values = df_column.groupby(level=level).transform('max')
    is_highlighted = df_column.round(3) == max_values.round(3)
    series_css = series_index.copy().astype(str)
    series_css[:] = ''
    series_css[is_highlighted.values] = 'font-weight: bold'
    return series_css

In [23]:
def underline_2nd_max(df, column_name, level=0):
    df_column = df[column_name]
    # get the second max value
    second_max_values = df_column.groupby(level=level).transform(lambda x: x.round(3).drop_duplicates().nlargest(2).iloc[-1])
    is_underlined = df_column.round(3) == second_max_values.round(3)
    df_css = df.copy().astype(str)
    df_css.loc[:, :] = ''
    df_css[is_underlined] = 'underline: --latex--rwrap'
    return df_css

In [24]:
def underline_2nd_min(df, column_name, level=0):
    df_column = df[column_name]
    # get the second min value
    second_min_values = df_column.groupby(level=level).transform(
        lambda x: x.round(3).drop_duplicates().nsmallest(2).iloc[-1]
    )
    is_underlined = df_column.round(3) == second_min_values.round(3)
    df_css = df.copy().astype(str)
    df_css.loc[:, :] = ""
    df_css[is_underlined] = "underline: --latex--rwrap"
    return df_css

In [25]:
def underline_2nd_max_index(series_index, df_column, level=0):
    # get the second max value
    second_max_values = df_column.groupby(level=level).transform(lambda x: x.nlargest(2).iloc[-1])
    is_underlined = df_column.round(3) == second_max_values.round(3)
    series_css = series_index.copy().astype(str)
    series_css.loc[:] = ''
    series_css[is_underlined.values] = 'underline: --latex--rwrap'
    return series_css

## Some Models

In [26]:
print(*df_runs_parents['model'].unique(), sep="\n")

BatchCoHiRF-KernelRBF-1iter-random-20
BatchCoHiRF-DBSCAN-1iter-random-20
BatchCoHiRF-1iter-100
BatchCoHiRF-1iter-random-20
KMeans-20
CoHiRF-KernelRBF-100
BatchCoHiRF-SC-SRGF-100
BatchCoHiRF-KernelRBF-1iter-100


In [27]:
model_names = {
    "KMeans-20": "K-Means",
    "BatchCoHiRF-1iter-random-20": "BatchCoHiRF-1iter-random",
    "BatchCoHiRF-DBSCAN-1iter-random-20": "BatchCoHiRF-DBSCAN-1iter-random",
    "BatchCoHiRF-SC-SRGF-1iter-random-20": "BatchCoHiRF-SC-SRGF-1iter-random",
    "BatchCoHiRF-KernelRBF-1iter-random-20": "BatchCoHiRF-KernelRBF-1iter-random",
}

dataset_names = {
    "binary_alpha_digits": "binary-alpha-digits",
	"mnist_784": "mnist",
}  # otherwise we get an error in latex

# Filter to only standardized runs
df = df_runs_parents.copy()
df = df.loc[df['standardize'] == True]
df = df.loc[df['model'].isin(model_names.keys())]
df = df.replace({"model": model_names})
df = df.replace({"dataset_name": dataset_names})

# Filter to only runs with hpo_seed in range(5)
df = df.loc[df['hpo_seed'].isin(range(3))]

In [28]:
hpo_metrics = [
    "adjusted_rand",
    # "adjusted_mutual_info",
    # "calinski_harabasz_score",
    # "silhouette",
    # "davies_bouldin_score",
    # "normalized_mutual_info",
]

hpo_metrics_rename = [
    "ARI",
    # "AMI",
    # "Calinski",
    # "Silhouette",
    # "Davies-Bouldin",
    # "NMI",
]

dfs_metrics = {}

for hpo_metric, hpo_metric_rename in zip(hpo_metrics, hpo_metrics_rename):
    df_metric = df.loc[df['hpo_metric'] == hpo_metric][
        ['dataset_name', 'model', 'hpo_seed', f'best/{hpo_metric}']
    ].rename(columns={f'best/{hpo_metric}': hpo_metric_rename})
    df_metric = df_metric.dropna(subset=[hpo_metric_rename])
    df_metric = df_metric.set_index(['dataset_name', 'model', 'hpo_seed'])
    df_metric = df_metric.astype({hpo_metric_rename: float})
    dfs_metrics[hpo_metric_rename] = df_metric

df_metrics = pd.concat(dfs_metrics.values(), axis=1, join="outer")
df_metrics = df_metrics.reset_index()

# calculate mean and std
df_metrics = df_metrics.groupby(['dataset_name', 'model']).agg(['mean', 'std'])
# flatten multiindex columns
df_metrics.columns = [' '.join(col).strip() for col in df_metrics.columns.values]
# drop hpo_seed level
df_metrics = df_metrics.drop(columns=['hpo_seed mean', 'hpo_seed std'])
# Rename index levels
df_metrics.index.names = ["Dataset", "Model"]
# df_metrics["Davies-Bouldin"] = df_metrics["Davies-Bouldin"].astype(float)
# create columns Metric (Mean ± Std)
# for metric in hpo_metrics_rename:
#     df_metrics[f"{metric}"] = df_metrics[f"{metric} mean"].round(3).astype(str) + " $\\pm$ " + df_metrics[f"{metric} std"].round(3).astype(str)

for metric in hpo_metrics_rename:
    df_metrics[f"{metric}"] = (
        df_metrics[f"{metric} mean"].apply(lambda x: f"{x:.3f}" if not pd.isna(x) else "No Run")
        + " $\\pm$ "
        + df_metrics[f"{metric} std"].apply(lambda x: f"{x:.3f}" if not pd.isna(x) else "No Run")
    )


# Reset Seed level
# df_metrics = df_metrics.reset_index(level="Seed")

In [29]:
df_metrics

Unnamed: 0_level_0,Unnamed: 1_level_0,ARI mean,ARI std,ARI
Dataset,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
crop-mapping,BatchCoHiRF-1iter-random,0.526921,0.035176,0.527 $\pm$ 0.035
crop-mapping,BatchCoHiRF-DBSCAN-1iter-random,0.315182,0.13862,0.315 $\pm$ 0.139
crop-mapping,BatchCoHiRF-KernelRBF-1iter-random,4.1e-05,2.9e-05,0.000 $\pm$ 0.000
crop-mapping,K-Means,0.722826,0.011014,0.723 $\pm$ 0.011


In [52]:
# Add mean time columns to the existing df_metrics dataframe
# Using the same filtering approach as the original df_metrics
df = df_runs_parents.copy()
df = df.loc[df["standardize"] == True]
df = df.loc[df["model"].isin(model_names.keys())]
df = df.replace({"model": model_names})
df = df.replace({"dataset_name": dataset_names})
# Filter to only runs with hpo_seed in range(5)
df = df.loc[df["hpo_seed"].isin(range(5))]

# Calculate mean and std times for each dataset-model combination across all metrics
df_times = (
    df.groupby(["dataset_name", "model"])
    .agg({"best/elapsed_time": ["mean", "std"], "fit_model_return_elapsed_time": ["mean", "std"]})
    .rename(columns={"best/elapsed_time": "Best Time", "fit_model_return_elapsed_time": "HPO Time"})
)

# Flatten multiindex columns
df_times.columns = [' '.join(col).strip() for col in df_times.columns.values]
# Set the same index structure as df_metrics
df_times.index.names = ["Dataset", "Model"]

df_times["Best Time"] = (
	df_times["Best Time mean"].apply(lambda x: f"{x:4.3f}" if not pd.isna(x) else "No Run")
	+ " $\\pm$ " 
	+ df_times["Best Time std"].apply(lambda x: f"{x:4.3f}" if not pd.isna(x) else "No Run")
)
df_times["HPO Time"] = (
	df_times["HPO Time mean"].apply(lambda x: f"{x:4.3f}" if not pd.isna(x) else "No Run")
	+ " $\\pm$ "
	+ df_times["HPO Time std"].apply(lambda x: f"{x:4.3f}" if not pd.isna(x) else "No Run")
)

# Join with the existing df_metrics (verify we have the same number of rows!)
df_metrics = df_metrics.join(df_times, how="outer")

In [53]:
df_metrics

Unnamed: 0_level_0,Unnamed: 1_level_0,ARI mean,ARI std,ARI,Best Time mean,Best Time std,HPO Time mean,HPO Time std,Best Time,HPO Time
Dataset,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
crop-mapping,K-Means,0.722826,0.011014,0.723 $\pm$ 0.011,1.797588,0.466573,556.773016,21.197466,1.798 $\pm$ 0.467,556.773 $\pm$ 21.197


The following will provide the latex code for a clean table, we only need to make a little adjustement in the first line to delete the "key" and have only one header. For the longtable environment (full data) we need to add the "\*" at the end of lines we dont want to have a page break. We also should replace the entire begin{table} ... end{table} by begin{longtable} ... end{longtable} in the latex file, if you want to put caption and labels you should break the line after with '\\' (put both on the same line!)


In [56]:
df_latex = df_metrics.copy().style
columns_to_hide = [col for col in df_latex.columns if col not in (hpo_metrics_rename + ["Best Time", "HPO Time"])]
columns_to_hide += ["NMI"]
for metric in hpo_metrics_rename:
    highlight_max = partial(highlight_max, column_name=f"{metric} mean")
    highlight_2nd_max = partial(underline_2nd_max, column_name=f"{metric} mean")
    df_latex = df_latex.apply(highlight_max, subset=[metric, f"{metric} mean"], axis=None)
    df_latex = df_latex.apply(highlight_2nd_max, subset=[metric, f"{metric} mean"], axis=None)

df_latex = df_latex.to_latex(
    hrules=True,
    clines="skip-last;data",
    convert_css=True,
    column_format="ll" + "l" * (len(df_latex.columns) - len(columns_to_hide)),
    # environment="longtable",
)
print(df_latex)

\begin{tabular}{llll}
\toprule
 &  & ARI mean & ARI std & ARI & Best Time mean & Best Time std & HPO Time mean & HPO Time std & Best Time & HPO Time \\
Dataset & Model &  &  &  &  &  &  &  &  &  \\
\midrule
crop-mapping & K-Means & \bfseries \underline{0.722826} & 0.011014 & \bfseries \underline{0.723 $\pm$ 0.011} & 1.797588 & 0.466573 & 556.773016 & 21.197466 & 1.798 $\pm$ 0.467 & 556.773 $\pm$ 21.197 \\
\cline{1-11}
\bottomrule
\end{tabular}

