In [None]:
import json
from pathlib import Path


def load_result(pth) -> dict:
    """Read one benchmark result file and return its parsed JSON content.

    Parameters
    ----------
    pth : str or os.PathLike
        Location of the JSON file to read.

    Returns
    -------
    dict
        Whatever ``json.load`` decodes from the file; the result files read
        by this notebook are expected to hold a JSON object.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    """
    # JSON text is UTF-8 by specification; do not depend on the platform's
    # default encoding, which differs between operating systems and locales.
    with open(pth, encoding="utf-8") as f:
        return json.load(f)

In [None]:
RESULTS_DIR = "../benchmark-results"

# glob yields files in a filesystem-dependent order. Sorting makes the order in
# which records are appended to `entries` (and everything derived from it)
# reproducible between runs and machines.
all_jsons = sorted(Path(RESULTS_DIR).glob("*.json"))

# One list of {"method": ..., "value": ...} records per score name.
entries = {
    'silhouette': [],
    'calinski_harabasz': [],
    'davies_bouldin' : [],
    "kbet": [],
}


for pth in all_jsons:
    raw = load_result(pth)

    for score_name, records in entries.items():
        value = raw["scores"][score_name]
        # A score stored as null in the result file is skipped, not recorded.
        if value is not None:
            records.append({
                "method": raw["method"],
                "value": value,
            })

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

sns.set_theme(style="ticks")

# One panel per score. The count is derived from `entries` so that zip() below
# can never silently drop a score when the dictionary gains or loses keys.
n_panels = len(entries)
fig, axs = plt.subplots(
    1, n_panels, sharey=False, squeeze=False, figsize=(3.75 * n_panels, 5)
)
# squeeze=False always returns a 2-D array; flatten it to one axis per score.
axs = axs.ravel()

all_dfs = []

for ax, (score_name, vals) in zip(axs, entries.items()):
    ax.set_title(score_name)

    if not vals:
        # No records for this score: a frame built from an empty list has no
        # "method"/"value" columns for seaborn to plot, so leave the panel blank.
        continue

    df = pd.DataFrame(vals)

    # Box plot for the distribution, with the individual values drawn on top.
    sns.boxplot(x="method", y="value", data=df, ax=ax)
    sns.swarmplot(x="method", y="value", data=df, ax=ax, color=".25")

    # Tag the rows so the per-score frames can be combined into one long table.
    df["score_name"] = score_name
    all_dfs.append(df)

# ignore_index avoids repeated index labels across the per-score frames.
if all_dfs:
    final_df = pd.concat(all_dfs, ignore_index=True)
else:
    final_df = pd.DataFrame(columns=["method", "value", "score_name"])

fig.tight_layout()

In [None]:
# Write the combined long-format table (one row per method/score value) to CSV.
output_path = Path("../data/generated_results.csv")
# to_csv does not create missing directories, so make sure the target exists.
output_path.parent.mkdir(parents=True, exist_ok=True)
final_df.to_csv(output_path, index=False)