In [1]:
import json
import os
from pathlib import Path
import pandas as pd
from scipy.stats import spearmanr

In [2]:
# Path of the JSON results file, relative to the notebook's working directory.
json_path = "./ladder_results.json"

# Read the file as UTF-8 text and decode it; `data` holds the parsed
# top-level JSON value.
with open(json_path, encoding="utf-8") as f:
    data = json.loads(f.read())

In [3]:
# Pull the two top-level sections out of the parsed payload, falling back to
# an empty mapping / empty list when a key is missing.
meta, rows = data.get("meta", {}), data.get("results", [])

# Working table built from the "results" section.
df = pd.DataFrame(data=rows)

In [4]:
# Directory that receives the CSV outputs. Create it up front (including any
# missing parents) and tolerate a directory that already exists.
out_dir = Path("./results")
out_dir.mkdir(exist_ok=True, parents=True)

In [5]:
def spearman_rho(x, y):
    """Return Spearman's rank correlation between two equally long sequences.

    Both inputs are coerced to numeric; entries that cannot be parsed become
    NaN and are left out of the computation (``nan_policy="omit"``).

    Parameters
    ----------
    x, y : array-like
        Paired observations, matched by position.

    Returns
    -------
    float
        The correlation coefficient, or ``nan`` when it is undefined: fewer
        than two valid pairs, or one side constant over the valid pairs.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not have the same length.
    """
    x = pd.to_numeric(pd.Series(x), errors="coerce")
    y = pd.to_numeric(pd.Series(y), errors="coerce")

    if len(x) != len(y):
        raise ValueError("x and y must have the same length")

    # Positional mask of the pairs in which both values survived coercion.
    # NumPy arrays are used so differing Series indexes cannot misalign them.
    valid = x.notna().to_numpy() & y.notna().to_numpy()

    # Degenerate input: depending on the SciPy version, spearmanr may raise
    # (too few observations once NaNs are omitted) or warn and return NaN
    # (constant input). Report NaN directly so callers get a uniform result.
    if valid.sum() < 2 or x[valid].nunique() < 2 or y[valid].nunique() < 2:
        return float("nan")

    return float(spearmanr(x, y, nan_policy="omit").correlation)

In [6]:
def spearman_per_group(g, metric, x_col="shuffle_frac"):
    """Spearman correlation between ``g[x_col]`` and ``g[metric]`` for one group.

    Both columns are coerced to numeric; rows where either value is missing
    or unparseable are left out of the computation (``nan_policy="omit"``).

    Parameters
    ----------
    g : pandas.DataFrame
        Rows of a single group; must contain ``x_col`` and ``metric``.
    metric : str
        Name of the column correlated against ``x_col``.
    x_col : str, default "shuffle_frac"
        Name of the column used as the independent variable.

    Returns
    -------
    float
        The correlation coefficient, or ``nan`` when it is undefined: fewer
        than two valid rows, or either column constant over the valid rows.
    """
    x = pd.to_numeric(g[x_col], errors="coerce")
    y = pd.to_numeric(g[metric], errors="coerce")

    # Rows in which both values survived coercion.
    valid = x.notna().to_numpy() & y.notna().to_numpy()

    # Degenerate group: depending on the SciPy version, spearmanr may raise
    # (too few observations once NaNs are omitted) or warn and return NaN
    # (constant input). Returning NaN here keeps a single bad group from
    # aborting a whole groupby-apply.
    if valid.sum() < 2 or x[valid].nunique() < 2 or y[valid].nunique() < 2:
        return float("nan")

    return float(spearmanr(x, y, nan_policy="omit").correlation)

In [7]:
# Compute per dataset
# One row per (dataset, alg) pair with the Spearman rho of NS and SNS against
# shuffle_frac. dropna=False keeps groups whose key is missing.
# The columns used by the callback are selected explicitly before .apply so
# that the grouping columns are not passed into it: applying on the grouping
# columns is deprecated in pandas 2.2 and emits a warning there.
spearman_ds_alg = (
    df.groupby(["dataset", "alg"], dropna=False)[["shuffle_frac", "NS", "SNS"]]
      .apply(lambda g: pd.Series({
          "rho_NS": spearman_per_group(g, "NS"),
          "rho_SNS": spearman_per_group(g, "SNS")
      }))
      .reset_index()
)

# Persist the per-dataset table without the positional index.
by_dataset_csv = os.path.join(out_dir, "spearman_by_dataset.csv")
spearman_ds_alg.to_csv(by_dataset_csv, index=False)

  .apply(lambda g: pd.Series({


In [8]:
# Compute median across datasets
# Step 1: per-algorithm median of each rho column (groups with a missing
# "alg" key are kept).
spearman_median = spearman_ds_alg.groupby(by="alg", dropna=False)[["rho_NS", "rho_SNS"]].median()

# Step 2: turn "alg" back into a regular column and order the rows by it.
spearman_median = spearman_median.reset_index().sort_values(by="alg")

# Write the summary table without the positional index.
median_csv = os.path.join(out_dir, "spearman_median_by_alg.csv")
spearman_median.to_csv(median_csv, index=False)