In [1]:
import torch
import matplotlib.pyplot as plt
import os
import numpy as np
import copy
# Tell the Intel OpenMP runtime to tolerate more than one copy of itself being
# loaded into this process (the usual workaround for the
# "OMP: Error #15 ... already initialized" abort). The variable name must be
# spelled exactly KMP_DUPLICATE_LIB_OK; any other key is silently ignored.
# NOTE(review): this runs after the imports above; if the abort still occurs,
# move this assignment ahead of the torch / numpy / matplotlib imports.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

In [14]:
import torch
import pandas as pd
import numpy as np

# Saved result files, one per method family. All paths are relative to the
# notebook's working directory.
# NOTE(review): torch.load deserialises with pickle, so only point these at
# result files you produced yourself or otherwise trust.
result_files = {
    'cocycles': 'cocycles_csuite_mixed_results_n=2000_corr=0.0_dag=False.pt',
    'flows1': 'couplingbgm_csuite_mixed_results_n=2000_corr=0.0_dag=False_affine=False_consistent=False_trials=10_learnable.pt',
    'flows2': 'causalflow_csuite_mixed_results_n=2000_corr=0.0_dag=False_affine=False_consistent=True_trials=10_learnable.pt',
    'flows3': 'causalflow_csuite_mixed_results_n=2000_corr=0.0_dag=False_affine=True_consistent=False_trials=10_learnable.pt',
}

# Load in the same order as before: cocycles first, then the three flow sets.
cocycles = torch.load(result_files['cocycles'])
flows1 = torch.load(result_files['flows1'])
flows2 = torch.load(result_files['flows2'])
flows3 = torch.load(result_files['flows3'])

# Metric names reported in every table, in column order.
metrics = ['KS_int', 'KS_CF', 'W1_CF', 'W1_int', 'RMSE_CF']

# Flow-based result sets, keyed by the label used as the row prefix in the tables.
flow_sets = dict(
    flows1=flows1,    # couplingbgm file: affine=False, consistent=False
    flows2=flows2,    # causalflow file:  affine=False, consistent=True
    flows3=flows3,    # causalflow file:  affine=True,  consistent=False
)

# Every SCM name that occurs in at least one flow variant, in sorted order.
scm_names = set()
for runs in flow_sets.values():
    scm_names.update(r['scm'] for r in runs)
scms = sorted(scm_names)

def _scalar(value):
    """Return ``value.item()`` when the object offers it (e.g. a tensor), else ``float(value)``."""
    return value.item() if hasattr(value, 'item') else float(value)


def _nanmean_per_metric(infos):
    """Pool every metric's values over all ``infos`` dicts and take the NaN-ignoring mean.

    ``infos`` is an iterable of dicts mapping a metric name to a sequence of
    values; a metric absent from a dict contributes nothing. Uses the
    notebook-level ``metrics`` list for the metric names.
    """
    pooled = {met: [] for met in metrics}
    for info in infos:
        for met in metrics:
            pooled[met].extend(_scalar(x) for x in info.get(met, []))
    return {met: np.nanmean(pooled[met]) for met in metrics}


# One DataFrame per SCM: rows are "<variant>_<base>" for the flow variants plus
# one row per cocycle method; columns are the metrics (NaN-ignoring means).
tables = {}

for scm in scms:
    data = {}

    # --- flow variants: one row per (variant, base type) ---
    for variant, runs in flow_sets.items():
        scm_runs = [r for r in runs if r['scm'] == scm]
        if scm_runs:
            # Base types are read off the first matching run; every key other
            # than 'scm' is treated as one.
            for base in [name for name in scm_runs[0].keys() if name != 'scm']:
                # Flow results keep their metrics one level down, at run[base][base].
                data[f"{variant}_{base}"] = _nanmean_per_metric(
                    [run[base][base] for run in scm_runs]
                )

    # --- cocycle methods: metrics sit directly under run[method] ---
    scm_cocycle_runs = [r for r in cocycles if r['scm'] == scm]
    if scm_cocycle_runs:
        for method in [name for name in scm_cocycle_runs[0].keys() if name != 'scm']:
            data[method] = _nanmean_per_metric(
                [run[method] for run in scm_cocycle_runs]
            )

    # Rows are the collected labels, columns the metrics in their fixed order.
    tables[scm] = pd.DataFrame.from_dict(data, orient='index', columns=metrics)


In [15]:
import torch
import pandas as pd
import numpy as np

# … (load your torch files and define `metrics`, `cocycles`, `flow_sets`, etc.) …

# Per-SCM summary tables (medians).
#   * Flow variants: in every run, keep only the base type whose
#     run[base]['cv_scores'] has the smallest minimum, then take the median of
#     that base's metrics over the runs.
#   * Cocycle methods: median of all values recorded for each metric.
# Relies on `scms`, `flow_sets`, `cocycles` and `metrics` from the loading cell.
tables = {}

for scm in scms:
    data = {}

    # --- flow variants, best-of-base per run ---
    for variant, runs in flow_sets.items():
        flow_runs = [r for r in runs if r['scm'] == scm]
        if not flow_runs:
            continue

        vals = {met: [] for met in metrics}   # best-base metric values, one per run
        indices = []                          # best_info['index'] of each run
        bases = []                            # winning base per run (kept for inspection)

        for run in flow_runs:
            # every key except 'scm' names a candidate base type
            base_types = [name for name in run.keys() if name != 'scm']

            # winner = base with the smallest minimum in its 'cv_scores'
            best_base = min(
                base_types,
                key=lambda b: float(np.min(run[b]['cv_scores']))
            )
            bases.append(best_base)

            # flow results keep their metrics one level down, at run[base][base]
            best_info = run[best_base][best_base]
            indices.append(int(best_info['index']))

            for met in metrics:
                recorded = best_info.get(met)
                if recorded is None or len(recorded) == 0:
                    # Metric absent for this run: record NaN instead of raising
                    # IndexError, so the gap shows up as NaN in the table.
                    vals[met].append(float('nan'))
                    continue
                x = recorded[0]
                # tensors expose .item(); plain numbers go through float()
                vals[met].append(x.item() if hasattr(x, 'item') else float(x))

        # Median over runs. np.median propagates NaN; use np.nanmedian here if
        # runs with a missing metric should be ignored instead.
        data[f"{variant}_best"] = {
            met: np.median(vals[met]) for met in metrics
        }
        # median of the per-run best indices
        data[f"{variant}_best"]['best_idx'] = np.median(indices)

    # --- cocycle methods: metrics sit directly under run[method] ---
    cocy_runs = [r for r in cocycles if r['scm'] == scm]
    if cocy_runs:
        cocy_methods = [name for name in cocy_runs[0].keys() if name != 'scm']
        for m in cocy_methods:
            vals = {met: [] for met in metrics}
            for run in cocy_runs:
                info = run[m]
                for met in metrics:
                    for x in info.get(met, []):
                        vals[met].append(x.item() if hasattr(x, 'item') else float(x))
            data[m] = {met: np.median(vals[met]) for met in metrics}

    # Cocycle rows carry no 'best_idx', so that column is NaN for them.
    tables[scm] = pd.DataFrame.from_dict(data, orient='index', columns=metrics + ['best_idx'])

In [39]:
import torch
import pandas as pd
import numpy as np

# … your loading boilerplate …

k = 2   # only the first k base types of each run compete for "best"

# Per-SCM summary tables (means).
#   * Flow variants: in every run, among the first k base types, keep the one
#     whose run[base]['cv_scores'] has the smallest minimum, then average that
#     base's metrics over the runs.
#   * Cocycle methods: mean of all values recorded for each metric.
# Relies on `scms`, `flow_sets`, `cocycles` and `metrics` from the loading cell.
tables = {}
for scm in scms:
    data = {}

    for variant, runs in flow_sets.items():
        flow_runs = [r for r in runs if r['scm'] == scm]
        if not flow_runs:
            continue

        vals = {met: [] for met in metrics}   # best-base metric values, one per run
        indices = []                          # best_info['index'] of each run

        for run in flow_runs:
            # every key except 'scm' names a base type; order is the dict's key order
            base_types = [b for b in run.keys() if b != 'scm']

            # restrict the competition to the first k of them
            candidates = base_types[:k]

            # winner = candidate with the smallest minimum in its 'cv_scores'
            best_base = min(
                candidates,
                key=lambda b: float(np.min(run[b]['cv_scores']))
            )

            # flow results keep their metrics one level down, at run[base][base]
            best_info = run[best_base][best_base]
            indices.append(int(best_info['index']))

            for met in metrics:
                recorded = best_info.get(met)
                if recorded is None or len(recorded) == 0:
                    # Metric absent for this run: record NaN rather than letting
                    # float(None) raise TypeError.
                    vals[met].append(float('nan'))
                    continue
                x = recorded[0]
                # tensors expose .item(); plain numbers go through float()
                vals[met].append(x.item() if hasattr(x, 'item') else float(x))

        # np.mean propagates NaN, so a run with a missing metric shows up as NaN.
        data[f"{variant}_best_k{k}"] = {
            **{met: np.mean(vals[met]) for met in metrics},
            'best_idx': np.mean(indices)
        }

    # --- cocycle methods: metrics sit directly under run[method] ---
    cocy_runs = [r for r in cocycles if r['scm'] == scm]
    if cocy_runs:
        # `name`, not `k`, so the selection parameter above is never shadowed
        cocy_methods = [name for name in cocy_runs[0].keys() if name != 'scm']
        for m in cocy_methods:
            vals = {met: [] for met in metrics}
            for run in cocy_runs:
                info = run[m]
                for met in metrics:
                    for x in info.get(met, []):
                        vals[met].append(x.item() if hasattr(x, 'item') else float(x))
            data[m] = {met: np.mean(vals[met]) for met in metrics}

    # Cocycle rows carry no 'best_idx', so that column is NaN for them.
    tables[scm] = pd.DataFrame.from_dict(
        data, orient='index', columns=metrics + ['best_idx']
    )

In [40]:
# Dump every SCM's summary table to stdout under a banner with the SCM name.
for scm_name in tables:
    df = tables[scm_name]
    banner = f"\n=== {scm_name} ==="
    print(banner)
    print(df)


=== 2var_linear ===
                  KS_int     KS_CF      W1_CF     W1_int     RMSE_CF  best_idx
flows1_best_k2  0.130257  0.184517  15.161492  21.284450  202.000206       3.0
flows2_best_k2  0.311757  0.239388   1.460300  17.746339   32.702805       2.2
flows3_best_k2  0.402669  0.147898   7.194635  19.011633   80.918861       0.8
Cocycle_CMMD_V  0.027538  0.043664   0.062796  13.706214    0.041833       NaN

=== 2var_nonlinear ===
                  KS_int     KS_CF      W1_CF     W1_int     RMSE_CF  best_idx
flows1_best_k2  0.116588  0.270060  17.827800  24.797504  207.949100       3.0
flows2_best_k2  0.298519  0.264636   0.750440  17.665810   26.014764       2.3
flows3_best_k2  0.436545  0.218915   8.244131  23.943082   37.526932       0.6
Cocycle_CMMD_V  0.039610  0.130208   0.241265  13.790954    1.668022       NaN

=== chain5_linear ===
                  KS_int     KS_CF     W1_CF    W1_int   RMSE_CF  best_idx
flows1_best_k2  0.139077  0.060566  1.413497  0.314765  0.349331   

In [31]:
import torch
import pandas as pd
import numpy as np

# … your loading boilerplate …

k = 2   # only the first k base types of each run compete for "best"

# Per-SCM spread tables (standard deviations).
#   * Flow variants: in every run, among the first k base types, keep the one
#     whose run[base]['cv_scores'] has the smallest minimum, then take the
#     standard deviation of that base's metrics over the runs.
#   * Cocycle methods: standard deviation of all values recorded per metric.
# Relies on `scms`, `flow_sets`, `cocycles` and `metrics` from the loading cell.
tables = {}
for scm in scms:
    data = {}

    for variant, runs in flow_sets.items():
        flow_runs = [r for r in runs if r['scm'] == scm]
        if not flow_runs:
            continue

        vals = {met: [] for met in metrics}   # best-base metric values, one per run
        indices = []                          # best_info['index'] of each run

        for run in flow_runs:
            # every key except 'scm' names a base type; order is the dict's key order
            base_types = [b for b in run.keys() if b != 'scm']

            # restrict the competition to the first k of them
            candidates = base_types[:k]

            # winner = candidate with the smallest minimum in its 'cv_scores'
            best_base = min(
                candidates,
                key=lambda b: float(np.min(run[b]['cv_scores']))
            )

            # flow results keep their metrics one level down, at run[base][base]
            best_info = run[best_base][best_base]
            indices.append(int(best_info['index']))

            for met in metrics:
                recorded = best_info.get(met)
                if recorded is None or len(recorded) == 0:
                    # Metric absent for this run: record NaN rather than letting
                    # float(None) raise TypeError.
                    vals[met].append(float('nan'))
                    continue
                x = recorded[0]
                # tensors expose .item(); plain numbers go through float()
                vals[met].append(x.item() if hasattr(x, 'item') else float(x))

        # np.std propagates NaN, so a run with a missing metric shows up as NaN.
        # NOTE: 'best_idx' is still the MEAN of the per-run indices, not their
        # standard deviation, unlike the metric columns of this table.
        data[f"{variant}_best_k{k}"] = {
            **{met: np.std(vals[met]) for met in metrics},
            'best_idx': np.mean(indices)
        }

    # --- cocycle methods: metrics sit directly under run[method] ---
    cocy_runs = [r for r in cocycles if r['scm'] == scm]
    if cocy_runs:
        # `name`, not `k`, so the selection parameter above is never shadowed
        cocy_methods = [name for name in cocy_runs[0].keys() if name != 'scm']
        for m in cocy_methods:
            vals = {met: [] for met in metrics}
            for run in cocy_runs:
                info = run[m]
                for met in metrics:
                    for x in info.get(met, []):
                        vals[met].append(x.item() if hasattr(x, 'item') else float(x))
            data[m] = {met: np.std(vals[met]) for met in metrics}

    # Cocycle rows carry no 'best_idx', so that column is NaN for them.
    tables[scm] = pd.DataFrame.from_dict(
        data, orient='index', columns=metrics + ['best_idx']
    )

In [32]:
# Dump every SCM's spread table to stdout under a banner with the SCM name.
for scm_name in tables:
    df = tables[scm_name]
    banner = f"\n=== {scm_name} ==="
    print(banner)
    print(df)


=== 2var_linear ===
                  KS_int     KS_CF      W1_CF     W1_int     RMSE_CF  best_idx
flows1_best_k2  0.069968  0.123934  18.349521  19.304230  253.421648       3.0
flows2_best_k2  0.112592  0.096876   2.182663   5.282089   75.039488       2.2
flows3_best_k2  0.044320  0.144173  13.570801   9.310283  156.349793       0.8
Cocycle_CMMD_V  0.017979  0.038628   0.044434   8.199690    0.049103       NaN

=== 2var_nonlinear ===
                  KS_int     KS_CF      W1_CF     W1_int     RMSE_CF  best_idx
flows1_best_k2  0.069870  0.126862  26.200139  26.261672  240.679842       3.0
flows2_best_k2  0.106482  0.076226   1.268960   5.491354   77.018004       2.3
flows3_best_k2  0.039270  0.146936  15.288154  12.352240   56.954197       0.6
Cocycle_CMMD_V  0.019644  0.053723   0.339332   8.114449    3.145247       NaN

=== chain5_linear ===
                  KS_int     KS_CF     W1_CF    W1_int   RMSE_CF  best_idx
flows1_best_k2  0.045924  0.012208  0.082392  0.103290  0.166493   