In [1]:
import torch
import matplotlib.pyplot as plt
import os
import numpy as np
import copy
# Let the process continue when more than one OpenMP runtime gets loaded
# (works around the "OMP: Error #15 ... already initialized" abort).
# The variable read by the OpenMP runtime is spelled KMP_DUPLICATE_LIB_OK.
# NOTE(review): setting this before importing torch/numpy is the safer order —
# consider moving it to the very top of the cell.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

In [3]:
import torch
import pandas as pd
import numpy as np

# Load the saved experiment results.
# SECURITY: torch.load unpickles arbitrary Python objects, so only open result
# files that were produced by trusted experiment runs.
# map_location='cpu' places any stored tensors on the CPU, so files written on a
# GPU machine still load (and work with NumPy) on a CPU-only one.
cocycles = torch.load(
    'cocycles_csuite_mixed_bd_results_n=2000_trials=10_new.pt',
    map_location='cpu',
)
flows1 = torch.load(
    'couplingbgm_csuite_mixed_bd_results_n=2000_corr=0.0_dag=False_affine=False_consistent=False_trials=10_new.pt',
    map_location='cpu',
)
flows2 = torch.load(
    'causalflow_csuite_mixed_bd_results_n=2000_corr=0.0_dag=False_affine=False_consistent=True_trials=10.pt',
    map_location='cpu',
)
flows3 = torch.load(
    'causalflow_csuite_mixed_bd_results_n=2000_corr=0.0_dag=False_affine=True_consistent=False_trials=10.pt',
    map_location='cpu',
)

# Metric names summarised in every table; also the DataFrame column order.
metrics = ['KS_int', 'KS_CF', 'W1_CF', 'W1_int', 'RMSE_CF']

# Flow-based result sets, keyed by the label used as the row-name prefix.
flow_sets = dict(
    flows1=flows1,    # couplingbgm file: affine=False, consistent=False
    flows2=flows2,    # causalflow file:  affine=False, consistent=True
    flows3=flows3,    # causalflow file:  affine=True,  consistent=False
)

# Sorted, de-duplicated SCM names found in any flow variant.
# SCMs that occur only in `cocycles` are not part of this list.
scms = sorted(set(
    run['scm']
    for variant_runs in flow_sets.values()
    for run in variant_runs
))

# Per-SCM summary tables: one row per method, one column per metric.
tables = {}

for scm in scms:
    rows = {}

    # --- flow variants: one row per (variant, base type) ---
    for variant, runs in flow_sets.items():
        selected = [r for r in runs if r['scm'] == scm]
        if len(selected) == 0:
            continue

        # Base types are read off the first selected run: every key except 'scm'.
        for base in [name for name in selected[0].keys() if name != 'scm']:
            # Pool each metric's values over all selected runs. The metrics sit
            # under run[base][base]; a metric that is absent contributes nothing.
            pooled = {
                met: [
                    x.item() if hasattr(x, 'item') else float(x)
                    for run in selected
                    for x in run[base][base].get(met, [])
                ]
                for met in metrics
            }
            # NaN-ignoring mean of the pooled values.
            rows[f"{variant}_{base}"] = {
                met: np.nanmean(pooled[met]) for met in metrics
            }

    # --- cocycle methods: metrics are stored directly under run[method] ---
    cocy_runs = [r for r in cocycles if r['scm'] == scm]
    if cocy_runs:
        for method in [name for name in cocy_runs[0].keys() if name != 'scm']:
            pooled = {
                met: [
                    x.item() if hasattr(x, 'item') else float(x)
                    for run in cocy_runs
                    for x in run[method].get(met, [])
                ]
                for met in metrics
            }
            rows[method] = {met: np.nanmean(pooled[met]) for met in metrics}

    # Rows = methods, columns = metrics (in the order given by `metrics`).
    tables[scm] = pd.DataFrame.from_dict(rows, orient='index', columns=metrics)


In [4]:
import torch
import pandas as pd
import numpy as np

# Uses `metrics`, `cocycles`, `flow_sets` and `scms` from the results-loading cell above; run that cell first.

# Per-SCM summary tables in which every flow variant is reduced to one row:
# for each run, the base type with the lowest cross-validation score is kept,
# and the metrics of those per-run winners are summarised by their median.
tables = {}

for scm in scms:
    data = {}

    # --- flow variants, best-of-base per run ---
    for variant, runs in flow_sets.items():
        flow_runs = [r for r in runs if r['scm'] == scm]
        if not flow_runs:
            continue

        vals = {met: [] for met in metrics}   # winner's metric value, one per run
        indices = []                          # winner's stored 'index', one per run

        for run in flow_runs:
            # Candidate base types: every key of the run except 'scm'.
            base_types = [b for b in run.keys() if b != 'scm']

            # Winner = base whose smallest CV score is lowest.
            best_base = min(
                base_types,
                key=lambda b: float(np.min(run[b]['cv_scores']))
            )

            # The metrics for a base live under run[base][base].
            best_info = run[best_base][best_base]
            indices.append(int(best_info['index']))

            for met in metrics:
                entries = best_info.get(met)
                if entries is None or len(entries) == 0:
                    # Metric not recorded for this run: keep the run count
                    # aligned and let NaN show up in the table instead of
                    # aborting the whole cell with an IndexError.
                    vals[met].append(float('nan'))
                    continue
                x = entries[0]
                # Tensors / NumPy scalars expose .item(); plain numbers do not.
                vals[met].append(x.item() if hasattr(x, 'item') else float(x))

        # Median over runs (np.median propagates NaN from missing metrics).
        summary = {met: np.median(vals[met]) for met in metrics}
        # Median of the winners' stored indices.
        summary['best_idx'] = np.median(indices)
        data[f"{variant}_best"] = summary

    # --- cocycle methods: metrics are stored directly under run[method] ---
    cocy_runs = [r for r in cocycles if r['scm'] == scm]
    if cocy_runs:
        cocy_methods = [name for name in cocy_runs[0].keys() if name != 'scm']
        for m in cocy_methods:
            vals = {met: [] for met in metrics}
            for run in cocy_runs:
                info = run[m]
                for met in metrics:
                    for x in info.get(met, []):
                        vals[met].append(x.item() if hasattr(x, 'item') else float(x))
            data[m] = {met: np.median(vals[met]) for met in metrics}

    # Cocycle rows carry no 'best_idx', so that column is NaN for them.
    tables[scm] = pd.DataFrame.from_dict(data, orient='index', columns=metrics + ['best_idx'])

'generate_backdoor_linear'

In [5]:
import torch
import pandas as pd
import numpy as np

# Uses `metrics`, `cocycles`, `flow_sets` and `scms` from the results-loading cell above; run that cell first.

k = 2   # only the first k base types of each run compete for "best"

# Per-SCM tables of MEAN metrics. For every run, the winner is the base type
# (among the first k keys of the run, in dict order) with the lowest CV score.
tables = {}
for scm in scms:
    data = {}

    for variant, runs in flow_sets.items():
        flow_runs = [r for r in runs if r['scm'] == scm]
        if not flow_runs:
            continue

        vals = {met: [] for met in metrics}   # winner's metric value, one per run
        indices = []                          # winner's stored 'index', one per run

        for run in flow_runs:
            # Every key of the run except 'scm' is a base type.
            base_types = [b for b in run.keys() if b != 'scm']

            # Restrict the competition to the first k of them.
            candidates = base_types[:k]

            # Winner = candidate whose smallest CV score is lowest.
            best_base = min(
                candidates,
                key=lambda b: float(np.min(run[b]['cv_scores']))
            )

            # The metrics for a base live under run[base][base].
            best_info = run[best_base][best_base]
            indices.append(int(best_info['index']))

            for met in metrics:
                entries = best_info.get(met)
                if entries is None or len(entries) == 0:
                    # Missing metric: record NaN (float(None) would raise a
                    # TypeError) so the gap is visible in the table.
                    vals[met].append(float('nan'))
                    continue
                x = entries[0]
                # Tensors / NumPy scalars expose .item(); plain numbers do not.
                vals[met].append(x.item() if hasattr(x, 'item') else float(x))

        data[f"{variant}_best_k{k}"] = {
            **{met: np.mean(vals[met]) for met in metrics},
            'best_idx': np.mean(indices)
        }

    # --- cocycle methods: metrics are stored directly under run[method] ---
    cocy_runs = [r for r in cocycles if r['scm'] == scm]
    if cocy_runs:
        # Loop variable deliberately not called `k`, to keep the config value
        # above unambiguous.
        cocy_methods = [name for name in cocy_runs[0].keys() if name != 'scm']
        for m in cocy_methods:
            vals = {met: [] for met in metrics}
            for run in cocy_runs:
                info = run[m]
                for met in metrics:
                    for x in info.get(met, []):
                        vals[met].append(x.item() if hasattr(x, 'item') else float(x))
            data[m] = {met: np.mean(vals[met]) for met in metrics}

    # Cocycle rows carry no 'best_idx', so that column is NaN for them.
    tables[scm] = pd.DataFrame.from_dict(
        data, orient='index', columns=metrics + ['best_idx']
    )

In [4]:
# Dump every SCM's summary table to stdout under a banner with the SCM name.
for scm_name in tables:
    df = tables[scm_name]
    print(f"\n=== {scm_name} ===", df, sep="\n")


=== 2var_linear ===
                   KS_int     KS_CF         W1_CF      W1_int     RMSE_CF
flows1_normal    0.783831  0.655291  1.015557e+02  663.902987  335.675162
flows1_laplace   0.826410  0.684385  1.011011e+02   96.906648  340.470044
flows1_studentt  0.850335  0.681736  4.638239e+11   97.408549  404.549719
flows2_normal    0.409096  0.443407  1.245856e+02   53.614232  234.415611
flows2_laplace   0.300178  0.226788  3.795476e-01   18.104623    0.390728
flows2_studentt  0.073123  0.157006  2.298207e-01   13.645127    0.551037
flows3_normal    0.422683  0.442570  9.610837e+01   54.212164  236.924892
flows3_laplace   0.430105  0.111586  3.137124e+00   19.845828   21.157070
flows3_studentt  0.210896  0.089414  1.791132e+00   13.756433    3.137680
Cocycle_CMMD_V   0.020375  0.023895  3.876947e-02   10.763269    0.024385

=== 2var_nonlinear ===
                   KS_int     KS_CF         W1_CF      W1_int     RMSE_CF
flows1_normal    0.748372  0.631330  1.008780e+02  671.698329  337.

In [31]:
import torch
import pandas as pd
import numpy as np

# Uses `metrics`, `cocycles`, `flow_sets` and `scms` from the results-loading cell above; run that cell first.

k = 2   # only the first k base types of each run compete for "best"

# Per-SCM tables of the STANDARD DEVIATION of each metric across runs. For
# every run, the winner is the base type (among the first k keys of the run, in
# dict order) with the lowest CV score.
# np.std is called with its default ddof.
# NOTE(review): confirm that is the intended estimator for a handful of trials.
# This overwrites any `tables` built by an earlier cell.
tables = {}
for scm in scms:
    data = {}

    for variant, runs in flow_sets.items():
        flow_runs = [r for r in runs if r['scm'] == scm]
        if not flow_runs:
            continue

        vals = {met: [] for met in metrics}   # winner's metric value, one per run
        indices = []                          # winner's stored 'index', one per run

        for run in flow_runs:
            # Every key of the run except 'scm' is a base type.
            base_types = [b for b in run.keys() if b != 'scm']

            # Restrict the competition to the first k of them.
            candidates = base_types[:k]

            # Winner = candidate whose smallest CV score is lowest.
            best_base = min(
                candidates,
                key=lambda b: float(np.min(run[b]['cv_scores']))
            )

            # The metrics for a base live under run[base][base].
            best_info = run[best_base][best_base]
            indices.append(int(best_info['index']))

            for met in metrics:
                entries = best_info.get(met)
                if entries is None or len(entries) == 0:
                    # Missing metric: record NaN (float(None) would raise a
                    # TypeError) so the gap is visible in the table.
                    vals[met].append(float('nan'))
                    continue
                x = entries[0]
                # Tensors / NumPy scalars expose .item(); plain numbers do not.
                vals[met].append(x.item() if hasattr(x, 'item') else float(x))

        data[f"{variant}_best_k{k}"] = {
            **{met: np.std(vals[met]) for met in metrics},
            # Unlike the metric columns, this one is the MEAN winner index.
            'best_idx': np.mean(indices)
        }

    # --- cocycle methods: metrics are stored directly under run[method] ---
    cocy_runs = [r for r in cocycles if r['scm'] == scm]
    if cocy_runs:
        # Loop variable deliberately not called `k`, to keep the config value
        # above unambiguous.
        cocy_methods = [name for name in cocy_runs[0].keys() if name != 'scm']
        for m in cocy_methods:
            vals = {met: [] for met in metrics}
            for run in cocy_runs:
                info = run[m]
                for met in metrics:
                    for x in info.get(met, []):
                        vals[met].append(x.item() if hasattr(x, 'item') else float(x))
            data[m] = {met: np.std(vals[met]) for met in metrics}

    # Cocycle rows carry no 'best_idx', so that column is NaN for them.
    tables[scm] = pd.DataFrame.from_dict(
        data, orient='index', columns=metrics + ['best_idx']
    )

In [8]:
# Dump every SCM's summary table to stdout under a banner with the SCM name.
for scm_name in tables:
    df = tables[scm_name]
    print(f"\n=== {scm_name} ===", df, sep="\n")


=== 2var_linear ===
                   KS_int     KS_CF       W1_CF     W1_int     RMSE_CF
flows1_normal    0.193606  0.265996   79.198449  49.972140  236.931084
flows1_laplace   0.130257  0.184517   15.161492  21.284450  202.000206
flows1_studentt  0.079353  0.122142    1.208272  11.638862   25.435562
flows2_normal    0.409096  0.443407  124.585585  53.614232  234.415611
flows2_laplace   0.311757  0.239388    1.460300  17.746339   32.702805
flows2_studentt  0.073123  0.157006    0.229821  13.645127    0.551037
flows3_normal    0.422683  0.442570   96.108373  54.212164  236.924892
flows3_laplace   0.402669  0.147898    7.194635  19.011633   80.918861
flows3_studentt  0.213441  0.065560    1.685639  13.719285    0.028256

=== 2var_nonlinear ===
                   KS_int     KS_CF        W1_CF     W1_int     RMSE_CF
flows1_normal    0.282324  0.373825   430.004306  59.172328  233.964162
flows1_laplace   0.129977  0.244358    15.093713  22.109622  200.067438
flows1_studentt  0.064685  0.