In [1]:
import torch
import matplotlib.pyplot as plt
import os
import numpy as np
import copy
os.environ["KMP_DUPLICSCORE_LIB_OK"]="TRUE"

In [5]:
import torch
import pandas as pd
import numpy as np

# Reload data due to state reset
cocycles = torch.load('cocycles_linear_results_n=1000_corr=0.0_dag=False_learnflow=True_trial=50.pt')
flows = torch.load('causalflow_linear_results_n=1000_corr=0.0_dag=False_learnflow=True_trial=50_shift.pt')
urrs = torch.load('urr_linear_results_n=1000_corr=0.0_dag=False_learnflow=True_trial=50_learnable.pt')
bgms = torch.load('couplingbgm_linear_results_n=1000_corr=0.0_dag=False_learnflow=True_trial=50.pt')

metrics = ['KS_int', 'KS_CF', 'W1_CF', 'W1_int', 'RMSE_CF']
noises = sorted({r['noise'] for r in flows + urrs})
tables = {noise: {} for noise in noises}

for noise in noises:
    flow_runs = [r for r in flows if r['noise'] == noise]
    cocy_runs = [r for r in cocycles if r['noise'] == noise]
    urr_runs = [r for r in urrs if r['noise'] == noise]
    bgm_runs = [r for r in bgms if r['noise'] == noise]

    if not (flow_runs or cocy_runs or urr_runs):
        continue

    data = {}

    if flow_runs:
        flow_methods = [k for k in flow_runs[0].keys() if k not in ('noise', 'scm')]
        for m in flow_methods:
            vals = {met: [] for met in metrics}
            for run in flow_runs:
                info = run[m][m]
                for met in metrics:
                    for x in info.get(met, []):
                        vals[met].append(x.item() if hasattr(x, 'item') else float(x))
            data[m] = {met: np.nanmean(vals[met]) for met in metrics}

    if cocy_runs:
        cocy_methods = [k for k in cocy_runs[0].keys() if k not in ('noise', 'scm')]
        for m in cocy_methods:
            vals = {met: [] for met in metrics}
            for run in cocy_runs:
                info = run[m]
                for met in metrics:
                    for x in info.get(met, []):
                        vals[met].append(x.item() if hasattr(x, 'item') else float(x))
            data[m] = {met: np.nanmean(vals[met]) for met in metrics}

    if urr_runs:
        urr_methods = [k for k in urr_runs[0].keys() if k not in ('noise', 'scm')]
        for m in urr_methods:
            vals = {met: [] for met in metrics}
            for run in urr_runs:
                info = run[m]
                for met in metrics:
                    for x in info.get(met, []):
                        vals[met].append(x.item() if hasattr(x, 'item') else float(x))
            data[m] = {met: np.nanmean(vals[met]) for met in metrics}

    if bgm_runs:
        bgm_methods = [k for k in bgm_runs[0].keys() if k not in ('noise', 'scm')]
        for m in bgm_methods:
            vals = {met: [] for met in metrics}
            for run in bgm_runs:
                info = run[m][m]
                for met in metrics:
                    for x in info.get(met, []):
                        vals[met].append(x.item() if hasattr(x, 'item') else float(x))
            data[f'bgm_{m}'] = {met: np.nanmean(vals[met]) for met in metrics}

    df = pd.DataFrame.from_dict(data, orient='index', columns=metrics)
    tables[noise] = df

In [30]:
import torch
import pandas as pd
import numpy as np

# Reload data due to state reset
cocycles = torch.load('cocycles_linear_results_n=1000_corr=0.0_dag=False_learnflow=True_trial=50.pt')
urrs     = torch.load('urr_linear_results_n=1000_corr=0.0_dag=False_learnflow=True_trial=50_learnable.pt')
bgms     = torch.load('couplingbgm_linear_results_n=1000_corr=0.0_dag=False_learnflow=True_trial=50.pt')

def build_metrics_tables(cocycles, flows, urrs, bgms, metrics=None):
    if metrics is None:
        metrics = ['KS_int', 'KS_CF', 'W1_CF', 'W1_int', 'RMSE_CF']

    noises = sorted({r['noise'] for r in flows + cocycles + urrs + bgms})
    tables = {}

    for noise in noises:
        #flow_runs = [r for r in flows    if r['noise'] == noise]
        cocy_runs = [r for r in cocycles if r['noise'] == noise]
        urr_runs  = [r for r in urrs     if r['noise'] == noise]
        bgm_runs  = [r for r in bgms     if r['noise'] == noise]

        if not (flow_runs or cocy_runs or urr_runs or bgm_runs):
            continue

        data = {}

        # cocycle methods
        if cocy_runs:
            methods = [k for k in cocy_runs[0].keys() if k not in ('noise','scm')]
            for m in methods:
                vals, idxs = {met: [] for met in metrics}, []
                for run in cocy_runs:
                    info = run[m]
                    for met in metrics:
                        vals[met].extend(
                            x.item() if hasattr(x, 'item') else float(x)
                            for x in info.get(met, [])
                        )
                    if 'index' in info:
                        idxs.append(int(info['index']))
                entry = {met: np.nanmean(vals[met]) for met in metrics}
                entry['pct_index0'] = 100 * np.mean([i == 0 for i in idxs]) if idxs else np.nan
                data[m] = entry

        # URR methods
        if urr_runs:
            methods = [k for k in urr_runs[0].keys() if k not in ('noise','scm')]
            for m in methods:
                vals, idxs = {met: [] for met in metrics}, []
                for run in urr_runs:
                    info = run[m]
                    for met in metrics:
                        vals[met].extend(
                            x.item() if hasattr(x, 'item') else float(x)
                            for x in info.get(met, [])
                        )
                    if 'index' in info:
                        idxs.append(int(info['index']))
                entry = {met: np.nanmean(vals[met]) for met in metrics}
                entry['pct_index0'] = 100 * np.mean([i == 0 for i in idxs]) if idxs else np.nan
                data[m] = entry

        # BGM methods
        if bgm_runs:
            methods = [k for k in bgm_runs[0].keys() if k not in ('noise','scm')]
            for m in methods:
                vals, idxs = {met: [] for met in metrics}, []
                for run in bgm_runs:
                    info = run[m][m]
                    for met in metrics:
                        vals[met].extend(
                            x.item() if hasattr(x, 'item') else float(x)
                            for x in info.get(met, [])
                        )
                    if 'index' in info:
                        idxs.append(int(info['index']))
                entry = {met: np.nanmean(vals[met]) for met in metrics}
                entry['pct_index0'] = 100 * np.mean([i == 0 for i in idxs]) if idxs else np.nan
                data[f'bgm_{m}'] = entry

        # build DataFrame
        df = pd.DataFrame.from_dict(data, orient='index', columns=metrics+['pct_index0'])
        tables[noise] = df

    return tables

# usage
tables_mu = build_metrics_tables(cocycles, flows, urrs, bgms)
for noise, df in tables_mu.items():
    print(f"\nNoise level: {noise}")
    print(df)


Noise level: cauchy
                  KS_int     KS_CF      W1_CF     W1_int     RMSE_CF  \
Cocycle_CMMD_V  0.026959  0.032903   0.056667   8.736446    0.039640   
Cocycle_CMMD_U  0.008360  0.031908   0.052415   0.011566    0.033198   
URR_G           0.083004  0.061925   0.191893   8.170640    1.742931   
URR_L           0.048810  0.041813   0.085710   8.013987    0.193021   
URR_T           0.023931  0.050222   0.089543  10.072847    0.173351   
bgm_normal      0.120662  0.178745  18.656052  16.453004  113.666810   
bgm_laplace     0.109537  0.134827  10.137122  10.176834   97.745206   
bgm_studentt    0.029461  0.054422   0.597446   7.256052    0.043857   

                pct_index0  
Cocycle_CMMD_V  100.000000  
Cocycle_CMMD_U  100.000000  
URR_G            68.627451  
URR_L            90.196078  
URR_T            88.235294  
bgm_normal        0.000000  
bgm_laplace       4.000000  
bgm_studentt     94.000000  

Noise level: gamma
                  KS_int     KS_CF     W1_CF    W

In [29]:
import torch
import pandas as pd
import numpy as np

# Reload data due to state reset
cocycles = torch.load('cocycles_linear_results_n=1000_corr=0.0_dag=False_learnflow=True_trial=50.pt')
urrs     = torch.load('urr_linear_results_n=1000_corr=0.0_dag=False_learnflow=True_trial=50_learnable.pt')
bgms     = torch.load('couplingbgm_linear_results_n=1000_corr=0.0_dag=False_learnflow=True_trial=50.pt')

def build_metrics_tables(cocycles, flows, urrs, bgms, metrics=None):
    if metrics is None:
        metrics = ['KS_int', 'KS_CF', 'W1_CF', 'W1_int', 'RMSE_CF']

    noises = sorted({r['noise'] for r in flows + cocycles + urrs + bgms})
    tables = {}

    for noise in noises:
        #flow_runs = [r for r in flows    if r['noise'] == noise]
        cocy_runs = [r for r in cocycles if r['noise'] == noise]
        urr_runs  = [r for r in urrs     if r['noise'] == noise]
        bgm_runs  = [r for r in bgms     if r['noise'] == noise]

        if not (flow_runs or cocy_runs or urr_runs or bgm_runs):
            continue

        data = {}

        # cocycle methods
        if cocy_runs:
            methods = [k for k in cocy_runs[0].keys() if k not in ('noise','scm')]
            for m in methods:
                vals, idxs = {met: [] for met in metrics}, []
                for run in cocy_runs:
                    info = run[m]
                    for met in metrics:
                        vals[met].extend(
                            x.item() if hasattr(x, 'item') else float(x)
                            for x in info.get(met, [])
                        )
                    if 'index' in info:
                        idxs.append(int(info['index']))
                entry = {met: np.std(vals[met]) for met in metrics}
                entry['pct_index0'] = 100 * np.mean([i == 0 for i in idxs]) if idxs else np.nan
                data[m] = entry

        # URR methods
        if urr_runs:
            methods = [k for k in urr_runs[0].keys() if k not in ('noise','scm')]
            for m in methods:
                vals, idxs = {met: [] for met in metrics}, []
                for run in urr_runs:
                    info = run[m]
                    for met in metrics:
                        vals[met].extend(
                            x.item() if hasattr(x, 'item') else float(x)
                            for x in info.get(met, [])
                        )
                    if 'index' in info:
                        idxs.append(int(info['index']))
                entry = {met: np.std(vals[met]) for met in metrics}
                entry['pct_index0'] = 100 * np.mean([i == 0 for i in idxs]) if idxs else np.nan
                data[m] = entry

        # BGM methods
        if bgm_runs:
            methods = [k for k in bgm_runs[0].keys() if k not in ('noise','scm')]
            for m in methods:
                vals, idxs = {met: [] for met in metrics}, []
                for run in bgm_runs:
                    info = run[m][m]
                    for met in metrics:
                        vals[met].extend(
                            x.item() if hasattr(x, 'item') else float(x)
                            for x in info.get(met, [])
                        )
                    if 'index' in info:
                        idxs.append(int(info['index']))
                entry = {met: np.std(vals[met]) for met in metrics}
                entry['pct_index0'] = 100 * np.mean([i == 0 for i in idxs]) if idxs else np.nan
                data[f'bgm_{m}'] = entry

        # build DataFrame
        df = pd.DataFrame.from_dict(data, orient='index', columns=metrics+['pct_index0'])
        tables[noise] = df

    return tables

# usage
tables_sd = build_metrics_tables(cocycles, flows, urrs, bgms)
for noise, df in tables_sd.items():
    print(f"\nNoise level: {noise}")
    print(df)


Noise level: cauchy
                  KS_int     KS_CF      W1_CF     W1_int     RMSE_CF  \
Cocycle_CMMD_V  0.009479  0.009009   0.022063  11.514008    0.030910   
Cocycle_CMMD_U  0.003375  0.008904   0.019691   0.007314    0.027133   
URR_G           0.034170  0.056379   0.270730   4.708630    4.543963   
URR_L           0.022478  0.025690   0.116386   4.669668    0.674765   
URR_T           0.019857  0.056843   0.130510  11.763247    0.757929   
bgm_normal      0.079151  0.086165  34.849693  25.983843  341.875389   
bgm_laplace     0.068689  0.055771  23.405036  12.076095  351.814960   
bgm_studentt    0.007462  0.018985   0.096117   4.709804    0.053443   

                pct_index0  
Cocycle_CMMD_V  100.000000  
Cocycle_CMMD_U  100.000000  
URR_G            68.627451  
URR_L            90.196078  
URR_T            88.235294  
bgm_normal        0.000000  
bgm_laplace       4.000000  
bgm_studentt     94.000000  

Noise level: gamma
                  KS_int     KS_CF     W1_CF    W

In [34]:
tables_mu

{'cauchy':                   KS_int     KS_CF      W1_CF     W1_int     RMSE_CF  \
 Cocycle_CMMD_V  0.026959  0.032903   0.056667   8.736446    0.039640   
 Cocycle_CMMD_U  0.008360  0.031908   0.052415   0.011566    0.033198   
 URR_G           0.083004  0.061925   0.191893   8.170640    1.742931   
 URR_L           0.048810  0.041813   0.085710   8.013987    0.193021   
 URR_T           0.023931  0.050222   0.089543  10.072847    0.173351   
 bgm_normal      0.120662  0.178745  18.656052  16.453004  113.666810   
 bgm_laplace     0.109537  0.134827  10.137122  10.176834   97.745206   
 bgm_studentt    0.029461  0.054422   0.597446   7.256052    0.043857   
 
                 pct_index0  
 Cocycle_CMMD_V  100.000000  
 Cocycle_CMMD_U  100.000000  
 URR_G            68.627451  
 URR_L            90.196078  
 URR_T            88.235294  
 bgm_normal        0.000000  
 bgm_laplace       4.000000  
 bgm_studentt     94.000000  ,
 'gamma':                   KS_int     KS_CF     W1_CF    W1

In [46]:
import pandas as pd

def combine_mean_se_tables(mean_tables: dict, se_tables: dict, fmt: str = "{:.3f}") -> dict:
    """
    Combine two dicts of pandas DataFrames (means and SEs) into a dict of DataFrames
    where each cell is formatted as "mean ± se".
    
    Parameters:
    - mean_tables: dict mapping noise names to DataFrame of mean values.
    - se_tables: dict mapping noise names to DataFrame of SE values.
    - fmt: format string for numeric formatting (default 3 decimal places).
    
    Returns:
    - combined_tables: dict mapping noise names to DataFrame of formatted strings.
    """
    combined = {}
    for noise, mean_df in mean_tables.items():
        if noise not in se_tables:
            raise KeyError(f"Missing SE table for noise '{noise}'")
        se_df = se_tables[noise]
        # Verify matching structure
        if not (mean_df.index.equals(se_df.index) and mean_df.columns.equals(se_df.columns)):
            raise ValueError(f"Mean/SE table mismatch for noise '{noise}'")
        # Create combined DataFrame
        df_comb = mean_df.copy().astype(str)
        for col in mean_df.columns:
            df_comb[col] = [
                f"{fmt.format(mean_df.loc[idx, col])} ± {fmt.format(se_df.loc[idx, col])}"
                for idx in mean_df.index
            ]
        combined[noise] = df_comb
    return combined

# Usage example (uncomment when mean_tables and se_tables are defined):
combined_tables = combine_mean_se_tables(tables_mu, tables_sd)
print(combined_tables['rademacher'])

                       KS_int          KS_CF          W1_CF         W1_int  \
Cocycle_CMMD_V  0.271 ± 0.031  0.031 ± 0.009  0.046 ± 0.015  0.039 ± 0.018   
Cocycle_CMMD_U  0.268 ± 0.008  0.030 ± 0.008  0.045 ± 0.014  0.011 ± 0.009   
URR_G           0.401 ± 0.037  0.071 ± 0.031  0.108 ± 0.054  0.197 ± 0.049   
URR_L           0.418 ± 0.045  0.087 ± 0.135    0.104 ± nan    0.257 ± nan   
URR_T           0.413 ± 0.032  0.068 ± 0.027  0.114 ± 0.075  0.214 ± 0.055   
bgm_normal      0.405 ± 0.069  0.031 ± 0.017  0.092 ± 0.052  0.122 ± 0.109   
bgm_laplace     0.415 ± 0.067  0.067 ± 0.026  0.249 ± 0.141  0.150 ± 0.119   
bgm_studentt    0.412 ± 0.062  0.040 ± 0.029  0.112 ± 0.057  0.104 ± 0.068   

                      RMSE_CF         pct_index0  
Cocycle_CMMD_V  0.017 ± 0.019    98.000 ± 98.000  
Cocycle_CMMD_U  0.014 ± 0.012  100.000 ± 100.000  
URR_G           0.240 ± 0.158      0.000 ± 0.000  
URR_L             0.242 ± nan      4.082 ± 4.082  
URR_T           0.260 ± 0.194      0.000 ±