In [1]:
import torch
import matplotlib.pyplot as plt
import os
import numpy as np
import copy
os.environ["KMP_DUPLICSCORE_LIB_OK"]="TRUE"

In [2]:
import torch
import pandas as pd
import numpy as np

# Reload data due to state reset
cocycles = torch.load('cocycles_linear_results_n=1000_corr=0.0_dag=False_learnflow=True_trial=50.pt')
bgms     = torch.load('couplingbgm_linear_results_n=1000_corr=0.0_dag=False_learnflow=True_trial=50.pt')

def build_metrics_tables(cocycles, bgms, metrics=None):
    if metrics is None:
        metrics = ['KS_int', 'KS_CF', 'W1_CF', 'W1_int', 'RMSE_CF']

    noises = sorted({r['noise'] for r in cocycles + bgms})
    tables = {}

    for noise in noises:
        #flow_runs = [r for r in flows    if r['noise'] == noise]
        cocy_runs = [r for r in cocycles if r['noise'] == noise]
        bgm_runs  = [r for r in bgms     if r['noise'] == noise]

        if not (cocy_runs or bgm_runs):
            continue

        data = {}

        # cocycle methods
        if cocy_runs:
            methods = [k for k in cocy_runs[0].keys() if k not in ('noise','scm')]
            for m in methods:
                vals, idxs = {met: [] for met in metrics}, []
                for run in cocy_runs:
                    info = run[m]
                    for met in metrics:
                        vals[met].extend(
                            x.item() if hasattr(x, 'item') else float(x)
                            for x in info.get(met, [])
                        )
                    if 'index' in info:
                        idxs.append(int(info['index']))
                entry = {met: np.nanmean(vals[met]) for met in metrics}
                entry['pct_index0'] = 100 * np.mean([i == 0 for i in idxs]) if idxs else np.nan
                data[m] = entry

        # BGM methods
        if bgm_runs:
            methods = [k for k in bgm_runs[0].keys() if k not in ('noise','scm')]
            for m in methods:
                vals, idxs = {met: [] for met in metrics}, []
                for run in bgm_runs:
                    info = run[m][m]
                    for met in metrics:
                        vals[met].extend(
                            x.item() if hasattr(x, 'item') else float(x)
                            for x in info.get(met, [])
                        )
                    if 'index' in info:
                        idxs.append(int(info['index']))
                entry = {met: np.nanmean(vals[met]) for met in metrics}
                entry['pct_index0'] = 100 * np.mean([i == 0 for i in idxs]) if idxs else np.nan
                data[f'bgm_{m}'] = entry

        # build DataFrame
        df = pd.DataFrame.from_dict(data, orient='index', columns=metrics+['pct_index0'])
        tables[noise] = df

    return tables

# usage
tables_mu = build_metrics_tables(cocycles, bgms)
for noise, df in tables_mu.items():
    print(f"\nNoise level: {noise}")
    print(df)


Noise level: cauchy
                  KS_int     KS_CF      W1_CF     W1_int     RMSE_CF  \
Cocycle_CMMD_V  0.026959  0.032903   0.056667   8.736446    0.039640   
Cocycle_CMMD_U  0.008360  0.031908   0.052415   0.011566    0.033198   
bgm_normal      0.119621  0.176226  18.952671  16.193027  113.655442   
bgm_laplace     0.101811  0.139123  10.854441   9.905204  107.302449   
bgm_studentt    0.032337  0.053572   0.602800   7.262445    0.044520   

                pct_index0  
Cocycle_CMMD_V       100.0  
Cocycle_CMMD_U       100.0  
bgm_normal             0.0  
bgm_laplace            0.0  
bgm_studentt          98.0  

Noise level: gamma
                  KS_int     KS_CF     W1_CF    W1_int   RMSE_CF  pct_index0
Cocycle_CMMD_V  0.026569  0.030542  0.045385  0.036997  0.019929       100.0
Cocycle_CMMD_U  0.011620  0.031451  0.046794  0.005554  0.021808       100.0
bgm_normal      0.091818  0.057641  0.107262  0.174371  0.252470        24.0
bgm_laplace     0.075705  0.094035  0.186644

In [3]:
import torch
import pandas as pd
import numpy as np

# Reload data due to state reset
cocycles = torch.load('cocycles_linear_results_n=1000_corr=0.0_dag=False_learnflow=True_trial=50.pt')
bgms     = torch.load('couplingbgm_linear_results_n=1000_corr=0.0_dag=False_learnflow=True_trial=50.pt')

def build_metrics_tables(cocycles, bgms, metrics=None):
    if metrics is None:
        metrics = ['KS_int', 'KS_CF', 'W1_CF', 'W1_int', 'RMSE_CF']

    noises = sorted({r['noise'] for r in cocycles + bgms})
    tables = {}

    for noise in noises:
        #flow_runs = [r for r in flows    if r['noise'] == noise]
        cocy_runs = [r for r in cocycles if r['noise'] == noise]
        bgm_runs  = [r for r in bgms     if r['noise'] == noise]

        if not (cocy_runs or bgm_runs):
            continue

        data = {}

        # cocycle methods
        if cocy_runs:
            methods = [k for k in cocy_runs[0].keys() if k not in ('noise','scm')]
            for m in methods:
                vals, idxs = {met: [] for met in metrics}, []
                for run in cocy_runs:
                    info = run[m]
                    for met in metrics:
                        vals[met].extend(
                            x.item() if hasattr(x, 'item') else float(x)
                            for x in info.get(met, [])
                        )
                    if 'index' in info:
                        idxs.append(int(info['index']))
                entry = {met: np.std(vals[met]) for met in metrics}
                entry['pct_index0'] = 100 * np.mean([i == 0 for i in idxs]) if idxs else np.nan
                data[m] = entry

        # BGM methods
        if bgm_runs:
            methods = [k for k in bgm_runs[0].keys() if k not in ('noise','scm')]
            for m in methods:
                vals, idxs = {met: [] for met in metrics}, []
                for run in bgm_runs:
                    info = run[m][m]
                    for met in metrics:
                        vals[met].extend(
                            x.item() if hasattr(x, 'item') else float(x)
                            for x in info.get(met, [])
                        )
                    if 'index' in info:
                        idxs.append(int(info['index']))
                entry = {met: np.std(vals[met]) for met in metrics}
                entry['pct_index0'] = 100 * np.mean([i == 0 for i in idxs]) if idxs else np.nan
                data[f'bgm_{m}'] = entry

        # build DataFrame
        df = pd.DataFrame.from_dict(data, orient='index', columns=metrics+['pct_index0'])
        tables[noise] = df

    return tables

# usage
tables_sd = build_metrics_tables(cocycles, bgms)
for noise, df in tables_sd.items():
    print(f"\nNoise level: {noise}")
    print(df)


Noise level: cauchy
                  KS_int     KS_CF      W1_CF     W1_int     RMSE_CF  \
Cocycle_CMMD_V  0.009479  0.009009   0.022063  11.514008    0.030910   
Cocycle_CMMD_U  0.003375  0.008904   0.019691   0.007314    0.027133   
bgm_normal      0.074399  0.079518  35.064360  25.944073  341.947000   
bgm_laplace     0.058137  0.054536  23.274883  12.119518  351.572114   
bgm_studentt    0.020853  0.028904   0.109998   4.712865    0.091183   

                pct_index0  
Cocycle_CMMD_V       100.0  
Cocycle_CMMD_U       100.0  
bgm_normal             0.0  
bgm_laplace            0.0  
bgm_studentt          98.0  

Noise level: gamma
                  KS_int     KS_CF     W1_CF    W1_int   RMSE_CF  pct_index0
Cocycle_CMMD_V  0.007682  0.008241  0.014520  0.014807  0.015375       100.0
Cocycle_CMMD_U  0.005161  0.008293  0.015187  0.002996  0.016316       100.0
bgm_normal      0.048967  0.031548  0.065373  0.093801  0.141891        24.0
bgm_laplace     0.039210  0.024385  0.067517

In [4]:
import pandas as pd

def combine_mean_se_tables(mean_tables: dict, se_tables: dict, fmt: str = "{:.3f}") -> dict:
    """
    Combine two dicts of pandas DataFrames (means and SEs) into a dict of DataFrames
    where each cell is formatted as "mean ± se".
    
    Parameters:
    - mean_tables: dict mapping noise names to DataFrame of mean values.
    - se_tables: dict mapping noise names to DataFrame of SE values.
    - fmt: format string for numeric formatting (default 3 decimal places).
    
    Returns:
    - combined_tables: dict mapping noise names to DataFrame of formatted strings.
    """
    combined = {}
    for noise, mean_df in mean_tables.items():
        if noise not in se_tables:
            raise KeyError(f"Missing SE table for noise '{noise}'")
        se_df = se_tables[noise]
        # Verify matching structure
        if not (mean_df.index.equals(se_df.index) and mean_df.columns.equals(se_df.columns)):
            raise ValueError(f"Mean/SE table mismatch for noise '{noise}'")
        # Create combined DataFrame
        df_comb = mean_df.copy().astype(str)
        for col in mean_df.columns:
            df_comb[col] = [
                f"{fmt.format(mean_df.loc[idx, col])} ± {fmt.format(se_df.loc[idx, col])}"
                for idx in mean_df.index
            ]
        combined[noise] = df_comb
    return combined

# Usage example (uncomment when mean_tables and se_tables are defined):
combined_tables = combine_mean_se_tables(tables_mu, tables_sd)
print(combined_tables['rademacher'])

                       KS_int          KS_CF          W1_CF         W1_int  \
Cocycle_CMMD_V  0.271 ± 0.031  0.031 ± 0.009  0.046 ± 0.015  0.039 ± 0.018   
Cocycle_CMMD_U  0.268 ± 0.008  0.030 ± 0.008  0.045 ± 0.014  0.011 ± 0.009   
bgm_normal      0.408 ± 0.071  0.031 ± 0.017  0.094 ± 0.051  0.114 ± 0.092   
bgm_laplace     0.415 ± 0.067  0.067 ± 0.026  0.249 ± 0.141  0.150 ± 0.119   
bgm_studentt    0.412 ± 0.062  0.040 ± 0.029  0.112 ± 0.057  0.104 ± 0.068   

                      RMSE_CF         pct_index0  
Cocycle_CMMD_V  0.017 ± 0.019    98.000 ± 98.000  
Cocycle_CMMD_U  0.014 ± 0.012  100.000 ± 100.000  
bgm_normal      0.331 ± 0.254      0.000 ± 0.000  
bgm_laplace     0.480 ± 0.294      0.000 ± 0.000  
bgm_studentt    0.391 ± 0.307      0.000 ± 0.000  
