# Synergy Graph Sufficiency Analysis

Evaluates whether synergy graph structural properties (density, clique coverage, threshold sensitivity) explain the SG-FIGS accuracy gap across datasets.

**5 Core Metrics:**
1. Graph Density vs. Accuracy Gap Correlation
2. Threshold Sensitivity Profile
3. Oblique Activation Rate vs. Accuracy
4. Sparse Graph Correction Counterfactual
5. Stability-Performance Relationship

**Part 1** runs a quick demo on a curated 5-dataset subset.
**Part 2** runs the full analysis on all 12 datasets with original parameters.

In [None]:
import json
import math
import os

import matplotlib.pyplot as plt

In [None]:
# Remote locations of the demo inputs. load_full() / load_mini() hand these to
# _load_json together with a local file name to use as a fallback.
GITHUB_FULL_DATA_URL = "https://raw.githubusercontent.com/AMGrobelnik/ai-invention-ac2586-synergy-guided-oblique-splits-using-part/main/synergygraph_ev/demo/full_demo_data.json"
GITHUB_MINI_DATA_URL = "https://raw.githubusercontent.com/AMGrobelnik/ai-invention-ac2586-synergy-guided-oblique-splits-using-part/main/synergygraph_ev/demo/mini_demo_data.json"

def _load_json(url, local_path):
    """Load a JSON document from ``url``, falling back to ``local_path``.

    The download is deliberately best-effort: any failure (no network, HTTP
    error, undecodable payload, ...) is remembered instead of raised, and the
    local file is tried next.

    Args:
        url: Remote location to try first.
        local_path: File to read when the download fails.

    Returns:
        The decoded JSON value.

    Raises:
        FileNotFoundError: If the download failed and ``local_path`` does not
            exist. The download error is chained as ``__cause__`` and quoted
            in the message so the real reason is not lost.
    """
    try:
        import urllib.request

        # A timeout keeps an unreachable host from hanging the notebook.
        with urllib.request.urlopen(url, timeout=30) as response:
            return json.loads(response.read().decode("utf-8"))
    except Exception as exc:  # best-effort by design: fall through to the local copy
        # `exc` is unbound when the except clause ends, so keep our own reference.
        download_error = exc

    if os.path.exists(local_path):
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(local_path, encoding="utf-8") as f:
            return json.load(f)
    raise FileNotFoundError(
        f"Could not load {local_path} "
        f"(download from {url} failed: {download_error!r})"
    ) from download_error

def load_mini():
    """Return the mini demo dataset via ``_load_json`` (GitHub URL plus local fallback file)."""
    fallback_file = "mini_demo_data.json"
    return _load_json(GITHUB_MINI_DATA_URL, fallback_file)

def load_full():
    """Return the full demo dataset via ``_load_json`` (GitHub URL plus local fallback file)."""
    fallback_file = "full_demo_data.json"
    return _load_json(GITHUB_FULL_DATA_URL, fallback_file)

## Helper Functions

Statistical helper functions copied from `eval.py` (scipy-free): Spearman correlation with average-rank tie handling and Pearson correlation, each returning an approximate two-sided p-value based on a normal approximation of the t-statistic.

In [None]:
def safe_float(val: float) -> float:
    """Return ``val`` as a float, substituting 0.0 for None, NaN and +/-Inf."""
    if val is not None and math.isfinite(val):
        return float(val)
    return 0.0


def _normal_cdf(x: float) -> float:
    """Standard normal CDF, evaluated as 0.5 * (1 + erf(x / sqrt(2)))."""
    z = x / math.sqrt(2)
    return 0.5 * (1.0 + math.erf(z))


def spearman_correlation(x: list[float], y: list[float]) -> tuple[float, float]:
    """Compute Spearman's rank correlation and an approximate two-sided p-value.

    Scipy-free: both inputs are converted to ranks (ties share the average of
    the ranks they span) and Pearson's formula is applied to the ranks.

    The p-value treats ``t = rho * sqrt((n - 2) / (1 - rho**2))`` as a standard
    normal deviate (via ``_normal_cdf``) rather than using Student's t.
    NOTE(review): the normal distribution has lighter tails than Student's t,
    so for small ``n`` the reported p-values are smaller than exact ones.

    Args:
        x: First sample.
        y: Second sample; must have the same length as ``x``.

    Returns:
        ``(rho, p_value)``. ``(0.0, 1.0)`` is returned when there are fewer
        than 3 observations or when either sample is constant.

    Raises:
        ValueError: If ``x`` and ``y`` differ in length.
    """
    n = len(x)
    if len(y) != n:
        # Previously this ended in an IndexError or a silently wrong result.
        raise ValueError(
            f"x and y must have the same length (got {n} and {len(y)})"
        )
    if n < 3:
        return 0.0, 1.0

    def rank_data(data: list[float]) -> list[float]:
        """Assign 1-based ranks; tied values get the mean of their ranks."""
        ordered = sorted(enumerate(data), key=lambda pair: pair[1])
        ranks = [0.0] * n
        start = 0
        while start < n:
            # Extend [start, end] over the run of equal values.
            end = start
            while end < n - 1 and ordered[end + 1][1] == ordered[end][1]:
                end += 1
            shared_rank = (start + end) / 2.0 + 1.0
            for pos in range(start, end + 1):
                ranks[ordered[pos][0]] = shared_rank
            start = end + 1
        return ranks

    rx = rank_data(x)
    ry = rank_data(y)

    # Pearson correlation on the ranks.
    mean_rx = sum(rx) / n
    mean_ry = sum(ry) / n
    dev_x = [r - mean_rx for r in rx]
    dev_y = [r - mean_ry for r in ry]
    cov = sum(a * b for a, b in zip(dev_x, dev_y))
    ss_x = sum(a ** 2 for a in dev_x)
    ss_y = sum(b ** 2 for b in dev_y)

    if ss_x == 0 or ss_y == 0:
        return 0.0, 1.0

    # One sqrt of the product (instead of a product of two sqrts) rounds once,
    # so perfectly monotone data gives exactly +/-1.0. Rank sums of squares are
    # bounded, so the product cannot overflow or underflow here.
    rho = cov / math.sqrt(ss_x * ss_y)
    rho = max(-1.0, min(1.0, rho))

    if abs(rho) >= 1.0:
        p_val = 0.0
    else:
        t_stat = rho * math.sqrt((n - 2) / (1 - rho**2))
        p_val = 2 * (1 - _normal_cdf(abs(t_stat)))

    return rho, p_val


def pearson_correlation(x: list[float], y: list[float]) -> tuple[float, float]:
    """Compute Pearson's correlation and an approximate two-sided p-value.

    The p-value treats ``t = r * sqrt((n - 2) / (1 - r**2))`` as a standard
    normal deviate (via ``_normal_cdf``) rather than using Student's t.
    NOTE(review): the normal distribution has lighter tails than Student's t,
    so for small ``n`` the reported p-values are smaller than exact ones.

    Args:
        x: First sample.
        y: Second sample; must have the same length as ``x``.

    Returns:
        ``(r, p_value)``. ``(0.0, 1.0)`` is returned when there are fewer than
        3 observations or when either sample has zero variance.

    Raises:
        ValueError: If ``x`` and ``y`` differ in length.
    """
    n = len(x)
    if len(y) != n:
        # A longer y used to be silently truncated while its mean was still
        # divided by len(x); a shorter y raised IndexError.
        raise ValueError(
            f"x and y must have the same length (got {n} and {len(y)})"
        )
    if n < 3:
        return 0.0, 1.0

    mean_x = sum(x) / n
    mean_y = sum(y) / n
    dev_x = [xi - mean_x for xi in x]
    dev_y = [yi - mean_y for yi in y]
    num = sum(a * b for a, b in zip(dev_x, dev_y))
    den_x = math.sqrt(sum(a ** 2 for a in dev_x))
    den_y = math.sqrt(sum(b ** 2 for b in dev_y))

    if den_x == 0 or den_y == 0:
        return 0.0, 1.0

    r = num / (den_x * den_y)
    # Guard against rounding pushing |r| marginally above 1.
    r = max(-1.0, min(1.0, r))

    if abs(r) >= 1.0:
        p_val = 0.0
    else:
        t_stat = r * math.sqrt((n - 2) / (1 - r**2))
        p_val = 2 * (1 - _normal_cdf(abs(t_stat)))

    return r, p_val

In [None]:
def extract_dataset_summaries(data: dict) -> list[dict]:
    """Collect, per dataset block, the first ``synergy_graph_sufficiency`` example.

    Each example's ``input`` field is a JSON string; an example qualifies when
    the decoded object's ``analysis`` entry equals ``"synergy_graph_sufficiency"``.
    Dataset blocks without such an example contribute nothing.
    """
    wanted = "synergy_graph_sufficiency"
    picked = []
    for block in data["datasets"]:
        # next() stops at the first hit, so later examples are never parsed.
        first_hit = next(
            (
                example
                for example in block["examples"]
                if json.loads(example["input"]).get("analysis") == wanted
            ),
            None,
        )
        if first_hit is not None:
            picked.append(first_hit)
    return picked


def extract_method_examples(data: dict) -> list[dict]:
    """Collect every example whose decoded ``input`` has ``analysis == "method_performance"``.

    Examples are returned in dataset order, then in their order within each
    dataset block.
    """
    return [
        example
        for block in data["datasets"]
        for example in block["examples"]
        if json.loads(example["input"]).get("analysis") == "method_performance"
    ]


def _pair_with_dataset(summaries: list[dict]) -> list[tuple[str, dict]]:
    """Pair each summary with the dataset name stored in its JSON ``input``."""
    return [(json.loads(s["input"])["dataset"], s) for s in summaries]


def _density_gap_metric(named: list[tuple[str, dict]]) -> dict:
    """Metric 1: Spearman correlation of graph density vs. accuracy gap."""
    densities = [s.get("eval_graph_density_25pct", 0) for _, s in named]
    acc_gaps = [s.get("eval_accuracy_gap", 0) for _, s in named]
    rho, p_val = spearman_correlation(densities, acc_gaps)
    return {
        "spearman_rho": round(rho, 4),
        "spearman_p_value": round(p_val, 6),
        "per_dataset": {
            ds: {
                "graph_density_25pct": s.get("eval_graph_density_25pct", 0),
                "accuracy_gap": s.get("eval_accuracy_gap", 0),
                "avg_synergy_edges": s.get("eval_avg_synergy_edges", 0),
                "n_features": s.get("metadata_n_features", 0),
            }
            for ds, s in named
        },
    }


def _threshold_sensitivity_metric(named: list[tuple[str, dict]]) -> dict:
    """Metric 2: tally stored threshold profiles and gather per-threshold accuracies."""
    profile_counts = {"monotonic_improving": 0, "monotonic_worsening": 0, "non_monotonic": 0, "flat": 0}
    per_ds = {}
    for ds, s in named:
        profile = s.get("metadata_threshold_profile", "unknown")
        # Profiles outside the four known labels (including the "unknown"
        # default) are recorded per dataset but not counted.
        if profile in profile_counts:
            profile_counts[profile] += 1
        per_ds[ds] = {
            "figs_accuracy": s.get("eval_figs_accuracy", 0),
            "sg10_accuracy": s.get("eval_sg10_accuracy", 0),
            "sg25_accuracy": s.get("eval_sg25_accuracy", 0),
            "sg50_accuracy": s.get("eval_sg50_accuracy", 0),
            "profile": profile,
        }
    return {"profile_distribution": profile_counts, "per_dataset": per_ds}


def _oblique_activation_metric(named: list[tuple[str, dict]]) -> dict:
    """Metric 3: Pearson correlation of oblique fraction vs. accuracy, plus zero/nonzero split."""
    oblique_fracs = []
    oblique_accs = []
    zero_oblique = []
    nonzero_oblique = []
    for ds, s in named:
        frac = s.get("eval_oblique_fraction_sg25", 0)
        # NOTE(review): this reads "eval_accuracy_sg25" while Metric 2 reads
        # "eval_sg25_accuracy" -- confirm both fields exist in the data; a
        # missing key silently becomes 0 and would flatten the correlation.
        acc = s.get("eval_accuracy_sg25", 0)
        oblique_fracs.append(frac)
        oblique_accs.append(acc)
        if frac == 0.0:
            zero_oblique.append(ds)
        else:
            nonzero_oblique.append(ds)
    r, p_val = pearson_correlation(oblique_fracs, oblique_accs)
    return {
        "pearson_r": round(r, 4),
        "pearson_p_value": round(p_val, 6),
        "zero_oblique_datasets": zero_oblique,
        "nonzero_oblique_datasets": nonzero_oblique,
    }


def _sparse_correction_metric(named: list[tuple[str, dict]]) -> dict:
    """Metric 4: for zero-oblique datasets, report whether results are flagged identical to FIGS."""
    discrepancies = []
    per_ds = {}
    for ds, s in named:
        if s.get("eval_oblique_fraction_sg25", 0) == 0.0:
            disc = s.get("eval_max_fold_discrepancy", 0)
            # A missing flag defaults to 1.0, i.e. is treated as "identical".
            identical = s.get("eval_is_identical_to_figs", 1.0) == 1.0
            per_ds[ds] = {"max_fold_discrepancy": disc, "is_identical_to_figs": identical}
            if not identical:
                discrepancies.append(ds)
    return {"n_zero_oblique": len(per_ds), "discrepancies": discrepancies, "per_dataset": per_ds}


def _stability_metric(named: list[tuple[str, dict]]) -> dict:
    """Metric 5: Spearman correlation of mean Jaccard stability vs. accuracy gap."""
    jaccards = []
    stab_gaps = []
    per_ds = {}
    for ds, s in named:
        jaccard = s.get("eval_mean_jaccard")
        gap = s.get("eval_stability_accuracy_gap")
        # Only datasets carrying both values take part in the correlation.
        if jaccard is not None and gap is not None:
            jaccards.append(jaccard)
            stab_gaps.append(gap)
            per_ds[ds] = {"mean_jaccard": jaccard, "accuracy_gap": gap}
    rho, p_val = spearman_correlation(jaccards, stab_gaps)
    return {"spearman_rho": round(rho, 4), "spearman_p_value": round(p_val, 6), "per_dataset": per_ds}


def run_analysis(data: dict) -> dict:
    """Run all five metrics on the loaded data.

    Args:
        data: Loaded demo payload. Must contain ``"datasets"`` (a list of
            blocks, each with an ``"examples"`` list whose items carry a JSON
            string under ``"input"``) and ``"metrics_agg"``.

    Returns:
        A dict with keys ``metrics_agg`` (passed through from ``data``),
        ``metric1`` .. ``metric5`` (one result dict per metric), ``summaries``
        and ``method_examples`` (the extracted examples) and ``datasets``
        (dataset names in summary order).

    Raises:
        KeyError: If ``"datasets"``, ``"metrics_agg"`` or a summary's
            ``"dataset"`` entry is missing.

    Note:
        Most summary fields are read with ``.get(key, 0)``, so an absent field
        contributes 0 rather than raising.
    """
    summaries = extract_dataset_summaries(data)
    method_examples = extract_method_examples(data)
    metrics_agg = data["metrics_agg"]
    # Decode each summary's JSON input once and share the result across metrics.
    named = _pair_with_dataset(summaries)

    return {
        "metrics_agg": metrics_agg,
        "metric1": _density_gap_metric(named),
        "metric2": _threshold_sensitivity_metric(named),
        "metric3": _oblique_activation_metric(named),
        "metric4": _sparse_correction_metric(named),
        "metric5": _stability_metric(named),
        "summaries": summaries,
        "method_examples": method_examples,
        "datasets": [ds for ds, _ in named],
    }

## Part 1 — Quick Demo (5-Dataset Subset)

Runs on a curated 5-dataset subset: australian, banknote, breast_cancer, sonar, wine — covering all 4 threshold sensitivity profiles.

In [None]:
# Load the mini dataset and report how many dataset blocks and examples it holds.
data = load_mini()
print(f"Loaded {len(data['datasets'])} datasets, "
      f"{sum(len(d['examples']) for d in data['datasets'])} total examples")
print("Datasets:", [d["dataset"] for d in data["datasets"]])

### Run All 5 Metrics

Extract per-dataset summaries, compute Spearman/Pearson correlations, tally the precomputed threshold-sensitivity profiles, and check the sparse-graph correction counterfactuals.

In [None]:
# Compute all five metrics on the mini dataset, then print a plain-text report.
results = run_analysis(data)

print("=" * 60)
# Metric 1: correlation between graph density and accuracy gap.
print("METRIC 1: Graph Density vs. Accuracy Gap")
print(f"  Spearman rho = {results['metric1']['spearman_rho']}, "
      f"p = {results['metric1']['spearman_p_value']}")
print()
# Metric 2: profile tallies, then one row of accuracies per dataset.
print("METRIC 2: Threshold Sensitivity Profiles")
print(f"  Distribution: {results['metric2']['profile_distribution']}")
for ds, info in results['metric2']['per_dataset'].items():
    print(f"    {ds:20s} FIGS={info['figs_accuracy']:.4f}  "
          f"SG10={info['sg10_accuracy']:.4f}  SG25={info['sg25_accuracy']:.4f}  "
          f"SG50={info['sg50_accuracy']:.4f}  -> {info['profile']}")
print()
# Metric 3: correlation plus the zero / nonzero oblique-fraction split.
print("METRIC 3: Oblique Activation Rate")
print(f"  Pearson r = {results['metric3']['pearson_r']}, "
      f"p = {results['metric3']['pearson_p_value']}")
print(f"  Zero-oblique datasets: {results['metric3']['zero_oblique_datasets']}")
print(f"  Nonzero-oblique datasets: {results['metric3']['nonzero_oblique_datasets']}")
print()
# Metric 4: only datasets with zero oblique fraction appear here.
print("METRIC 4: Sparse Graph Correction")
print(f"  Zero-oblique datasets checked: {results['metric4']['n_zero_oblique']}")
print(f"  Discrepancies from FIGS: {results['metric4']['discrepancies']}")
for ds, info in results['metric4']['per_dataset'].items():
    print(f"    {ds:20s} max_discrepancy={info['max_fold_discrepancy']:.8f}  "
          f"identical={info['is_identical_to_figs']}")
print()
# Metric 5: correlation between Jaccard stability and accuracy gap.
print("METRIC 5: Stability-Performance")
print(f"  Spearman rho = {results['metric5']['spearman_rho']}, "
      f"p = {results['metric5']['spearman_p_value']}")
print("=" * 60)

### Visualization

Plots: (1) Graph density vs accuracy gap scatter, (2) Threshold sensitivity bar chart across SG-FIGS thresholds, (3) Stability (Jaccard) vs accuracy gap.

In [None]:
def visualize_results(results: dict, title_prefix: str = "") -> None:
    """Draw a three-panel figure from a ``run_analysis`` result and show it.

    Panels, left to right: graph density vs. accuracy gap (metric 1),
    per-dataset accuracy bars for FIGS and the three SG thresholds (metric 2),
    and mean Jaccard stability vs. accuracy gap (metric 5).

    Args:
        results: Dict providing ``metric1``, ``metric2``, ``metric5`` and
            ``datasets``.
        title_prefix: Text prepended to every panel title.
    """
    density_metric = results["metric1"]
    threshold_metric = results["metric2"]
    stability_metric = results["metric5"]
    dataset_order = results["datasets"]

    def labeled_scatter(ax, metric, x_key, color, x_label, heading):
        """Scatter ``x_key`` against the accuracy gap, labelling each dataset."""
        per_ds = metric["per_dataset"]
        xs = [per_ds[ds][x_key] for ds in dataset_order if ds in per_ds]
        ys = [per_ds[ds]["accuracy_gap"] for ds in dataset_order if ds in per_ds]
        names = [ds for ds in dataset_order if ds in per_ds]
        ax.scatter(xs, ys, c=color, edgecolors="black", s=80, zorder=3)
        for name, px, py in zip(names, xs, ys):
            ax.annotate(name, (px, py), fontsize=7, ha="center", va="bottom",
                        xytext=(0, 5), textcoords="offset points")
        ax.set_xlabel(x_label)
        ax.set_ylabel("Accuracy Gap (FIGS - SG-FIGS-25)")
        ax.set_title(f"{title_prefix}{heading}\n"
                     f"rho={metric['spearman_rho']}, p={metric['spearman_p_value']:.4f}")
        # Zero line separates positive from negative accuracy gaps.
        ax.axhline(y=0, color="gray", linestyle="--", alpha=0.5)
        ax.grid(alpha=0.3)

    _, (density_ax, threshold_ax, stability_ax) = plt.subplots(1, 3, figsize=(16, 5))

    # Panel 1: graph density vs. accuracy gap.
    labeled_scatter(
        density_ax,
        density_metric,
        x_key="graph_density_25pct",
        color="steelblue",
        x_label="Graph Density (25% threshold)",
        heading="Density vs Acc Gap",
    )

    # Panel 2: grouped bars, one group per dataset, one bar per method/threshold.
    per_ds = threshold_metric["per_dataset"]
    ds_names = list(per_ds.keys())
    slots = range(len(ds_names))
    width = 0.2
    # (result key, offset in bar widths from the group centre, legend label, style)
    series = [
        ("figs_accuracy", -1.5, "FIGS", {"color": "gray", "alpha": 0.8}),
        ("sg10_accuracy", -0.5, "SG-10%", {"color": "lightcoral"}),
        ("sg25_accuracy", 0.5, "SG-25%", {"color": "steelblue"}),
        ("sg50_accuracy", 1.5, "SG-50%", {"color": "seagreen"}),
    ]
    # Gather every series before drawing anything.
    heights = {key: [per_ds[ds][key] for ds in ds_names] for key, _, _, _ in series}
    for key, offset, label, style in series:
        threshold_ax.bar([i + offset * width for i in slots], heights[key], width,
                         label=label, **style)
    threshold_ax.set_xticks(list(slots))
    threshold_ax.set_xticklabels(ds_names, rotation=45, ha="right", fontsize=8)
    threshold_ax.set_ylabel("Accuracy")
    threshold_ax.set_title(f"{title_prefix}Threshold Sensitivity")
    threshold_ax.legend(fontsize=7, loc="lower left")
    threshold_ax.grid(axis="y", alpha=0.3)

    # Panel 3: Jaccard stability vs. accuracy gap.
    labeled_scatter(
        stability_ax,
        stability_metric,
        x_key="mean_jaccard",
        color="darkorange",
        x_label="Mean Jaccard Stability",
        heading="Stability vs Acc Gap",
    )

    plt.tight_layout()
    plt.show()


# Plot the mini-run results; the prefix tags every panel title.
visualize_results(results, title_prefix="[Mini] ")

## Part 2 — Full Run (Original Parameters)

Loads all 12 datasets and re-runs the same analysis with full data. All parameters match the original `eval.py` script.

In [None]:
# Load the full dataset. This rebinds `data`, replacing the mini payload.
data = load_full()
print(f"Loaded {len(data['datasets'])} datasets, "
      f"{sum(len(d['examples']) for d in data['datasets'])} total examples")
print("Datasets:", [d["dataset"] for d in data["datasets"]])

# Kept under a separate name so the mini-run `results` stays available.
results_full = run_analysis(data)

print("=" * 60)
# Metric 1: correlation between graph density and accuracy gap.
print("METRIC 1: Graph Density vs. Accuracy Gap")
print(f"  Spearman rho = {results_full['metric1']['spearman_rho']}, "
      f"p = {results_full['metric1']['spearman_p_value']}")
print()
# Metric 2: profile tallies, then one row of accuracies per dataset.
print("METRIC 2: Threshold Sensitivity Profiles")
print(f"  Distribution: {results_full['metric2']['profile_distribution']}")
for ds, info in results_full['metric2']['per_dataset'].items():
    print(f"    {ds:20s} FIGS={info['figs_accuracy']:.4f}  "
          f"SG10={info['sg10_accuracy']:.4f}  SG25={info['sg25_accuracy']:.4f}  "
          f"SG50={info['sg50_accuracy']:.4f}  -> {info['profile']}")
print()
# Metric 3: correlation plus the zero / nonzero oblique-fraction split.
print("METRIC 3: Oblique Activation Rate")
print(f"  Pearson r = {results_full['metric3']['pearson_r']}, "
      f"p = {results_full['metric3']['pearson_p_value']}")
print(f"  Zero-oblique datasets: {results_full['metric3']['zero_oblique_datasets']}")
print(f"  Nonzero-oblique datasets: {results_full['metric3']['nonzero_oblique_datasets']}")
print()
# Metric 4: only datasets with zero oblique fraction appear here.
print("METRIC 4: Sparse Graph Correction")
print(f"  Zero-oblique datasets checked: {results_full['metric4']['n_zero_oblique']}")
print(f"  Discrepancies from FIGS: {results_full['metric4']['discrepancies']}")
for ds, info in results_full['metric4']['per_dataset'].items():
    print(f"    {ds:20s} max_discrepancy={info['max_fold_discrepancy']:.8f}  "
          f"identical={info['is_identical_to_figs']}")
print()
# Metric 5: correlation between Jaccard stability and accuracy gap.
print("METRIC 5: Stability-Performance")
print(f"  Spearman rho = {results_full['metric5']['spearman_rho']}, "
      f"p = {results_full['metric5']['spearman_p_value']}")
print("=" * 60)

In [None]:
# Plot the full-run results; the prefix tags every panel title.
visualize_results(results_full, title_prefix="[Full] ")