# KSG Global Density Analysis
---

## Data & Inputs

- **Session-level networks (KSG):**  
  Files: `Results/ksg_results/sub-XXX_ses-YYY_combined_ksg.pkl` (one per session).  
  Each file is the combined `ResultsNetworkInference` object created from the 23 per-target IDTxl results (see aggregation notebook).

- **Metadata:**  
  `subject_session_metadata.csv` with columns at minimum: `subject`, `session`, `group` (values: `healthy`, `PD-off`, `PD-on`).  
  We form a key `sub_ses = subject_session` to join sessions to groups.

- **Adjacency used:**  
  From each combined result we extract `res.get_adjacency_matrix('binary', fdr=False)`.  
  *Important:* `fdr=False` (no across-edge FDR post-correction); diagonal is zeroed to count only directed, non-self edges.

---

## What the code computes

### Session-level **density**

- **Binary network**
- **Number of candidate directed edges**
- **Edges present**
- **Density**


In [1]:
# ============================
# STEP 1: Global metric = density (KSG)
# ============================
from pathlib import Path
import pickle, math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from itertools import combinations

BASE      = Path("/lustre/majlepy2/myproject")
RESULTS   = BASE / "Results" / "ksg_results"
META_CSV  = BASE / "subject_session_metadata.csv"

OUTBASE   = Path("/home/majlepy2/myproject/Step-wise/Global-level")
OUTBASE.mkdir(parents=True, exist_ok=True)

# --- Load metadata ---
meta = pd.read_csv(META_CSV)
meta["sub_ses"] = meta["subject"] + "_" + meta["session"]
subses_to_group = dict(zip(meta["sub_ses"], meta["group"]))

# --- Find combined sessions ---
session_pkls = sorted(RESULTS.glob("sub-*_*_combined_ksg.pkl"))
print(f"Found {len(session_pkls)} combined session files.")

def get_binary(res, fdr=False):
    A = np.array(res.get_adjacency_matrix("binary", fdr=fdr))  # AdjacencyMatrix -> array
    A = A.astype(np.uint8)
    np.fill_diagonal(A, 0)
    return A

rows = []
for pkl in session_pkls:
    stem = pkl.name.replace("_combined_ksg.pkl", "")  # "sub-XXX_ses-YY"
    sub, ses = stem.split("_", 1)
    sub_ses = f"{sub}_{ses}"
    group = subses_to_group.get(sub_ses)
    if group is None:
        print(f"[WARN] {sub_ses} not in metadata; skipping.")
        continue

    with open(pkl, "rb") as f:
        res = pickle.load(f)

    A = get_binary(res, fdr=False)         # (N,N)
    N = A.shape[0]
    E = N*(N-1)
    edges_present = int(A.sum())
    density = edges_present / E

    rows.append({
        "subject": sub,
        "session": ses,
        "sub_ses": sub_ses,
        "group": group,
        "N_nodes": N,
        "E_directed": E,
        "edges_present": edges_present,
        "density": density,
    })

df = pd.DataFrame(rows)
csv_metrics = OUTBASE / "ksg_session_metrics.csv"
df.to_csv(csv_metrics, index=False)
print(f"Saved session metrics: {csv_metrics}")

# ------------------------------
# Permutation tests
# ------------------------------
rng = np.random.default_rng(20250829)

def paired_signflip_p(on_vals, off_vals):
    d = np.asarray(on_vals) - np.asarray(off_vals)   # S
    S = d.size
    if S == 0:
        return np.nan, 0
    M = 1 << S  # exact 2^S
    obs = d.mean()
    hits = 0
    for code in range(M):
        signs = np.fromiter(((1 if (code >> b) & 1 else -1) for b in range(S)), dtype=np.int8)
        perm = (signs * d).mean()
        if abs(perm) >= abs(obs):
            hits += 1
    p = (hits + 1.0) / (M + 1.0)
    return p, M

def indep_label_exchange_p(a_vals, b_vals, exact_cap=50000, desired_perm=20000):
    a_vals = np.asarray(a_vals); b_vals = np.asarray(b_vals)
    SA, SB = a_vals.size, b_vals.size
    if SA == 0 or SB == 0:
        return np.nan, 0
    allx = np.concatenate([a_vals, b_vals])
    obs = a_vals.mean() - b_vals.mean()
    S_total = SA + SB
    total_unique = math.comb(S_total, SA)
    if total_unique <= exact_cap:
        hits = 0
        idx = np.arange(S_total)
        for A_idx in combinations(idx, SA):
            A_idx = np.fromiter(A_idx, dtype=int)
            B_idx = np.setdiff1d(idx, A_idx, assume_unique=False)
            perm = allx[A_idx].mean() - allx[B_idx].mean()
            if abs(perm) >= abs(obs):
                hits += 1
        p = (hits + 1.0) / (total_unique + 1.0)
        return p, total_unique
    else:
        n_perm = min(desired_perm, total_unique)
        hits = 0
        labels = np.array([0]*SA + [1]*SB)
        for _ in range(n_perm):
            pl = rng.permutation(labels)
            perm = allx[pl==0].mean() - allx[pl==1].mean()
            if abs(perm) >= abs(obs):
                hits += 1
        p = (hits + 1.0) / (n_perm + 1.0)
        return p, n_perm

def group_vals(metric, g):
    return df[df.group==g][metric].dropna().values

def paired_vals(metric):
    off = df[df.group=="PD-off"][["subject", metric]].dropna()
    on  = df[df.group=="PD-on" ][["subject", metric]].dropna()
    j = pd.merge(off, on, on="subject", how="inner")
    return j[metric + "_y"].values, j[metric + "_x"].values  # ON, OFF

# Only one metric here: density
metric = "density"
p_HvOFF, n1 = indep_label_exchange_p(group_vals(metric,"healthy"), group_vals(metric,"PD-off"))
p_HvON,  n2 = indep_label_exchange_p(group_vals(metric,"healthy"), group_vals(metric,"PD-on"))
onv, offv   = paired_vals(metric)
p_ONmOFF, n3 = paired_signflip_p(onv, offv)

tests_df = pd.DataFrame([{
    "metric": metric,
    "p_Healthy_vs_PDoff": p_HvOFF, "perms_HvsOFF": int(n1),
    "p_Healthy_vs_PDon":  p_HvON,  "perms_HvsON":  int(n2),
    "p_PDon_minus_PDoff": p_ONmOFF, "perms_paired": int(n3),
}])

out_tests = OUTBASE / "ksg_global_permutation_tests.csv"
tests_df.to_csv(out_tests, index=False)
print("Permutation results:\n", tests_df.to_string(index=False))
print(f"Saved tests summary: {out_tests}")

# ------------------------------
# Plots
# ------------------------------
def violin_simple(metric, ylabel, fname):
    order = ["healthy","PD-off","PD-on"]
    data = [group_vals(metric, g) for g in order]
    fig, ax = plt.subplots(figsize=(6,4), dpi=150)
    ax.violinplot(data, showmeans=False, showmedians=False, showextrema=False)
    # strip overlay
    xpos = np.repeat(np.arange(1, len(order)+1), [len(d) for d in data])
    vals = np.concatenate(data) if len(data) else np.array([])
    if vals.size:
        ax.scatter(xpos, vals, s=14, alpha=0.7)
    ax.set_xticks(range(1, len(order)+1))
    ax.set_xticklabels(order)
    ax.set_ylabel(ylabel); ax.set_title(f"KSG {metric} by group")
    fig.tight_layout()
    fig.savefig(OUTBASE / fname, bbox_inches="tight"); plt.close(fig)
    print(f"Saved: {OUTBASE/fname}")

violin_simple("density", "Density (fraction of edges)", "ksg_density_by_group.png")

def spaghetti(metric, ylabel, fname):
    off = df[df.group=="PD-off"][["subject", metric]].dropna()
    on  = df[df.group=="PD-on" ][["subject", metric]].dropna()
    j = pd.merge(off, on, on="subject", how="inner")
    if j.empty:
        print(f"No paired data for {metric}; skipping.")
        return
    fig, ax = plt.subplots(figsize=(5.5,4), dpi=150)
    for _, r in j.iterrows():
        ax.plot([0,1], [r[metric+"_x"], r[metric+"_y"]], marker="o", alpha=0.8)
    ax.set_xticks([0,1]); ax.set_xticklabels(["OFF","ON"])
    ax.set_ylabel(ylabel); ax.set_title(f"KSG paired OFF→ON: {metric}")
    fig.tight_layout()
    fig.savefig(OUTBASE / fname, bbox_inches="tight"); plt.close(fig)
    print(f"Saved: {OUTBASE/fname}")

spaghetti("density", "Density", "ksg_density_pd_spaghetti.png")


Found 36 combined session files.
Saved session metrics: /home/majlepy2/myproject/Step-wise/Global-level/ksg_session_metrics.csv


  A = np.array(res.get_adjacency_matrix("binary", fdr=fdr))  # AdjacencyMatrix -> array


Permutation results:
  metric  p_Healthy_vs_PDoff  perms_HvsOFF  p_Healthy_vs_PDon  perms_HvsON  p_PDon_minus_PDoff  perms_paired
density            0.698065         20000           0.876756        20000            0.522089          4096
Saved tests summary: /home/majlepy2/myproject/Step-wise/Global-level/ksg_global_permutation_tests.csv
Saved: /home/majlepy2/myproject/Step-wise/Global-level/ksg_density_by_group.png
Saved: /home/majlepy2/myproject/Step-wise/Global-level/ksg_density_pd_spaghetti.png
