# KSG Results Aggregation

This pipeline combines the per-target **KSG estimator** results (GPU/OpenCL) into session-level and group-level datasets.

## Workflow

1. **Per-target results**
   - Each `(subject, session, target)` produces a `.pkl` result file.  
   - Expected: **23 results per session** (one per target node).  

2. **Session-level combination**
   - Check that each `sub-*/ses-*` folder contains exactly 23 `.pkl` files; sessions with any other count are skipped and reported.  
   - Load all per-target results.  
   - Remove per-target settings (`target`, `filename_ckp`, `write_ckp`, `loglevel`) from each result so that settings from different targets do not conflict when merged.  
   - Combine into a `ResultsNetworkInference` object.  
   - Save as:  
     ```
     sub-XXX_ses-YYY_combined_ksg.pkl
     ```

3. **Group-level aggregation**
   - Metadata (`subject_session_metadata.csv`) provides group labels.  
   - Collect all session-level results per group.  
   - Save one `.pkl` per group, containing a list of that group's session-level result objects:  
     ```
     Healthy_ksg_all_sessions.pkl
     PD-off_ksg_all_sessions.pkl
     PD-on_ksg_all_sessions.pkl
     ```

4. **Summary CSV**
   - Generate `ksg_results_summary.csv` listing subject, session, group, and file paths.  

## Outputs

- **Session combined files:** one `.pkl` per `(subject, session)`  
- **Group combined files:** one `.pkl` per group  
- **CSV summary:** `ksg_results_summary.csv`  


In [None]:
import os
from pathlib import Path
import pickle
import pandas as pd
from idtxl.results import ResultsNetworkInference

# ----------------------------------------------------------------------
# 1. Path setup
# ----------------------------------------------------------------------
# Project root; every input and output location below is derived from it.
BASE_DIR = Path("/lustre/majlepy2/myproject")

# Input tree that is later searched for sub-*/ses-*/*.pkl per-target results.
ksg_source_dir = BASE_DIR.joinpath("ksg_mte_50")

# Output directory for the combined pickles; created here (with any missing
# parents) so that it exists before anything is written into it.
RESULTS_DIR = BASE_DIR.joinpath("Results")
ksg_combined_dir = RESULTS_DIR.joinpath("ksg_results")
ksg_combined_dir.mkdir(parents=True, exist_ok=True)

# ----------------------------------------------------------------------
# 2. Load metadata (for cross-checking)
# ----------------------------------------------------------------------
# Read the subject/session table and add a "<subject>/<session>" key column
# (presumably mirroring the sub-*/ses-* folder layout -- confirm with usage).
meta = pd.read_csv(BASE_DIR.joinpath("subject_session_metadata.csv")).assign(
    sub_ses=lambda df: df['subject'] + '/' + df['session']
)

# ----------------------------------------------------------------------
# 3. Identify session dirs with 23 target results
# ----------------------------------------------------------------------
# Keep only the session folders holding exactly 23 pickles; every other
# folder is reported and left out of session_dirs.
session_dirs = []
for subses_dir in sorted(ksg_source_dir.glob("sub-*/ses-*")):
    pkl_files = [*subses_dir.glob("*.pkl")]
    n_found = len(pkl_files)
    if n_found != 23:
        print(f"Skipping {subses_dir}: found {n_found} pkl files (expected 23)")
        continue
    session_dirs.append(subses_dir)
print(f"Found {len(session_dirs)} sessions with 23 targets.")

# ----------------------------------------------------------------------
# 4. Combine 23 per-target results into one session object
# ----------------------------------------------------------------------
# For each accepted session folder: load every per-target pickle, strip the
# target-specific settings, merge everything into one ResultsNetworkInference
# and write it to ksg_combined_dir as <subject>_<session>_combined_ksg.pkl.
for subses_dir in session_dirs:
    # The last two path components are the subject and session folder names.
    subj, sess = subses_dir.parts[-2:]
    combined_path = ksg_combined_dir / f"{subj}_{sess}_combined_ksg.pkl"

    results = []
    for pf in sorted(subses_dir.glob("*.pkl")):
        with pf.open("rb") as fh:
            obj = pickle.load(fh)
        # Drop per-target settings not needed for session-level merge
        for key in ('target', 'filename_ckp', 'write_ckp', 'loglevel'):
            obj.settings.pop(key, None)
        results.append(obj)

    # The first per-target result supplies the data properties used to
    # initialise the combined container.
    props = results[0].data_properties
    combined_result = ResultsNetworkInference(
        n_nodes=props['n_nodes'],
        n_realisations=props['n_realisations'],
        normalised=props['normalised'],
    )
    combined_result.combine_results(*results)

    with combined_path.open("wb") as fh:
        pickle.dump(combined_result, fh)
    print(f"Combined and saved: {combined_path}")

# ----------------------------------------------------------------------
# 5. Validate one combined file
# ----------------------------------------------------------------------
# combined_path is bound only inside the combine loop of step 4, so it does
# not exist when no session passed the 23-file check. Guard on session_dirs
# instead of failing with a NameError.
if session_dirs:
    # Reload the last combined file written above as a round-trip check.
    with open(combined_path, "rb") as f:
        comb = pickle.load(f)
    print("Combined object type:", type(comb))
    print("Targets analyzed:", comb.targets_analysed)
else:
    print("No sessions were combined; skipping validation.")

# ----------------------------------------------------------------------
# 6. Group aggregation and summary export
# ----------------------------------------------------------------------
# Reuse the locations defined in step 1 rather than repeating the absolute
# paths, so a change to BASE_DIR cannot leave this step reading stale data.
ksg_dir = ksg_combined_dir
meta = pd.read_csv(BASE_DIR / "subject_session_metadata.csv")

# Build a "<subject>_<session>" key, the same form as the prefix of the
# *_combined_ksg.pkl file names, and map it to the session's group label.
meta['sub_ses'] = meta['subject'] + '_' + meta['session']
group_dict = dict(zip(meta['sub_ses'], meta['group']))

# Collect every session-level combined result, bucketed by the group label
# looked up in group_dict, and record one summary row per accepted session.
combined_files = sorted(ksg_dir.glob("*_combined_ksg.pkl"))

group_results = {}     # { group_name: [list of session Results objects] }
summary_rows = []

for f in combined_files:
    fname = f.name
    # The glob above guarantees the suffix, so slice it off to recover the
    # "<subject>_<session>" stem.
    stem = fname[:-len("_combined_ksg.pkl")]
    try:
        # Split on the first underscore only; anything after it is the session.
        subj, sess = stem.split("_", 1)
    except ValueError as e:
        # Raised by the unpacking when the stem contains no underscore.
        print(f"Skipping {fname}: couldn't parse subject/session ({e})")
        continue
    sub_ses = f"{subj}_{sess}"

    group = group_dict.get(sub_ses)
    if group is None:
        print(f"WARNING: {sub_ses} not found in metadata.")
        continue
    if pd.isna(group):
        # An empty group cell is read by pandas as NaN; without this check it
        # would be treated as a group and produce a "nan_..." output file.
        print(f"WARNING: {sub_ses} has no group label in metadata.")
        continue

    # NOTE: pickle.load can execute arbitrary code; only use on trusted files.
    with open(f, "rb") as pf:
        res = pickle.load(pf)

    group_results.setdefault(group, []).append(res)
    summary_rows.append({
        "file": str(f),
        "subject": subj,
        "session": sess,
        "group": group
    })

# Write one pickle per group; each file holds the list of that group's
# session-level result objects.
for group, results_list in group_results.items():
    out_path = ksg_dir / f"{group}_ksg_all_sessions.pkl"
    with open(out_path, "wb") as pf:
        pickle.dump(results_list, pf)
    print(f"Saved {group}: {out_path} ({len(results_list)} sessions)")

# Pass the columns explicitly so the CSV still gets its header row when no
# session was matched (an empty DataFrame would otherwise have no columns).
summary_df = pd.DataFrame(
    summary_rows,
    columns=["file", "subject", "session", "group"],
)
summary_csv = ksg_dir / "ksg_results_summary.csv"
summary_df.to_csv(summary_csv, index=False)
print(f"Saved session-to-group mapping: {summary_csv}")
