In [1]:
"""
Generate descriptive SNBB figures that match the slide template colours.
--------------------------------------------------------------------------

Prereqs:
    pip install matplotlib pandas numpy seaborn
    (seaborn only used for its convenient styling helper)

Replace CSV_PATH if your file lives elsewhere.
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from matplotlib import font_manager as fm

In [2]:
# Standard imports
from pathlib import Path
import pandas as pd
import numpy as np

# Import our refactored modules
from athletes_brain.fig1 import (
    Fig1Config,
    AthletesBrainDataLoader,
    GroupComparison,
    BrainPlotter,
    generate_figure1,
)

# Import specific functions for focused analysis
from athletes_brain.fig1.main import analyze_specific_comparison, get_most_significant_regions

[32m2025-09-26 11:54:05.343[0m | [1mINFO    [0m | [36mathletes_brain.config[0m:[36m<module>[0m:[36m11[0m - [1mPROJ_ROOT path is: /home/galkepler/Projects/athletes_brain[0m


In [3]:
# Build the Figure-1 configuration object; it is constructed with no arguments,
# so whatever defaults Fig1Config defines are used.
config = Fig1Config()

In [4]:
# --- Run-level settings ------------------------------------------------------
# NOTE(review): SOFTWARE is not referenced by the neighbouring cells — confirm
# whether it is still needed.
SOFTWARE = "mrtrix3"
# ATLAS is interpolated into the input directory path
# (/media/storage/phd/papers/aging/<ATLAS>).
ATLAS = "schaefer2018tian2020_400_7"
# RECONSTRUCTION, SCALE and WEIGHT are interpolated into the connectome pickle
# file name: connectome_<RECONSTRUCTION>_<SCALE>_<WEIGHT>.pkl.
RECONSTRUCTION = "SDStream"
# This is the literal string "None" (it ends up in the file name), not the
# Python None object.
SCALE = "None"
WEIGHT = "SIFT2"


# Column whose missing values are back-filled in the processing loop. Because
# the name contains "mean", the "nanmean" column is used as the filler
# (otherwise "nanmedian" would be used).
DISTRIBUTION_METRIC = "qfmean"
# Measures loaded from their own "<measure>.pkl" file in the processing loop.
DIFFUSION_MEASURES = ["adc", "fa", "rd", "ad"]
STRUCTURAL_MEASURES = ["gm_vol", "wm_vol", "csf_vol"]
# Measures that are all read from the single shared connectome pickle; the
# loop tags the loaded table with the measure name instead of picking a file.
CONNECTOME_MEASURES = [
    "degree",
    "eigenvector_centrality",
    "betweenness_centrality",
    "degree_norm",
    "regional_efficiency",
]

# Columns removed from every per-measure table before it is exported.
COLUMNS_TO_DROP = ["path", "measure", "subject_id"]

In [5]:
# Lookup table that turns raw "exercise_frequency" answers into numbers. It is
# applied with Series.replace(...) and the column is then cast to float, so
# any answer missing from this table that is not already numeric will make the
# cast fail.
# Presumably the values are sessions per week (the Hebrew answers are phrased
# "per week") — TODO confirm the unit for the numeric-range keys.
EF_REPLACE_DICT = {
    "1-2": 1.5,
    "3-4": 3.5,
    "5+": 6,
    # NOTE(review): "1-1" and "12-31" do not look like genuine frequency ranges
    # (possibly mangled spreadsheet entries) — confirm the intended mapping.
    "1-1": 1.5,
    "12-31": 2.5,
    "1-4": 2.5,
    # Hebrew: "less than once a week"
    "פחות מפעם בשבוע": 0.5,
    # Hebrew: "more than 3 times a week"
    "יותר מ-3 פעמים בשבוע": 4,
    # Hebrew: "once a week"
    "פעם בשבוע": 1,
    # Hebrew: "two to three times a week"
    "פעמיים-שלוש בשבוע": 2.5,
    "1-5": 3,
    "1-3": 2,
}

In [6]:
# Read subjects.csv for the selected atlas and flatten every cell of the table
# into one plain Python list; the processing loop keeps only rows whose
# "subject_code" appears in this list.
subjects_csv = f"/media/storage/phd/papers/aging/{ATLAS}/subjects.csv"
subjects_table = pd.read_csv(subjects_csv)
finished_subjects = subjects_table.to_numpy().ravel().tolist()

In [7]:
# Build one cleaned table per measure and export it as an interim CSV.
#
# For every measure the loop:
#   1. loads the pickled table (a per-measure file, or the shared connectome
#      pickle for connectome measures),
#   2. keeps the last row per (subject_code, index) pair,
#   3. keeps only rows with age_at_scan > 18 from subjects in finished_subjects,
#   4. drops bookkeeping columns and converts exercise_frequency to float,
#   5. back-fills DISTRIBUTION_METRIC from the nanmean/nanmedian column,
#   6. writes the result to data/interim/<measure>.csv.
#
# Raises FileNotFoundError if a measure's pickle is missing.
destination = Path(f"/media/storage/phd/papers/aging/{ATLAS}")

# To include graph measures as well, use:
# unique_measures = DIFFUSION_MEASURES + STRUCTURAL_MEASURES + CONNECTOME_MEASURES
unique_measures = DIFFUSION_MEASURES + STRUCTURAL_MEASURES

# NOTE(review): `results` is not filled in by this cell — presumably consumed
# by later cells; confirm before removing.
results = {m: {} for m in unique_measures}

# Loop-invariant: name of the column used to back-fill DISTRIBUTION_METRIC.
filler_metric = "nanmean" if "mean" in DISTRIBUTION_METRIC else "nanmedian"

for m in unique_measures:
    print(f"Processing {m}")
    is_connectome = m in CONNECTOME_MEASURES
    if is_connectome:
        source_file = destination / f"connectome_{RECONSTRUCTION}_{SCALE}_{WEIGHT}.pkl"
    else:
        source_file = destination / f"{m}.pkl"

    try:
        # SECURITY: unpickling can execute arbitrary code — only load pickles
        # produced by a trusted pipeline.
        m_data = pd.read_pickle(source_file).reset_index(drop=True)
    except FileNotFoundError as err:
        # Fail loudly and name the file that was actually looked up (the old
        # message said "Skipping..." although the error was re-raised).
        raise FileNotFoundError(f"Could not find {source_file} for measure '{m}'.") from err
    if is_connectome:
        m_data["measure"] = m

    # Keep the last row for each (subject, "index") pair. The age filter must
    # be applied to the de-duplicated frame; filtering `m_data` again (as the
    # previous version did) silently threw the de-duplication away.
    data = m_data.drop_duplicates(subset=["subject_code", "index"], keep="last")
    data = data[data["age_at_scan"] > 18]
    data = data[data["subject_code"].isin(finished_subjects)]
    data = data.drop(columns=COLUMNS_TO_DROP)
    data["exercise_frequency"] = data["exercise_frequency"].replace(EF_REPLACE_DICT).astype(float)

    # Where the distribution metric is missing, fall back to the value in the
    # column named by `filler_metric` (assumes that column exists whenever
    # DISTRIBUTION_METRIC does — TODO confirm).
    if DISTRIBUTION_METRIC in data.columns:
        data[DISTRIBUTION_METRIC] = data[DISTRIBUTION_METRIC].fillna(data[filler_metric])

    print(f"Collected {data['subject_code'].nunique()} subjects for {m} measure.")
    data.to_csv(f"/media/storage/phd/athletes_brain/data/interim/{m}.csv")

Processing adc
Collected 2779 subjects for adc measure.


KeyboardInterrupt: 