# Group Comparison analysis pRF propertis 

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as mcolors

In [3]:
# Main variables
MAIN_PATH = '/Volumes/FedericaCardillo/pre-processing/projects/PROJECT_EGRET-AAA/derivatives/CFM'
cumulative = os.path.join(MAIN_PATH, 'CDFs')
hemis = ['lh', 'rh']
atlas = 'benson'
denoising = 'nordic'
visual_areas = ['V2', 'V3']
restingstate_exclusions = ['sub-17', 'sub-18', 'sub-37', 'sub-43']
ecc_bins = [0, 1, 2, 3, 4, 5, 6, 7]
threshold = 0.3

# Subject groups
poag = [f'sub-{i:02}' for i in range(2, 21)]
hc = [f'sub-{i:02}' for i in range(21, 47)]
groups = {"HC": hc, "POAG": poag}

# CDF x-axis
x = np.linspace(0, 3, 500)

# Setup
visual_areas = ['V2', 'V3']
atlases = ['benson', 'manual']
# tasks = ['RET', 'RET2', 'RestingState']
hemispheres = ['lh', 'rh']
denoising = 'nordic'
source = 'V1'

# Tasks
tasks = {
    "RET":    {"label": "Task RET", "dir": "RET", "rest": False},
    "RET2":   {"label": "Task RET2", "dir": "RET2", "rest": False},
    "REST1":  {"label": "RestingState Run-1", "dir": "RestingState", "rest": True, "run": "run-1"},
    "REST2":  {"label": "RestingState Run-2", "dir": "RestingState", "rest": True, "run": "run-2"}}

# Mapping of task names to output labels
task_name_map = {
    "RET": "RET",
    "RET2": "RET2",
    "REST1": "REST_OPEN",
    "REST2": "REST_CLOSED"}

oct_values_combined = {
    'sub-02': 73, 'sub-04': 57, 'sub-05': 59, 'sub-06': 72, 'sub-07': 86.5,
    'sub-08': 71, 'sub-09': 65, 'sub-10': 64.5, 'sub-11': 59.5, 'sub-12': 56,
    'sub-13': 63, 'sub-14': 73.5, 'sub-15': 74.5, 'sub-16': 65.5, 'sub-17': 70.5,
    'sub-18': 71, 'sub-19': 55, 'sub-20': 64, 'sub-21': 72.5, 'sub-22': 89,
    'sub-23': 68, 'sub-25': 105, 'sub-26': 87, 'sub-27': 100.5, 'sub-28': 69,
    'sub-29': 82.5, 'sub-30': 100, 'sub-31': 89.5, 'sub-32': 80.5, 'sub-33': 97.5,
    'sub-34': 100.5, 'sub-35': 100, 'sub-36': 103, 'sub-37': 95.5, 'sub-38': 100,
    'sub-39': 104.5, 'sub-40': 101, 'sub-41': 101, 'sub-42': 102, 'sub-43': 110.5,
    'sub-44': 105, 'sub-45': 100.5, 'sub-46': 94.5}
hfa_values_combined = {
    'sub-02': -15.83, 'sub-04': -20.76, 'sub-05': -9.25, 'sub-06': 0.62, 'sub-07': -2.88,
    'sub-08': -6.1, 'sub-09': -4.35, 'sub-10': -11.54, 'sub-11': -2.08, 'sub-12': -8.68,
    'sub-13': -9.76, 'sub-14': -8.06, 'sub-15': -1.9, 'sub-16': -1.68, 'sub-17': -5.46,
    'sub-18': -12.41, 'sub-19': -7.78, 'sub-20': -15.24, 'sub-21': -1.94, 'sub-22': -0.91,
    'sub-23': -7.54, 'sub-25': -0.38, 'sub-26': -0.38, 'sub-27': 1.42, 'sub-28': 0.29,
    'sub-29': -0.14, 'sub-30': 0.19, 'sub-31': 0.37, 'sub-32': 1.64, 'sub-33': -0.12,
    'sub-34': -2.16, 'sub-35': 0.07, 'sub-36': 0.84, 'sub-37': -2.01, 'sub-38': -0.39,
    'sub-39': 0.5, 'sub-40': 0.9, 'sub-41': -1.01, 'sub-42': 1.17, 'sub-43': -1.08,
    'sub-44': 1.23, 'sub-45': -0.28, 'sub-46': 0.7}
hfa_values_besteye = {
    'sub-02': -1.12, 'sub-04': -18.43, 'sub-05': -7.5, 'sub-06': 0.95, 'sub-07': -2.1,
    'sub-08': -0.09, 'sub-09': -2.56, 'sub-10': -8.37, 'sub-11': -2.05, 'sub-12': -4.7,
    'sub-13': -4.24, 'sub-14': -3.52, 'sub-15': 0.52, 'sub-16': -1.1, 'sub-17': -4.56,
    'sub-18': -0.92, 'sub-19': -6.11, 'sub-20': -2.36, 'sub-21': 0.38, 'sub-22': 0.24,
    'sub-23': -2.67, 'sub-25': -0.11, 'sub-26': 0.71, 'sub-27': 1.75, 'sub-28': 0.6,
    'sub-29': 0.2, 'sub-30': 0.97, 'sub-31': 0.51, 'sub-32': 1.73, 'sub-33': 0.26,
    'sub-34': -1.58, 'sub-35': 0.45, 'sub-36': 1.25, 'sub-37': -0.87, 'sub-38': 0.84,
    'sub-39': 1.35, 'sub-40': 1.69, 'sub-41': -0.89, 'sub-42': 1.76, 'sub-43': -0.95,
    'sub-44': 1.91, 'sub-45': -0.22, 'sub-46': 0.97}
oct_values_besteye = {
    'sub-02': 101, 'sub-04': 58, 'sub-05': 64, 'sub-06': 73, 'sub-07': 91,
    'sub-08': 78, 'sub-09': 76, 'sub-10': 66, 'sub-11': 60, 'sub-12': 63,
    'sub-13': 66, 'sub-14': 75, 'sub-15': 86, 'sub-16': 71, 'sub-17': 72,
    'sub-18': 86, 'sub-19': 60, 'sub-20': 71, 'sub-21': 77, 'sub-22': 95,
    'sub-23': 75, 'sub-25': 108, 'sub-26': 89, 'sub-27': 101, 'sub-28': 93,
    'sub-29': 86, 'sub-30': 101, 'sub-31': 91, 'sub-32': 82, 'sub-33': 99,
    'sub-34': 101, 'sub-35': 104, 'sub-36': 112, 'sub-37': 105, 'sub-38': 101,
    'sub-39': 105, 'sub-40': 102, 'sub-41': 101, 'sub-42': 104, 'sub-43': 111,
    'sub-44': 108, 'sub-45': 103, 'sub-46': 96}
oct_values_worsteye = {
    'sub-02': 79, 'sub-04': 56, 'sub-05': 54, 'sub-06': 71, 'sub-07': 82,
    'sub-08': 64, 'sub-09': 54, 'sub-10': 63, 'sub-11': 59, 'sub-12': 49,
    'sub-13': 60, 'sub-14': 72, 'sub-15': 63, 'sub-16': 60, 'sub-17': 69,
    'sub-18': 56, 'sub-19': 50, 'sub-20': 57, 'sub-21': 68, 'sub-22': 83,
    'sub-23': 61, 'sub-25': 102, 'sub-26': 85, 'sub-27': 100, 'sub-28': 45,
    'sub-29': 79, 'sub-30': 99, 'sub-31': 88, 'sub-32': 79, 'sub-33': 96,
    'sub-34': 100, 'sub-35': 96, 'sub-36': 94, 'sub-37': 86, 'sub-38': 99,
    'sub-39': 104, 'sub-40': 100, 'sub-41': 101, 'sub-42': 100, 'sub-43': 110,
    'sub-44': 102, 'sub-45': 98, 'sub-46': 93}
hfa_values_worsteye = {
    'sub-02': -30.54, 'sub-04': -23.08, 'sub-05': -11, 'sub-06': 0.3, 'sub-07': -3.67,
    'sub-08': -12.11, 'sub-09': -6.14, 'sub-10': -14.72, 'sub-11': -2.12, 'sub-12': -12.66,
    'sub-13': -15.29, 'sub-14': -12.6, 'sub-15': -4.32, 'sub-16': -2.25, 'sub-17': -6.36,
    'sub-18': -23.9, 'sub-19': -9.45, 'sub-20': -28.11, 'sub-21': -4.25, 'sub-22': -2.06,
    'sub-23': -12.4, 'sub-25': -0.65, 'sub-26': -1.47, 'sub-27': 1.08, 'sub-28': -0.02,
    'sub-29': -0.48, 'sub-30': -0.59, 'sub-31': 0.23, 'sub-32': 1.55, 'sub-33': -0.49,
    'sub-34': -2.73, 'sub-35': -0.31, 'sub-36': 0.44, 'sub-37': -3.15, 'sub-38': -1.62,
    'sub-39': -0.36, 'sub-40': 0.12, 'sub-41': -1.12, 'sub-42': 0.58, 'sub-43': -1.21,
    'sub-44': 0.55, 'sub-45': -0.34, 'sub-46': 0.43}

binocular_advanced = {'sub-04': -23.08}
monocular_advanced = {'sub-02': -30.54, 'sub-04': -23.08, 'sub-08': -12.11, 'sub-10': -14.72, 'sub-12': -12.66, 'sub-13': -15.29, 'sub-14': -12.6, 'sub-18': -23.9, 'sub-20': -28.11}
#monocular_advanced = {'sub-02': -30.54, 'sub-04': -23.08, 'sub-18': -23.9, 'sub-20': -28.11}

# Eccentricity Ranges
eccentricity_ranges_all = {"0.5-6": (0.5, 6),"7-25": (10, 25)}
eccentricity_ranges_ret = {"0.5-6": (0.5, 6)}
norm = mcolors.Normalize(vmin=-21, vmax=2)

In [None]:
def create_output_dirs(base_dir):
    """Create output structure."""
    for task_key, task_label in task_name_map.items():
        if task_key in ["RET", "RET2"]:
            ecc_labels = ["0.5-6"]
        else:
            ecc_labels = ["0.5-6", "7-25"]
        for ecc_label in ecc_labels:
            for subdir in ['group_comparison', 'hfa_colored_subjects']:
                ecc_subdir = os.path.join(base_dir, task_label, ecc_label, subdir)
                os.makedirs(ecc_subdir, exist_ok=True)

def load_sigma(subj, visual_area, task_key, ecc_range=None, atlas="benson"):
    task_info = tasks[task_key]
    base_path = f"{MAIN_PATH}/{subj}/ses-02/{atlas}/{task_info['dir']}"
    sigma_vals = []

    for hemi in hemis:
        run_folder = f"{task_info['run']}/{denoising}" if task_info.get("rest") else denoising
        path = f"{base_path}/{run_folder}/GM/{hemi}/{visual_area}-V1/best_fits_prf.csv"
        if not os.path.exists(path):
            continue
        df = pd.read_csv(path)
        df = df.dropna(subset=["Best Sigma Finer", "Best Variance Explained Finer", "Source Eccentricity"])
        df = df[(df["Best Sigma Finer"] > 0.05) & (df["Best Variance Explained Finer"] > 0.3)]
        if ecc_range:
            df = df[(df["Source Eccentricity"] >= ecc_range[0]) & (df["Source Eccentricity"] <= ecc_range[1])]
        sigma_vals.extend(df["Best Sigma Finer"].values)
    return sigma_vals

def plot_group_comparison(visual_area, task_key, ecc_label, ecc_range, save_dir, current_atlas):
    """Plot group comparison median + CI."""
    plt.figure(figsize=(6,6))
    for group_name, subjects in groups.items():
        all_cdfs = []
        for s
        bj in subjects:
            if tasks[task_key].get("rest") and subj in restingstate_exclusions:
                continue
            sigmas = load_sigma(subj, visual_area, task_key, ecc_range, atlas)
            if sigmas:
                sorted_sigma = np.sort(sigmas)
                cdf = np.linspace(0, 1, len(sorted_sigma))
                print(x)
                grid_cdf = np.interp(x, sorted_sigma, cdf, left=0, right=1)
                all_cdfs.append(grid_cdf)

        if all_cdfs:
            all_cdfs = np.array(all_cdfs)
            median = np.median(all_cdfs, axis=0)
            p25 = np.percentile(all_cdfs, 25, axis=0)
            p75 = np.percentile(all_cdfs, 75, axis=0)
            color = 'red' if group_name == "POAG" else 'blue'
            plt.plot(x, median, color=color, linewidth=2.5, label=f"{group_name} (N={len(all_cdfs)})")
            plt.fill_between(x, p25, p75, color=color, alpha=0.2)

    plt.title(f"{visual_area} - {task_key} - {ecc_label}")
    plt.xlabel("Connective Field Sigma (mm)")
    plt.ylabel("Cumulative Probability")
    plt.grid(True)
    plt.xlim(0, 2)
    plt.legend()
    plt.savefig(os.path.join(save_dir, f"{visual_area}_{task_key}_{ecc_label}_group_comparison.jpg"), dpi=300)
    plt.close()

def plot_hfa_colored_with_all_controls(visual_area, task_key, ecc_label, ecc_range, save_dir, current_atlas):
    """Plot POAG subjects colored by HFA + all controls individually."""
    plt.figure(figsize=(6,6))
    for group_name, subjects in groups.items():
        color_map = cm.Reds if group_name == "POAG" else cm.Blues
        for subj in subjects:
            if tasks[task_key].get("rest") and subj in restingstate_exclusions:
                continue
            sigmas = load_sigma(subj, visual_area, task_key, ecc_range, atlas)
            if sigmas:
                if group_name == "POAG":
                    hfa_value = hfa_values_combined.get(subj, 0)
                    color = color_map(norm(hfa_value))
                    plt.plot(x, np.interp(x, np.sort(sigmas), np.linspace(0, 1, len(sigmas)), left=0, right=1),
                             linestyle='--', color=color, alpha=0.7)
                else:
                    plt.plot(x, np.interp(x, np.sort(sigmas), np.linspace(0, 1, len(sigmas)), left=0, right=1),
                             linestyle='--', color='lightblue', alpha=0.5)

    plt.title(f"{visual_area} - {task_key} - {ecc_label} (All controls)")
    plt.xlabel("Connective Field Sigma (mm)")
    plt.ylabel("Cumulative Probability")
    plt.grid(True)
    plt.xlim(0, 2)
    sm = plt.cm.ScalarMappable(cmap=cm.Reds, norm=norm)
    sm.set_array([])
    plt.colorbar(sm, label="HFA Value (Combined Eye)")
    plt.savefig(os.path.join(save_dir, f"{visual_area}_{task_key}_{ecc_label}_hfa_colored_allcontrols.jpg"), dpi=300)
    plt.close()

def plot_hfa_colored_with_median_controls_nooutliers(visual_area, task_key, ecc_label, ecc_range, save_dir, current_atlas):
    """Plot POAG subjects colored by HFA + only the control median and confidence interval."""
    plt.figure(figsize=(8,6))

    # First, compute HC median and CI
    hc_cdfs = []
    for subj in groups["HC"]:
        if tasks[task_key].get("rest") and subj in restingstate_exclusions:
            continue
        sigmas = load_sigma(subj, visual_area, task_key, ecc_range, atlas)
        if sigmas:
            sorted_sigma = np.sort(sigmas)
            cdf = np.linspace(0, 1, len(sorted_sigma))
            grid_cdf = np.interp(x, sorted_sigma, cdf, left=0, right=1)
            hc_cdfs.append(grid_cdf)

    if hc_cdfs:
        hc_cdfs = np.array(hc_cdfs)
        median_hc = np.median(hc_cdfs, axis=0)
        p25_hc = np.percentile(hc_cdfs, 25, axis=0)
        p75_hc = np.percentile(hc_cdfs, 75, axis=0)

        # Plot HC median and confidence interval
        plt.plot(x, median_hc, color='blue', linewidth=2.5, label=f"HC Median (N={len(hc_cdfs)})")
        plt.fill_between(x, p25_hc, p75_hc, color='blue', alpha=0.2)

    # Now plot individual POAG subjects
    for subj in groups["POAG"]:
        if tasks[task_key].get("rest") and subj in restingstate_exclusions:
            continue
        sigmas = load_sigma(subj, visual_area, task_key, ecc_range, atlas)
        if sigmas:
            hfa_value = hfa_values_combined.get(subj, 0)
            color = cm.Reds(norm(hfa_value))
            sorted_sigma = np.sort(sigmas)
            cdf = np.linspace(0, 1, len(sorted_sigma))
            grid_cdf = np.interp(x, sorted_sigma, cdf, left=0, right=1)
            plt.plot(x, grid_cdf, linestyle='--', color=color, alpha=0.7)

    plt.title(f"{visual_area} - {task_key} - {ecc_label} (HC Median only)")
    plt.xlabel("Connective Field Sigma (mm)")
    plt.ylabel("Cumulative Probability")
    plt.grid(True)
    plt.xlim(0, 2)
    sm = plt.cm.ScalarMappable(cmap=cm.Reds, norm=norm)
    sm.set_array([])
    plt.colorbar(sm, label="HFA Value (Combined Eye)")
    plt.legend()
    plt.savefig(os.path.join(save_dir, f"{visual_area}_{task_key}_{ecc_label}_hfa_colored_HCmedian.jpg"), dpi=300)
    plt.close()

def create_histogram_output_dirs(base_dir):
    """Create histogram output structure."""
    for task_key, task_label in task_name_map.items():
        if task_key in ["RET", "RET2"]:
            ecc_labels = ["0.5-6"]
        else:
            ecc_labels = ["0.5-6", "7-25"]
        for ecc_label in ecc_labels:
            hist_subdir = os.path.join(base_dir, task_label, ecc_label, 'histograms')
            os.makedirs(hist_subdir, exist_ok=True)

def plot_histograms(visual_area, task_key, ecc_label, ecc_range, save_dir, current_atlas):
    """Plot histograms for each subject (raw frequencies, not density)."""
    hist_dir = os.path.join(save_dir, 'histograms')
    os.makedirs(hist_dir, exist_ok=True)

    # Collect sigma values
    sigma_values = {group: [] for group in groups}
    included_subjects = {group: 0 for group in groups}
    
    for group_name, subjects in groups.items():
        for subj in subjects:
            if tasks[task_key].get("rest") and subj in restingstate_exclusions:
                continue
            sigmas = load_sigma(subj, visual_area, task_key, ecc_range, atlas)
            if sigmas:
                sigma_values[group_name].append((subj, np.array(sigmas)))
                included_subjects[group_name] += 1

    bins = np.linspace(0, 3, 30)  # Same bins for all plots

    ## ----- A) Plot: All subjects individually (POAG and HC) -----

    plt.figure(figsize=(6, 6))

    # Common bin centers
    centers = (bins[:-1] + bins[1:]) / 2

    for group_name in ["HC", "POAG"]:
        subj_histograms = []

        for subj, sigmas in sigma_values[group_name]:
            counts, _ = np.histogram(sigmas, bins=bins)
            subj_histograms.append(counts)

        if subj_histograms:
            hist_array = np.array(subj_histograms)
            median = np.median(hist_array, axis=0)
            p25 = np.percentile(hist_array, 25, axis=0)
            p75 = np.percentile(hist_array, 75, axis=0)

            color = 'blue' if group_name == "HC" else 'red'
            plt.plot(centers, median, color=color, linewidth=2.5,
                    label=f'{group_name} Median (N={included_subjects[group_name]})')
            plt.fill_between(centers, p25, p75, color=color, alpha=0.2)

    plt.title(f"{visual_area} - {task_key} - {ecc_label} Histogram (Group CIs)")
    plt.xlabel("Connective Field Sigma (mm)")
    plt.ylabel("Frequency")
    plt.grid(True)
    plt.legend()
    plt.xlim(0, 2)
    plt.savefig(os.path.join(hist_dir, f"{visual_area}_{task_key}_{ecc_label}_hist_groupCI.jpg"), dpi=300)
    plt.close()

    ## ----- B) Plot: POAG individually + HC group median/CI -----
    plt.figure(figsize=(6,6))

    # HC group median and CI
    if sigma_values["HC"]:
        histograms = []
        for subj, sigmas in sigma_values["HC"]:
            counts, _ = np.histogram(sigmas, bins=bins)
            histograms.append(counts)
        histograms = np.array(histograms)

        median_hc = np.median(histograms, axis=0)
        p25_hc = np.percentile(histograms, 25, axis=0)
        p75_hc = np.percentile(histograms, 75, axis=0)

        centers = (bins[:-1] + bins[1:]) / 2
        plt.plot(centers, median_hc, color='blue', linewidth=2.5, label=f'HC Median (N={included_subjects["HC"]})')
        plt.fill_between(centers, p25_hc, p75_hc, color='blue', alpha=0.2)

    # Plot POAG individually
    for subj, sigmas in sigma_values["POAG"]:
        hfa_value = hfa_values_combined.get(subj, 0)
        color = cm.Reds(norm(hfa_value))
        counts, _ = np.histogram(sigmas, bins=bins)
        centers = (bins[:-1] + bins[1:]) / 2
        plt.plot(centers, counts, linestyle='--', color=color, alpha=0.7)

    plt.title(f"{visual_area} - {task_key} - {ecc_label} Histogram (POAG + HC Median)")
    plt.xlabel("Connective Field Sigma (mm)")
    plt.ylabel("Frequency")
    plt.legend()
    plt.grid(True)
    plt.xlim(0, 2)
    plt.savefig(os.path.join(hist_dir, f"{visual_area}_{task_key}_{ecc_label}_hist_poag_hcmedian.jpg"), dpi=300)
    plt.close()

def load_eccentricity(subj, visual_area, task_key, ecc_range=None, atlas="benson"):
    task_info = tasks[task_key]
    base_path = f"{MAIN_PATH}/{subj}/ses-02/{atlas}/{task_info['dir']}"
    ecc_vals = []

    for hemi in hemis:
        run_folder = f"{task_info['run']}/{denoising}" if task_info.get("rest") else denoising
        path = f"{base_path}/{run_folder}/GM/{hemi}/{visual_area}-V1/best_fits_prf.csv"
        if not os.path.exists(path):
            continue
        df = pd.read_csv(path)
        df = df.dropna(subset=["Source Eccentricity", "Best Variance Explained Finer"])
        df = df[df["Best Variance Explained Finer"] > 0.3]
        if ecc_range:
            df = df[(df["Source Eccentricity"] >= ecc_range[0]) & (df["Source Eccentricity"] <= ecc_range[1])]
        ecc_vals.extend(df["Source Eccentricity"].values)
    return ecc_vals

def group_subject(hfa_value, subject_id):
    try:
        subject_number = int(subject_id.replace('sub-', '').strip())
    except ValueError:
        raise ValueError(f"Invalid subject ID format: {subject_id}")

    if 21 <= subject_number <= 46:
        return 'HC'
    elif -6 < hfa_value <= 0:
        return 'Early'
    elif -12 < hfa_value <= -6:
        return 'Moderate'
    else:
        return 'Severe'
    
def plot_group_comparison_eccentricity(visual_area, task_key, ecc_label, ecc_range, save_dir, current_atlas):
    """Plot group comparison for eccentricity."""
    plt.figure(figsize=(6, 6))
    if ecc_label == "0.5-6":
        x_ecc = np.linspace(0, 6, 500)
    elif ecc_label == "7-25":
        x_ecc = np.linspace(7, 25, 500)
    else:
        x_ecc = np.linspace(0, 25, 500)

    for group_name, subjects in groups.items():
        all_cdfs = []
        for subj in subjects:
            if tasks[task_key].get("rest") and subj in restingstate_exclusions:
                continue
            eccs = load_eccentricity(subj, visual_area, task_key, ecc_range, atlas)
            if eccs:
                sorted_ecc = np.sort(eccs)
                cdf = np.linspace(0, 1, len(sorted_ecc))
                grid_cdf = np.interp(x_ecc, sorted_ecc, cdf, left=0, right=1)
                all_cdfs.append(grid_cdf)

        if all_cdfs:
            all_cdfs = np.array(all_cdfs)
            median = np.median(all_cdfs, axis=0)
            p25 = np.percentile(all_cdfs, 25, axis=0)
            p75 = np.percentile(all_cdfs, 75, axis=0)
            color = 'red' if group_name == "POAG" else 'blue'
            plt.plot(x_ecc, median, color=color, linewidth=2.5, label=f"{group_name} (N={len(all_cdfs)})")
            plt.fill_between(x_ecc, p25, p75, color=color, alpha=0.2)

    plt.title(f"{visual_area} - {task_key} - {ecc_label} (Eccentricity)")
    plt.xlabel("Source Eccentricity (deg)")
    plt.ylabel("Cumulative Probability")
    plt.grid(True)
    plt.xlim(x_ecc[0], x_ecc[-1])
    plt.legend()
    plt.savefig(os.path.join(save_dir, f"{visual_area}_{task_key}_{ecc_label}_group_comparison_eccentricity.jpg"), dpi=300)
    plt.close()

def plot_hfa_colored_with_median_controls_eccentricity(visual_area, task_key, ecc_label, ecc_range, save_dir, current_atlas):
    """Plot POAG subjects colored by HFA + HC median/CI (eccentricity)."""
    plt.figure(figsize=(8,6))
    x_ecc = np.linspace(0, 6, 500) if ecc_label == "0.5-6" else np.linspace(7, 25, 500)

    hc_cdfs = []
    for subj in groups["HC"]:
        if tasks[task_key].get("rest") and subj in restingstate_exclusions:
            continue
        eccs = load_eccentricity(subj, visual_area, task_key, ecc_range, atlas)
        if eccs:
            sorted_ecc = np.sort(eccs)
            cdf = np.linspace(0, 1, len(sorted_ecc))
            grid_cdf = np.interp(x_ecc, sorted_ecc, cdf, left=0, right=1)
            hc_cdfs.append(grid_cdf)

    if hc_cdfs:
        hc_cdfs = np.array(hc_cdfs)
        median_hc = np.median(hc_cdfs, axis=0)
        p25_hc = np.percentile(hc_cdfs, 25, axis=0)
        p75_hc = np.percentile(hc_cdfs, 75, axis=0)

        plt.plot(x_ecc, median_hc, color='blue', linewidth=2.5, label=f"HC Median (N={len(hc_cdfs)})")
        plt.fill_between(x_ecc, p25_hc, p75_hc, color='blue', alpha=0.2)

    for subj in groups["POAG"]:
        if tasks[task_key].get("rest") and subj in restingstate_exclusions:
            continue
        eccs = load_eccentricity(subj, visual_area, task_key, ecc_range, atlas)
        if eccs:
            hfa_value = hfa_values_combined.get(subj, 0)
            color = cm.Reds(norm(hfa_value))
            sorted_ecc = np.sort(eccs)
            cdf = np.linspace(0, 1, len(sorted_ecc))
            grid_cdf = np.interp(x_ecc, sorted_ecc, cdf, left=0, right=1)
            plt.plot(x_ecc, grid_cdf, linestyle='--', color=color, alpha=0.7)

    plt.title(f"{visual_area} - {task_key} - {ecc_label} (Eccentricity, HC Median only)")
    plt.xlabel("Source Eccentricity (deg)")
    plt.ylabel("Cumulative Probability")
    plt.grid(True)
    plt.xlim(x_ecc[0], x_ecc[-1])
    sm = plt.cm.ScalarMappable(cmap=cm.Reds, norm=norm)
    sm.set_array([])
    plt.colorbar(sm, label="HFA Value (Combined Eye)")
    plt.legend()
    plt.savefig(os.path.join(save_dir, f"{visual_area}_{task_key}_{ecc_label}_hfa_colored_HCmedian_eccentricity.jpg"), dpi=300)
    plt.close()

def plot_hfa_colored_with_all_controls_eccentricity(visual_area, task_key, ecc_label, ecc_range, save_dir, current_atlas):
    """Plot POAG subjects colored by HFA + all controls (eccentricity)."""
    plt.figure(figsize=(6,6))
    x_ecc = np.linspace(0, 6, 500) if ecc_label == "0.5-6" else np.linspace(7, 25, 500)

    for group_name, subjects in groups.items():
        color_map = cm.Reds if group_name == "POAG" else cm.Blues
        for subj in subjects:
            if tasks[task_key].get("rest") and subj in restingstate_exclusions:
                continue
            eccs = load_eccentricity(subj, visual_area, task_key, ecc_range, atlas)
            if eccs:
                sorted_ecc = np.sort(eccs)
                cdf = np.linspace(0, 1, len(sorted_ecc))
                grid_cdf = np.interp(x_ecc, sorted_ecc, cdf, left=0, right=1)
                if group_name == "POAG":
                    hfa_value = hfa_values_combined.get(subj, 0)
                    color = color_map(norm(hfa_value))
                    plt.plot(x_ecc, grid_cdf, linestyle='--', color=color, alpha=0.7)
                else:
                    plt.plot(x_ecc, grid_cdf, linestyle='--', color='lightblue', alpha=0.5)

    plt.title(f"{visual_area} - {task_key} - {ecc_label} (Eccentricity, All controls)")
    plt.xlabel("Source Eccentricity (deg)")
    plt.ylabel("Cumulative Probability")
    plt.grid(True)
    plt.xlim(x_ecc[0], x_ecc[-1])
    sm = plt.cm.ScalarMappable(cmap=cm.Reds, norm=norm)
    sm.set_array([])
    plt.colorbar(sm, label="HFA Value (Combined Eye)")
    plt.savefig(os.path.join(save_dir, f"{visual_area}_{task_key}_{ecc_label}_hfa_colored_allcontrols_eccentricity.jpg"), dpi=300)
    plt.close()

def plot_histograms_eccentricity(visual_area, task_key, ecc_label, ecc_range, save_dir, current_atlas):
    """Plot histograms for eccentricity."""
    hist_dir = os.path.join(save_dir, 'histograms_eccentricity')
    os.makedirs(hist_dir, exist_ok=True)

    ecc_values = {group: [] for group in groups}
    included_subjects = {group: 0 for group in groups}

    # Define bins according to eccentricity range
    if ecc_label == "0.5-6":
        bins = np.linspace(0, 6, 30)
    elif ecc_label == "7-25":
        bins = np.linspace(7, 25, 30)
    else:
        bins = np.linspace(0, 25, 30)
    centers = (bins[:-1] + bins[1:]) / 2

    for group_name, subjects in groups.items():
        for subj in subjects:
            if tasks[task_key].get("rest") and subj in restingstate_exclusions:
                continue
            eccs = load_eccentricity(subj, visual_area, task_key, ecc_range, atlas)
            if eccs:
                ecc_values[group_name].append((subj, np.array(eccs)))
                included_subjects[group_name] += 1

    # A) Group-level histograms with confidence intervals
    plt.figure(figsize=(6, 6))
    for group_name in ["HC", "POAG"]:
        subj_histograms = []
        for subj, eccs in ecc_values[group_name]:
            counts, _ = np.histogram(eccs, bins=bins)
            subj_histograms.append(counts)

        if subj_histograms:
            hist_array = np.array(subj_histograms)
            median = np.median(hist_array, axis=0)
            p25 = np.percentile(hist_array, 25, axis=0)
            p75 = np.percentile(hist_array, 75, axis=0)
            color = 'blue' if group_name == "HC" else 'red'
            plt.plot(centers, median, color=color, linewidth=2.5, label=f'{group_name} Median (N={included_subjects[group_name]})')
            plt.fill_between(centers, p25, p75, color=color, alpha=0.2)

    plt.title(f"{visual_area} - {task_key} - {ecc_label} Histogram (Eccentricity, Group CIs)")
    plt.xlabel("Source Eccentricity (deg)")
    plt.ylabel("Frequency")
    plt.grid(True)
    plt.legend()
    plt.xlim(bins[0], bins[-1])
    plt.savefig(os.path.join(hist_dir, f"{visual_area}_{task_key}_{ecc_label}_hist_groupCI_eccentricity.jpg"), dpi=300)
    plt.close()

    # B) POAG individuals + HC group median
    plt.figure(figsize=(6, 6))
    if ecc_values["HC"]:
        histograms = []
        for subj, eccs in ecc_values["HC"]:
            counts, _ = np.histogram(eccs, bins=bins)
            histograms.append(counts)
        histograms = np.array(histograms)
        median_hc = np.median(histograms, axis=0)
        p25_hc = np.percentile(histograms, 25, axis=0)
        p75_hc = np.percentile(histograms, 75, axis=0)
        plt.plot(centers, median_hc, color='blue', linewidth=2.5, label=f'HC Median (N={included_subjects["HC"]})')
        plt.fill_between(centers, p25_hc, p75_hc, color='blue', alpha=0.2)

    for subj, eccs in ecc_values["POAG"]:
        hfa_value = hfa_values_combined.get(subj, 0)
        color = cm.Reds(norm(hfa_value))
        counts, _ = np.histogram(eccs, bins=bins)
        plt.plot(centers, counts, linestyle='--', color=color, alpha=0.7)

    plt.title(f"{visual_area} - {task_key} - {ecc_label} Histogram (Eccentricity, POAG + HC)")
    plt.xlabel("Source Eccentricity (deg)")
    plt.ylabel("Frequency")
    plt.legend()
    plt.grid(True)
    plt.xlim(bins[0], bins[-1])
    plt.savefig(os.path.join(hist_dir, f"{visual_area}_{task_key}_{ecc_label}_hist_poag_hcmedian_eccentricity.jpg"), dpi=300)
    plt.close()

def plot_hfa_colored_with_median_controls(visual_area, task_key, ecc_label, ecc_range, save_dir, current_atlas):
    """Plot POAG subjects colored by HFA + HC median/CI. Highlight binocular/monocular advanced in specific colors."""
    plt.figure(figsize=(8,6))

    # 1. Compute HC median and CI
    hc_cdfs = []
    for subj in groups["HC"]:
        if tasks[task_key].get("rest") and subj in restingstate_exclusions:
            continue
        sigmas = load_sigma(subj, visual_area, task_key, ecc_range, atlas)
        if sigmas:
            sorted_sigma = np.sort(sigmas)
            cdf = np.linspace(0, 1, len(sorted_sigma))
            grid_cdf = np.interp(x, sorted_sigma, cdf, left=0, right=1)
            hc_cdfs.append(grid_cdf)

    if hc_cdfs:
        hc_cdfs = np.array(hc_cdfs)
        median_hc = np.median(hc_cdfs, axis=0)
        p25_hc = np.percentile(hc_cdfs, 25, axis=0)
        p75_hc = np.percentile(hc_cdfs, 75, axis=0)

        plt.plot(x, median_hc, color='blue', linewidth=2.5, label=f"HC Median (N={len(hc_cdfs)})")
        plt.fill_between(x, p25_hc, p75_hc, color='blue', alpha=0.2)

    # 2. Plot POAG individuals, coloring special cases if RET or RET2
    for subj in groups["POAG"]:
        if tasks[task_key].get("rest") and subj in restingstate_exclusions:
            continue
        sigmas = load_sigma(subj, visual_area, task_key, ecc_range, atlas)
        if not sigmas:
            continue

        hfa_value = hfa_values_combined.get(subj, 0)
        sorted_sigma = np.sort(sigmas)
        cdf = np.linspace(0, 1, len(sorted_sigma))
        grid_cdf = np.interp(x, sorted_sigma, cdf, left=0, right=1)

        # Special plotting logic
        if task_key == "RET" and subj in binocular_advanced:
            plt.plot(x, grid_cdf, linestyle='-', color='green', linewidth=2.5,
                     label=f"{subj} = {binocular_advanced[subj]} MD")
        elif task_key == "RET2" and subj in monocular_advanced:
            plt.plot(x, grid_cdf, linestyle='-', color='green', linewidth=2.5,
                     label=f"{subj} = {monocular_advanced[subj]} MD")
        else:
            color = cm.Reds(norm(hfa_value))
            plt.plot(x, grid_cdf, linestyle='--', color=color, alpha=0.7)

    # Final touches
    plt.title(f"Connective Field size for {visual_area} to V1")
    plt.xlabel("Connective Field Sigma (mm)")
    plt.ylabel("Cumulative Probability)")
    plt.grid(True)
    plt.xlim(0, 2)

    sm = plt.cm.ScalarMappable(cmap=cm.Reds, norm=norm)
    sm.set_array([])
    plt.colorbar(sm, label="HFA Value (Most Affected Eye)")
    plt.legend()
    plt.savefig(os.path.join(save_dir, f"{visual_area}_{task_key}_{ecc_label}_hfa_colored_HCmedian.jpg"), dpi=300)
    plt.close()

In [None]:
# Main Script
# Step 1: Create output directory structure
create_output_dirs(cumulative)

# Step 2: Loop through tasks, ecc ranges and generate plots
for visual_area in visual_areas:

    for task_key, task_info in tasks.items():
        task_label = task_name_map[task_key]
        if task_key in ["RET", "RET2"]:
            ecc_ranges = eccentricity_ranges_ret
        else:
            ecc_ranges = eccentricity_ranges_all
        for ecc_label, ecc_range in ecc_ranges.items():
            # Determine atlas usage
            if ecc_label == "0.5-6":
                atlases = ["manual"]
                if task_key in ["REST1", "REST2"]:
                    atlases.append("benson")  # For resting state, do both
            else:
                atlases = ["benson"]

            for current_atlas in atlases:
                base_dir = os.path.join(cumulative, task_label, ecc_label, current_atlas)
                group_comp_dir = os.path.join(base_dir, 'group_comparison')
                hfa_colored_dir = os.path.join(base_dir, 'hfa_colored_subjects')

                # Ensure directories
                os.makedirs(group_comp_dir, exist_ok=True)
                os.makedirs(hfa_colored_dir, exist_ok=True)

                # 1) Group comparison plot
                plot_group_comparison(visual_area, task_key, ecc_label, ecc_range, group_comp_dir, current_atlas)
                plot_group_comparison_eccentricity(visual_area, task_key, ecc_label, ecc_range, group_comp_dir, current_atlas)

                # 2) Subject plots
                plot_hfa_colored_with_all_controls(visual_area, task_key, ecc_label, ecc_range, hfa_colored_dir, current_atlas)
                plot_hfa_colored_with_all_controls_eccentricity(visual_area, task_key, ecc_label, ecc_range, hfa_colored_dir, current_atlas)
                plot_hfa_colored_with_median_controls(visual_area, task_key, ecc_label, ecc_range, hfa_colored_dir, current_atlas)
                plot_hfa_colored_with_median_controls_eccentricity(visual_area, task_key, ecc_label, ecc_range, hfa_colored_dir, current_atlas)

                # 3) Histogram plots
                #plot_histograms(visual_area, task_key, ecc_label, ecc_range, base_dir, current_atlas)
                #plot_histograms_eccentricity(visual_area, task_key, ecc_label, ecc_range, base_dir, current_atlas)

  plt.colorbar(sm, label="HFA Value (Combined Eye)")
  plt.colorbar(sm, label="HFA Value (Combined Eye)")
  plt.colorbar(sm, label="HFA Value (Most Affected Eye)")
  plt.colorbar(sm, label="HFA Value (Combined Eye)")
  plt.colorbar(sm, label="HFA Value (Combined Eye)")
  plt.colorbar(sm, label="HFA Value (Combined Eye)")
  plt.colorbar(sm, label="HFA Value (Most Affected Eye)")
  plt.colorbar(sm, label="HFA Value (Combined Eye)")
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
  plt.colorbar(sm, label="HFA Value (Combined Eye)")
  plt.colorbar(sm, label="HFA Value (Combined Eye)")
  plt.colorbar(sm, label="HFA Value (Most Affected Eye)")
No artists with labels found to put in legend.  Note that artists whose label start with

In [None]:
def plot_group_comparison(visual_area, task, save_dir, atlas):
    """Plot group comparison median + CI."""
    plt.figure(figsize=(6,6))
    for group_name, subjects in groups.items():
        all_cdfs = []
        for subj in subjects:
            sigmas = load_sigma(subj, visual_area, task_key, ecc_range, atlas)
            # np.sort sort sigma values 
            sorted_sigma = np.sort(sigmas)
            # np.linspace creates a linear CDF from 0 to 1 for the sorted values
            cdf = np.linspace(0, 1, len(sorted_sigma))
            # np.interp basically connects the dots with straight lines over a common grid x so that all subjects’ can be averaged
            grid_cdf = np.interp(x, sorted_sigma, cdf, left=0, right=1)
            all_cdfs.append(grid_cdf)

        if all_cdfs:
            all_cdfs = np.array(all_cdfs)
            # Compute median and 25–75% confidence intervals
            median = np.median(all_cdfs, axis=0)
            p25 = np.percentile(all_cdfs, 25, axis=0)
            p75 = np.percentile(all_cdfs, 75, axis=0)
            color = 'red' if group_name == "POAG" else 'blue'
            plt.plot(x, median, color=color, linewidth=2.5, label=f"{group_name} (N={len(all_cdfs)})")
            plt.fill_between(x, p25, p75, color=color, alpha=0.2)

    plt.title(f"{visual_area} - {task}")
    plt.xlabel("Connective Field Sigma (mm)")
    plt.ylabel("Cumulative Probability")
    plt.grid(True)
    plt.xlim(0, 2)
    plt.legend()
    plt.savefig(os.path.join(save_dir, f"{visual_area}_{task}_group_comparison.jpg"))
    plt.close()

visual_area = 'V2'
task = 'RET'
atlas = 'benson'
save_dir = '/Volumes/FedericaCardillo/pre-processing/projects/PROJECT_EGRET-AAA/derivatives/CFM'
plot_group_comparison(visual_area, task, save_dir, atlas)

NameError: name 'plt' is not defined

In [26]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt

# Define eccentricity bins
ecc_bins = [0, 1, 2, 3, 4, 5, 6, 7]
threshold = 0.3

# Assuming hfa_values_combined and group_subject are defined elsewhere
subject_groups = {subj: group_subject(hfa, subj) for subj, hfa in hfa_values_combined.items()}# Setup

visual_areas = ['V2', 'V3']
atlases = ['manual', 'benson']
tasks = ['RET', 'RET2', 'RestingState']
hemispheres = ['lh', 'rh']
denoising = 'nordic'
source = 'V1'

# Output path
MAIN_PATH = '/Volumes/FedericaCardillo/pre-processing/projects/PROJECT_EGRET-AAA/derivatives'
save_dir = f'{MAIN_PATH}/analysis_results/plots/'
os.makedirs(save_dir, exist_ok=True)

# Group name mapping
group_labels = {
    'HC': 'Healthy Controls',
    'Early': 'Glaucoma Early',
    'Moderate': 'Glaucoma Moderate',
    'Severe': 'Glaucoma Severe'
}

# Count subjects per group
group_subject_counts = {
    'HC': sum(1 for g in subject_groups.values() if g == 'HC'),
    'Early': sum(1 for g in subject_groups.values() if g == 'Early'),
    'Moderate': sum(1 for g in subject_groups.values() if g == 'Moderate'),
    'Severe': sum(1 for g in subject_groups.values() if g == 'Severe')
}

# Main loop
for atlas in atlases:
    for task in tasks:
        # Skip Benson atlas for RET and RET2
        if task in ['RET', 'RET2'] and atlas == 'benson':
            continue

        runs = [''] if task != 'RestingState' else ['run-1', 'run-2']

        for run in runs:
            for visual_area in visual_areas:
                fig, ax = plt.subplots(figsize=(6, 6))
                group_counts_table = []

                for group in ['HC', 'Early', 'Moderate', 'Severe']:
                    percent_below_all_bins = []
                    group_total = 0
                    group_below = 0

                    for bin_start, bin_end in zip(ecc_bins[:-1], ecc_bins[1:]):
                        n_total = 0
                        n_below = 0

                        for subj, group_label in subject_groups.items():
                            if group_label != group:
                                continue

                            for hemi in hemispheres:
                                path = f"{MAIN_PATH}/CFM/{subj}/ses-02/{atlas}/{task}/{run}/{denoising}/GM/{hemi}/{visual_area}-{source}/best_fits_prf.csv"
                                if not os.path.exists(path):
                                    continue
                                try:
                                    data = pd.read_csv(path)
                                except Exception as e:
                                    print(f"Error reading {path}: {e}")
                                    continue

                                mask = (data['Source Eccentricity'] >= bin_start) & (data['Source Eccentricity'] < bin_end)
                                n_bin_total = mask.sum()
                                n_bin_below = ((data['Best Variance Explained Finer'] < threshold) & mask).sum()
                                n_total += n_bin_total
                                n_below += n_bin_below

                        group_total += n_total
                        group_below += n_below

                        percent_below = (n_below / n_total) * 100 if n_total > 0 else np.nan
                        percent_below_all_bins.append(percent_below)

                    if not all(np.isnan(percent_below_all_bins)):
                        # Handle name switch for RET2
                        label = 'HC sim' if group == 'HC' and task == 'RET2' else group_labels[group]
                        ax.plot(ecc_bins[:-1], percent_below_all_bins, label=label)

                        # Add to summary table
                        n_subjects = group_subject_counts[group]
                        group_counts_table.append({
                            'Task': task,
                            'Run': run if run else 'single',
                            'Atlas': atlas,
                            'Visual Area': visual_area,
                            'Group': label,
                            'N Subjects': n_subjects,
                            'Total Vertices': group_total,
                            'Below Threshold': group_below
                        })

                ax.set_xlabel('Eccentricity (deg)')
                ax.set_ylabel('% vertices below VE threshold (0.3)')
                ax.set_title(f'{visual_area} - {atlas} - {task} {run if run else ""}')
                ax.set_ylim(0, 50)
                ax.legend()
                ax.grid(True)
                plt.tight_layout()

                # Save plot
                plot_filename = f'{visual_area}_{atlas}_{task}_{run if run else "single"}.png'
                plt.savefig(os.path.join(save_dir, plot_filename))
                plt.close()

                # Save group counts table
                df_table = pd.DataFrame(group_counts_table)
                table_filename = f'counts_table_{visual_area}_{atlas}_{task}_{run if run else "single"}.csv'
                df_table.to_csv(os.path.join(save_dir, table_filename), index=False)


No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
