In [1]:
# Stop warnings
import warnings
warnings.filterwarnings("ignore")

import os
import time
import numpy as np
import pandas as pd
import neuropythy as ny
import matplotlib.pyplot as plt

# Project-local helper functions
from math_utils import weighted_regression, weighted_nan_percentile, weighted_nan_median


In [2]:
def bootstrap_ci_median(data, n_boot=1000, ci=95, random_state=None):
    """
    Percentile-bootstrap confidence interval for the median of a sample.

    The sample is resampled with replacement ``n_boot`` times (each resample
    has the same size as the NaN-free sample), the median of every resample
    is taken, and the interval bounds are read off as percentiles of those
    bootstrap medians.

    Parameters
    ----------
    data : array-like (Pandas Series, single-column DataFrame, or NumPy array)
        Observations; NaN entries are dropped before resampling.
    n_boot : int, optional
        How many bootstrap resamples to draw (default 1000).
    ci : float, optional
        Confidence level in percent (default 95, i.e. the 2.5th and 97.5th
        percentiles of the bootstrap medians).
    random_state : int or None
        Seed handed to ``np.random.default_rng``; ``None`` gives a fresh,
        non-reproducible generator.

    Returns
    -------
    tuple of (lower_bound, upper_bound)
        Bounds of the interval, or ``(nan, nan)`` when no non-NaN
        observation is available.

    Raises
    ------
    ValueError
        If ``data`` is a DataFrame with a number of columns other than one.
    """
    # A DataFrame is only accepted when it unambiguously holds one variable
    if isinstance(data, pd.DataFrame):
        n_columns = data.shape[1]
        if n_columns != 1:
            raise ValueError("DataFrame input must have exactly one column.")
        data = data.iloc[:, 0]

    # Flatten to a 1D NumPy array without missing values
    samples = pd.Series(data).dropna().values
    n_samples = len(samples)

    # Nothing left to resample from
    if n_samples == 0:
        return np.nan, np.nan

    rng = np.random.default_rng(seed=random_state)

    # One median per bootstrap resample
    boot_medians = []
    for _ in range(n_boot):
        resample = rng.choice(samples, size=n_samples, replace=True)
        boot_medians.append(np.median(resample))

    # Split the excluded probability mass equally between both tails
    tail = (100 - ci) / 2
    lower = np.percentile(boot_medians, tail)
    upper = np.percentile(boot_medians, 100 - tail)
    return lower, upper

In [3]:
# Analysis settings
# Visual areas that are summarised, in the order they are processed
rois = ['V1', 'V2', 'V3']
# Upper edge of the last eccentricity bin (same units as the 'prf_ecc' column)
max_ecc = 8
# Number of eccentricity bins used for the cortical-magnification summaries
num_ecc_pcm_bins = 10

In [4]:
# Get subject numbers
# The identifiers come from the `subject_list` attribute of neuropythy's
# 'hcp_lines' dataset; they are compared against the 'subject' column of the
# group table further down (presumably the same ID format — TODO confirm).
allsub = ny.data['hcp_lines'].subject_list

In [5]:
# Load the group-level pRF parameter table (tab-separated file).
# The cells below read its 'roi', 'subject', 'prf_ecc', 'prf_r2' and 'pRF_CM' columns.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs where this directory exists. Consider a configurable data directory.
path = '/home/jovyan/projects/pRF-project_NH2025/data/tsv/group'
grouptsv = pd.read_csv('{}/group_prf_parameters_5mm.tsv'.format(path), sep='\t')


In [6]:
# Eccentricity bin edges: 0 followed by num_ecc_pcm_bins log-spaced edges
# from 0.1 to max_ecc, i.e. num_ecc_pcm_bins bins in total.
ecc_bins = np.concatenate(([0], np.logspace(np.log10(0.1), np.log10(max_ecc), num=num_ecc_pcm_bins)))

start_time = time.time()

# One summary frame per (subject, ROI). They are collected in a list and
# concatenated once after the loop: concatenating inside the loop re-copies
# the growing table on every iteration and starts from an empty DataFrame.
subject_roi_frames = []
for subject_id in allsub:
    for roi in rois:
        df_subject_roi = grouptsv.loc[(grouptsv['roi'] == roi) & (grouptsv['subject'] == subject_id)]

        # pd.cut uses right-closed intervals here, so rows with prf_ecc == 0
        # or prf_ecc > max_ecc fall into no bin and are left out.
        # observed=False is explicit on purpose: every bin must yield a row,
        # even an empty one, so that each column below has exactly
        # num_ecc_pcm_bins entries. Newer pandas releases change the default
        # of `observed`, which would otherwise break the fixed-length layout.
        df_bins = df_subject_roi.groupby(
            pd.cut(df_subject_roi['prf_ecc'], bins=ecc_bins),
            observed=False,
        )

        # Per-bin summaries. The second argument of the weighted_* helpers is
        # prf_r2 (presumably used as the weight of each vertex; see math_utils).
        # NOTE: the '*_ci_*_bound' columns hold the 95th / 5th weighted
        # percentiles of pRF_CM within the bin, not a confidence interval of
        # the median.
        df_subject_roi_bin = pd.DataFrame({
            'roi': [roi] * num_ecc_pcm_bins,
            'subject_id': [subject_id] * num_ecc_pcm_bins,
            'num_bins': np.arange(num_ecc_pcm_bins),
            'prf_ecc_bins': df_bins.apply(
                lambda x: weighted_nan_median(x['prf_ecc'].values, x['prf_r2'].values)).values,
            'prf_pcm_bins_median': df_bins.apply(
                lambda x: weighted_nan_median(x['pRF_CM'].values, x['prf_r2'].values)).values,
            'prf_r2_bins_median': np.array(df_bins['prf_r2'].median()),
            'prf_pcm_bins_ci_upper_bound': df_bins.apply(
                lambda x: weighted_nan_percentile(x['pRF_CM'].values, x['prf_r2'].values, 95)).values,
            'prf_pcm_bins_ci_lower_bound': df_bins.apply(
                lambda x: weighted_nan_percentile(x['pRF_CM'].values, x['prf_r2'].values, 5)).values,
        })
        subject_roi_frames.append(df_subject_roi_bin)

# Row order is subject -> ROI -> bin; the per-frame 0..num_ecc_pcm_bins-1
# index is kept as-is (no ignore_index), matching the previous layout.
group_bin_df = pd.concat(subject_roi_frames) if subject_roi_frames else pd.DataFrame()
end_time = time.time()
print("Execution time: {:.2f} seconds".format(end_time - start_time))

Execution time: 79.20 seconds


In [10]:
# Fixed master seed so the bootstrapped confidence intervals (and therefore
# the exported table) are identical on every run of the notebook.
bootstrap_seed = 0
seed_rng = np.random.default_rng(bootstrap_seed)

# One summary frame per ROI, concatenated once after the loop
roi_frames = []
for roi in rois:
    df_roi = group_bin_df.loc[group_bin_df['roi'] == roi]
    # Pool the per-subject rows of each eccentricity bin
    group_subid_roi_df = df_roi.groupby(['num_bins'])

    # Bootstrapped 95% CI of the across-subject median: one (lower, upper)
    # tuple per bin. Each bin gets its own integer seed drawn from the master
    # generator, so bins are reproducible without sharing one resampling pattern.
    ci_results = group_subid_roi_df['prf_pcm_bins_median'].apply(
        lambda x: bootstrap_ci_median(
            x, n_boot=1000, ci=95, random_state=int(seed_rng.integers(2**32))
        )
    )

    # All columns are filled positionally (bins are in ascending num_bins
    # order), so the frame does not depend on index alignment.
    # The second argument of the weighted_* helpers is the per-subject median
    # R2 of the bin (presumably used as the weight; see math_utils).
    final_group_df_roi = pd.DataFrame({
        'roi': [roi] * num_ecc_pcm_bins,
        # median
        'prf_ecc_bins': group_subid_roi_df.apply(
            lambda x: weighted_nan_median(x['prf_ecc_bins'].values, x['prf_r2_bins_median'].values)).values,
        'prf_pcm_bins_median': group_subid_roi_df.apply(
            lambda x: weighted_nan_median(x['prf_pcm_bins_median'].values, x['prf_r2_bins_median'].values)).values,
        'prf_r2_bins_median': np.array(group_subid_roi_df['prf_r2_bins_median'].median()),
        # 95th / 5th weighted percentiles of the per-subject medians
        'prf_pcm_bins_median_upper_bound': group_subid_roi_df.apply(
            lambda x: weighted_nan_percentile(x['prf_pcm_bins_median'].values, x['prf_r2_bins_median'].values, 95)).values,
        'prf_pcm_bins_median_lower_bound': group_subid_roi_df.apply(
            lambda x: weighted_nan_percentile(x['prf_pcm_bins_median'].values, x['prf_r2_bins_median'].values, 5)).values,
        # bootstrapped CI bounds
        'prf_pcm_bins_median_lower_bound_ci': [ci[0] for ci in ci_results],
        'prf_pcm_bins_median_upper_bound_ci': [ci[1] for ci in ci_results],
    })
    roi_frames.append(final_group_df_roi)

final_group_df = pd.concat(roi_frames) if roi_frames else pd.DataFrame()
# The per-ROI positional index (0..num_ecc_pcm_bins-1) becomes the 'num_bin' column
final_group_df = final_group_df.reset_index().rename(columns={'index': 'num_bin'})
   

In [11]:
# Write both summary tables to disk as tab-separated files
tsv_dir = '/home/jovyan/projects/pRF-project_NH2025/data/tsv/group'
tsv_fn = '{}/final_group_df.tsv'.format(tsv_dir)
tsv_fn2 = '{}/group_bin_df.tsv'.format(tsv_dir)

# Create the output folder if it does not exist yet
os.makedirs(tsv_dir, exist_ok=True)

# Per-subject table first, then the group-level table; missing values are
# written as the literal string 'NaN' and the index is not exported.
for frame, destination in ((group_bin_df, tsv_fn2), (final_group_df, tsv_fn)):
    frame.to_csv(destination, sep="\t", na_rep='NaN', index=False)