## Setup

### Initial Imports

In [None]:
import os
import pandas as pd
import numpy as np
import sys
from pathlib import Path

# Ensure the parent directory is in the system path for module imports
sys.path.append(str(Path.cwd().parent))

from dataclasses import dataclass
from typing import List, Optional

from pytest import param
from zmq import has

### Plot Setup

In [None]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import matplotlib.pyplot as plt
import matplotx

# plt.style.use('science')  # Optional global style, currently disabled
# Notebook-wide matplotlib defaults, applied in a single update.
plt.rcParams.update({
    'figure.figsize': (10, 6),   # default figure size
    'figure.dpi': 300,           # default figure dpi
    'font.size': 12,             # default font size
    'lines.linewidth': 2,        # default line width
    'axes.labelsize': 14,        # default axis-label size
    'axes.titlesize': 16,        # default axes-title size
    'xtick.labelsize': 12,       # default x-tick label size
    'ytick.labelsize': 12,       # default y-tick label size
    'legend.fontsize': 12,       # default legend font size
    'figure.titlesize': 18,      # default figure-title size
})

## Gathering subjects' data

### Data Structures

Data structures used to store each subject's data.

In [None]:
from dataclasses import dataclass
import pandas as pd
import numpy as np

@dataclass
class SubjectData:
    """Container for everything gathered about one subject/session.

    Every field defaults to ``None``; instances are created empty and filled
    in step by step by the loading loop and the post-processing cells.

    NOTE: the summary DataFrame built later in this file selects fields whose
    annotation is exactly ``int`` or ``float``. Changing those annotations
    (e.g. to ``Optional[float]``) would silently drop the field from that
    DataFrame, so they are intentionally left as plain types.
    """
    # Identification (set from the subject/session numbers)
    name: str = None      # f'Subject{subject_n}'
    pid: str = None       # f'AOHC_{subject_n}'
    nb: int = None        # subject number
    session: str = None   # f'Session{session_n}'

    # Cone-density profile descriptors, read from `densities.csv`
    # (columns `width_<quadrant>` / `max_slope_<quadrant>`)
    width_nas: float = None
    width_tem: float = None
    width_inf: float = None
    width_sup: float = None
    max_slope_nas: float = None
    max_slope_tem: float = None
    max_slope_inf: float = None
    max_slope_sup: float = None

    # Foveal shape parameters, read from `fovea_3d_fitted_params.csv`
    oct_bump_X: float = None      # 'A20' parameter
    oct_bump_Y: float = None      # 'A02' parameter
    oct_width_X: float = None     # 'width_X', rescaled to degrees by the loader
    oct_width_Y: float = None     # 'width_Y', rescaled to degrees by the loader
    oct_max_slope: float = None
    oct_depth: float = None       # in mm (per the loader's comment)
    oct_flatness: float = None

    # Subject metadata taken from the Excel sheet
    age: float = None             # (date of visit - date of birth) in days / 365
    axial_length: float = None
    spherical_equiv: float = None
    sex: int = None               # 1 if recorded as 'F', 0 otherwise

    # Cone-density profiles along the X and Y axes (from `densities.csv`)
    eccs: np.ndarray = None           # index of the density table (eccentricities)
    density_X: pd.Series = None       # 'dens_smthd_X'
    density_Y: pd.Series = None       # 'dens_smthd_Y'
    density_fit_X: pd.Series = None   # 'dens_fit_X'
    density_fit_Y: pd.Series = None   # 'dens_fit_Y'

    # Layer profiles along X and Y (from `results.csv`, restricted to the
    # index range [-10, 10])
    cvi_X: pd.Series = None
    cvi_Y: pd.Series = None
    gcl_ipl_X: pd.Series = None
    gcl_ipl_Y: pd.Series = None
    onl_X: pd.Series = None
    onl_Y: pd.Series = None
    inl_opl_X: pd.Series = None
    inl_opl_Y: pd.Series = None
    rnfl_X: pd.Series = None
    rnfl_Y: pd.Series = None
    chrd_X: pd.Series = None
    chrd_Y: pd.Series = None
    pr_rpe_X: pd.Series = None
    pr_rpe_Y: pd.Series = None
    os_X: pd.Series = None
    os_Y: pd.Series = None

    # Cone counts computed by `get_nb_cones` (measured and fitted profiles)
    nb_cones: float = None
    nb_cones_fit: float = None

    # GCL+IPL pit descriptors computed by `get_gcl_width`
    width_gcl_X: float = None
    width_gcl_Y: float = None
    min_thick_gcl: float = None   # minimum of the X and Y estimates



@dataclass
class FoveaParams:
    """Fitted 3D fovea parameters for one subject/session.

    The four identification fields are required; every fitted parameter is
    optional and stays ``None`` until a value is read from the parameter CSV
    by `extract_fovea_data`.
    """
    # Patient information
    subject: str          # e.g. "Subject12"
    patient_id: str       # numeric part of the subject folder name, as a string
    subject_folder: str   # name of the subject directory on disk
    trial_name: str       # name of the session directory
    age: Optional[int] = None

    # Fitted parameters
    # A00..A11: presumably coefficients of the fitted surface - TODO confirm
    A00: Optional[float] = None
    A10: Optional[float] = None
    A01: Optional[float] = None
    A20: Optional[float] = None
    A02: Optional[float] = None
    A11: Optional[float] = None
    # `foveal_<name>` fields are filled from CSV rows named `<name>`
    foveal_depth: Optional[float] = None
    foveal_center_X: Optional[float] = None
    foveal_width_X: Optional[float] = None
    foveal_center_Y: Optional[float] = None
    foveal_width_Y: Optional[float] = None
    foveal_max_slope: Optional[float] = None
    foveal_flatness: Optional[float] = None
    foveal_volume: Optional[float] = None



In [None]:

# Debug switch: avoids having to rerun the pipeline for all subjects
# every time something needs to be tested on the model.
#
# Subject names sort as strings (Subject10, Subject100, Subject101, ...),
# so the "first five" subjects are listed explicitly instead of being derived.
#
# The list is meant to be used later to extract those five subjects from the
# list of subject data.

take_first_five = False
first_five_subjects = ["Subject10","Subject100","Subject101","Subject104","Subject105"]

### Function definitions

#### Foveal Data Extraction

In [None]:
def extract_fovea_data(base_path: str) -> List[FoveaParams]:
    """
    Extract fovea parameters from CSV files with known structure.

    The expected layout is
    ``<base_path>/Subject<N>*/Session*/layer_new/fovea_3d_fitted_params.csv``.
    Each CSV is read as ``;``-separated ``name;value`` rows. A row is stored
    on the resulting object when ``name`` or ``foveal_<name>`` is a declared
    `FoveaParams` field; other rows are ignored, so no undeclared attributes
    end up on the object (and therefore none leak into ``vars(obj)``).

    Subject and session folders are visited in sorted (string) order so the
    result is deterministic.

    Args:
        base_path: Path to the base directory containing subject folders
        (subjfolder/trialfolder/layer_new/fovea_3d_fitted_params.csv)

    Returns:
        List of FoveaParams objects, one for each found CSV file. Empty if
        `base_path` does not exist.
    """
    import re  # local import: `re` is not imported at file level

    subject_pattern = re.compile(r'Subject(\d+)')
    declared_fields = set(FoveaParams.__dataclass_fields__)
    fovea_data = []

    # Get only the subject directories (directories starting with "Subject")
    try:
        subject_dirs = sorted(
            d for d in os.listdir(base_path)
            if os.path.isdir(os.path.join(base_path, d)) and d.startswith("Subject")
        )
    except FileNotFoundError:
        print(f"Base path not found: {base_path}")
        return []

    for subject_dir in subject_dirs:
        subject_match = subject_pattern.search(subject_dir)
        if not subject_match:
            continue
        patient_id = subject_match.group(1)
        subject_path = os.path.join(base_path, subject_dir)

        try:
            # Get session directories (directories starting with "Session")
            session_dirs = sorted(
                d for d in os.listdir(subject_path)
                if os.path.isdir(os.path.join(subject_path, d)) and d.startswith("Session")
            )
        except FileNotFoundError:
            # The folder disappeared between the two listings; skip it.
            continue

        for session_dir in session_dirs:
            csv_path = os.path.join(subject_path, session_dir, "layer_new", "fovea_3d_fitted_params.csv")
            if not os.path.isfile(csv_path):
                continue

            print(f"Processing file: {csv_path} for patient {patient_id}")
            df = pd.read_csv(csv_path, sep=';', header=None, names=['param', 'value'])

            fovea_obj = FoveaParams(
                patient_id=str(patient_id),
                subject=f"Subject{patient_id}",
                subject_folder=subject_dir,
                trial_name=session_dir
            )

            for param_name, param_value in zip(df['param'], df['value']):
                # The file's own header row carries "params" in the value column.
                if param_value == "params":
                    continue
                # A missing name is read as NaN, which cannot name an attribute.
                if not isinstance(param_name, str):
                    continue

                targets = [
                    candidate
                    for candidate in (param_name, f"foveal_{param_name}")
                    if candidate in declared_fields
                ]
                if not targets:
                    continue

                try:
                    numeric_value = float(param_value)
                except (TypeError, ValueError):
                    print(f"Error setting {param_name} for patient {patient_id}: {param_value}")
                    continue

                for target in targets:
                    setattr(fovea_obj, target, numeric_value)

            fovea_data.append(fovea_obj)

    return fovea_data

def save_to_dataframe(fovea_data: List[FoveaParams], output_file: str = "fovea_parameters.csv") -> pd.DataFrame:
    """
    Tabulate FoveaParams objects and write the table to a CSV file.

    Each object contributes one row made of its instance attributes
    (``vars(obj)``); the table is written without the index column.

    Args:
        fovea_data: List of FoveaParams objects
        output_file: Path to save the CSV file

    Returns:
        DataFrame containing all fovea data
    """
    table = pd.DataFrame(list(map(vars, fovea_data)))
    table.to_csv(output_file, index=False)
    return table



## Loading data

### Imports


In [None]:
from pathlib import Path
from typing import List, Tuple, Dict




from src.cell.analysis.constants import MM_PER_DEGREE
from src.cell.layer.helpers import gaussian_filter_nan
from src.configs.parser import Parser

### Loading

In [None]:
Parser.initialize()

# Look-up table of (subject number, session number) pairs: one pair of
# whitespace-separated integers per line. Blank lines are skipped so they do
# not break the two-value unpacking in the loading loop.
with open('../src/processed.txt') as processed_file:
    subjects_sessions = [
        [int(n) for n in line.split()]
        for line in processed_file
        if line.strip()
    ]


try:
    sheet = pd.ExcelFile(r'V:\Studies\AOSLO\data\cohorts\AOSLO healthy\DATA_HC+DM.xlsx').parse('Healthy', header=0, nrows=45, index_col=0)
    sheet.index = sheet.index.map(lambda x: f'Subject{x}')
    age_dict = ((sheet['Date of visit'] - sheet['DDN']).dt.days / 365).to_dict()
    # Pick the right-eye ('OD') column when it is the recorded laterality, otherwise the left-eye one.
    axial_dict = sheet['AL D (mm)'].where(sheet['Laterality'] == 'OD', sheet['AL G (mm)']).to_dict()
    spherical_dict = sheet['Equi Sph D'].where(sheet['Laterality'] == 'OD', sheet['Equi Sph G']).to_dict()
    sex_dict = sheet['Sexe'].map(lambda x: 1 if x == 'F' else 0).to_dict()
except Exception as err:
    # Best-effort fallback when the sheet cannot be read: define *all* four
    # look-up tables (previously only `age_dict` was set, so the loading loop
    # failed with a NameError on the others) and say why.
    print(f'Could not load subject metadata from the Excel sheet: {err!r}')
    age_dict = {}
    axial_dict = {}
    spherical_dict = {}
    sex_dict = {}
base_path = Path(r'P:\AOSLO\_automation\_PROCESSED\Photoreceptors\Healthy\_Results')


# subject for which OCTs are tilted (white dot is not well aligned with PR+RPE peak)
# see explanation in `PRxRLT_expmanual.ipynb`
oct_to_exclude = {
    13, 18, 20, 25, 26, 30, 35, 42, 46, 66, 100, 105,
}


# SubjectData attribute suffix -> quadrant name used in the density CSV columns.
_quadrants = {'nas': 'nasal', 'tem': 'temporal', 'inf': 'inferior', 'sup': 'superior'}
# SubjectData attribute prefix -> column prefix in the layer CSV.
_layer_columns = {
    'cvi': 'CVI',
    'gcl_ipl': 'GCL+IPL',
    'onl': 'ONL',
    'inl_opl': 'INL+OPL',
    'rnfl': 'RNFL',
    'chrd': 'Choroid',
    'pr_rpe': 'PhotoR+RPE',
    'os': 'OS',
}

subjects_data: List[SubjectData] = []
for subject_n, session_n in subjects_sessions:
    # Subjects with tilted OCTs are left out entirely.
    if subject_n in oct_to_exclude:
        continue

    sd = SubjectData(
        name=f'Subject{subject_n}',
        pid=f'AOHC_{subject_n}',
        nb=subject_n,
        session=f'Session{session_n}',
    )

    path = base_path / sd.name / sd.session
    print(f'Loading {sd.name} {sd.session}...')

    # Subject metadata from the Excel sheet.
    for _attr, _table in (
        ('age', age_dict),
        ('axial_length', axial_dict),
        ('spherical_equiv', spherical_dict),
        ('sex', sex_dict),
    ):
        setattr(sd, _attr, _table[sd.name])

    # Foveal shape parameters (populated by `src/save_layer_features.ipynb`).
    df_oct = pd.read_csv(
        path / Parser.get_layer_thickness_dir() / 'fovea_3d_fitted_params.csv',
        sep=';',
        index_col=0,
    )
    sd.oct_bump_X, sd.oct_bump_Y = df_oct.loc['A20', 'params'], df_oct.loc['A02', 'params']
    _width_factor = np.sqrt(2 * 2.8)
    sd.oct_width_X = df_oct.loc['width_X', 'params'] * _width_factor / MM_PER_DEGREE  # in °
    sd.oct_width_Y = df_oct.loc['width_Y', 'params'] * _width_factor / MM_PER_DEGREE  # in °
    sd.oct_max_slope = df_oct.loc['max_slope', 'params']
    sd.oct_depth = df_oct.loc['depth', 'params']  # in mm
    sd.oct_flatness = df_oct.loc['flatness', 'params']

    # Cone density and fitted parameters
    # (populated by `src/cell/analysis/density_analysis_pipeline_manager.py`).
    _density_dir = path / Parser.get_density_analysis_dir()
    df_density = pd.read_csv(_density_dir / 'densities.csv', sep=';', index_col=0)
    df_raw_density_x = pd.read_csv(_density_dir / 'densities_raw_x.csv', sep=';', index_col=0)
    df_raw_density_y = pd.read_csv(_density_dir / 'densities_raw_y.csv', sep=';', index_col=0)

    # Scalar descriptors are stored once per file; take the first row.
    for _short, _quadrant in _quadrants.items():
        setattr(sd, f'width_{_short}', df_density[f'width_{_quadrant}'].iloc[0])
    for _short, _quadrant in _quadrants.items():
        setattr(sd, f'max_slope_{_short}', df_density[f'max_slope_{_quadrant}'].iloc[0])
    for _axis in ('X', 'Y'):
        setattr(sd, f'density_{_axis}', df_density[f'dens_smthd_{_axis}'])
    for _axis in ('X', 'Y'):
        setattr(sd, f'density_fit_{_axis}', df_density[f'dens_fit_{_axis}'])

    sd.eccs = df_density.index.to_numpy()

    # Layer profiles, restricted to the index range [-10, 10].
    df_thick = pd.read_csv(
        _density_dir / 'results.csv', sep=',', index_col=0, skiprows=1
    ).query('-10 <= index <= 10')
    for _prefix, _column in _layer_columns.items():
        for _axis in ('X', 'Y'):
            setattr(sd, f'{_prefix}_{_axis}', df_thick[f'{_column}_{_axis}'])

    subjects_data.append(sd)

#### Populating Additional fields based on the previously gathered data

In [None]:
def get_nb_cones(ecc: np.ndarray, dens_X: pd.Series, dens_Y: pd.Series, radius: float, smoothen: bool = True) -> float:
    '''
    Estimate the number of cones inside a disk of radius `radius` (in degree)
    centred on the density peak.

    The density profiles along the X and Y axes are sampled radially (linear
    interpolation) in the four directions +X, -X, +Y and -Y, starting from the
    sub-sample peak position of each profile. The four radial profiles are
    averaged radius by radius, and the mean profile rho(R) is integrated over
    the disk: ``MM_PER_DEGREE**2 * 2 * pi * integral(rho(R) * R dR)``.

    Parameters:
        ecc: eccentricities at which the profiles are sampled (increasing).
        dens_X, dens_Y: density profiles along the X and Y axes.
        radius: radius of the disk, in degrees.
        smoothen: if True, smooth the profiles with `gaussian_filter_nan`
            (sigma=4) first; otherwise use them as they are.

    Returns:
        The integrated number of cones within the disk.
    '''
    smthd_x = gaussian_filter_nan(dens_X, sigma=4) if smoothen else dens_X.to_numpy()
    smthd_y = gaussian_filter_nan(dens_Y, sigma=4) if smoothen else dens_Y.to_numpy()

    def _peak_position(profile):
        # Vertex of the parabola fitted through the (up to) five samples
        # around the maximum. The window is clamped to the array bounds so a
        # peak near the edge cannot produce a negative slice start.
        i_max = int(np.nanargmax(profile))
        lo = max(i_max - 2, 0)
        hi = min(i_max + 3, len(profile))
        a, b, _ = np.polyfit(ecc[lo:hi], profile[lo:hi], 2)
        return -b / (2 * a)

    x_amax = _peak_position(smthd_x)
    y_amax = _peak_position(smthd_y)

    R = np.linspace(0.0001, radius, 500) # radius in degrees
    # One row per direction, shape (4, len(R)). The rows must be stacked, not
    # concatenated: with a flat 1-D array the mean over axis 0 collapses to a
    # single number and the radial weighting of the integral is lost.
    radial_profiles = np.vstack([
        np.interp(x_amax + R, ecc, smthd_x),
        np.interp(x_amax - R, ecc, smthd_x),
        np.interp(y_amax + R, ecc, smthd_y),
        np.interp(y_amax - R, ecc, smthd_y),
    ])
    mean_profile = np.nanmean(radial_profiles, axis=0)

    norm_coef = MM_PER_DEGREE**2 * 2 * np.pi
    # `np.trapz` was renamed `np.trapezoid` in NumPy 2.0; use whichever exists.
    trapezoid = getattr(np, 'trapezoid', None) or np.trapz
    # integrate cone density over disk to get total nb of cones
    return norm_coef * trapezoid(mean_profile * R, R)

RADIUS = 3.33 # degree
for sd in subjects_data:
    # Measured profiles are smoothed (default); fitted profiles are used as they are.
    sd.nb_cones = get_nb_cones(
        ecc=sd.eccs, dens_X=sd.density_X, dens_Y=sd.density_Y, radius=RADIUS
    )
    sd.nb_cones_fit = get_nb_cones(
        ecc=sd.eccs, dens_X=sd.density_fit_X, dens_Y=sd.density_fit_Y, radius=RADIUS, smoothen=False
    )

In [None]:
from scipy.signal import find_peaks

def adjust_flat(gcl_data: np.ndarray, peak_left: int, peak_right: int) -> np.ndarray:
    """Remove the straight-line tilt between two samples of a profile.

    The line through `gcl_data[peak_left]` and `gcl_data[peak_right]` is
    subtracted (anchored at `peak_left`), so both samples end up at the value
    of `gcl_data[peak_left]`.
    """
    rise = gcl_data[peak_right] - gcl_data[peak_left]
    run = peak_right - peak_left
    offsets = np.arange(len(gcl_data)) - peak_left
    return gcl_data - (rise / run) * offsets

def get_gcl_width(gcl: pd.Series) -> Tuple[float, float]:
    '''
    Given the GCL+IPL thickness profile, compute the width of the pit as well
    as the minimum thickness of the layer.

    The width of the pit is the distance between the outermost points where
    the (tilt-corrected, smoothed) thickness crosses 20% of the pit depth
    above the pit bottom. The depth of the pit is the difference between the
    thickness at the rim peak and the thickness at the pit's bottom.

    The minimum thickness is the vertex value of a parabola fitted through the
    ten thinnest samples; if that fit degenerates to a straight line, the
    smallest of those samples is returned instead.

    Parameters:
        gcl: thickness profile indexed by eccentricity; only |index| <= 6 is used.

    Returns:
        (width of the pit, minimum thickness)

    Raises:
        ValueError: if no rim peak is found on both sides of the pit, or if
            the profile never crosses the 20% level.
    '''
    # Keep the name now: `gcl` is replaced by a plain array right below.
    profile_name = gcl.name
    eccs = gcl[np.abs(gcl.index) <= 6].index.to_numpy()
    thickness = gcl.interpolate(method='polynomial', order=1)[eccs].to_numpy()

    # Look for one rim peak in the left third and one in the right third,
    # smoothing progressively more until both are found.
    peak_left, peak_right = [], []
    smooth_param = 3
    while (not peak_left or not peak_right) and smooth_param < 10:
        smoothed_gcl = gaussian_filter_nan(thickness, smooth_param)
        peaks = find_peaks(smoothed_gcl)[0]
        peak_left = [peak for peak in peaks if peak < len(smoothed_gcl) / 3]
        peak_right = [peak for peak in peaks if peak > 2 * len(smoothed_gcl) / 3]
        smooth_param += 1
    if not peak_left or not peak_right:
        raise ValueError(f'No peaks found for {profile_name}')
    peak_left = round(np.mean(peak_left))
    peak_right = round(np.mean(peak_right))

    # Level the two rim peaks before measuring the pit.
    adjusted_gcl = adjust_flat(thickness, peak_left, peak_right)
    smoothed_aj_gcl = gaussian_filter_nan(adjusted_gcl, 2)

    y_min = np.nanmin(smoothed_aj_gcl[peak_left:peak_right])
    y_target = y_min + (smoothed_aj_gcl[peak_left] - y_min) / 5
    intercepts = np.where(np.diff(np.sign(smoothed_aj_gcl - y_target)))[0]
    if intercepts.size == 0:
        raise ValueError(f'No pit boundary found for {profile_name}')
    leftmost = eccs[intercepts[0]]
    rightmost = eccs[intercepts[-1] + 1]
    width_pit_gcl = rightmost - leftmost

    # Parabola through the ten thinnest samples; its vertex is the minimum.
    indicies = np.argpartition(thickness, 10)[:10]
    p = np.polyfit(eccs[indicies], thickness[indicies], 2)
    if p[0] == 0:
        # No curvature: the vertex is undefined, fall back to the samples.
        min_thickness_gcl = np.min(thickness[indicies])
    else:
        min_thickness_gcl = np.polyval(p, -p[1] / (2 * p[0]))
    return width_pit_gcl, min_thickness_gcl

for sd in subjects_data:
    # Each call returns (pit width, minimum thickness) for one axis.
    gcl_result_x = get_gcl_width(sd.gcl_ipl_X)
    gcl_result_y = get_gcl_width(sd.gcl_ipl_Y)
    sd.width_gcl_X, sd.width_gcl_Y = gcl_result_x[0], gcl_result_y[0]
    # Keep the thinner of the two axis estimates.
    sd.min_thick_gcl = min(gcl_result_x[1], gcl_result_y[1])

In [None]:
# Shared eccentricity grid, taken from the first subject.
eccs = subjects_data[0].eccs
# SubjectData attribute prefix -> display name.
names_r = {
    'rnfl': 'RNFL',
    'gcl_ipl': 'GCL+IPL',
    'inl_opl': 'INL+OPL',
    'onl': 'ONL',
    'pr_rpe': 'PhotoR+RPE',
    'os': 'OS',
    'chrd': 'Choroid',
    'cones': 'Cone Density',
}
# Every retinal layer, i.e. all entries above except the cone density.
layer_names = [key for key in names_r if key != 'cones']

## Correlation Analysis

### Function definition

##### Corr matrix

In [None]:
import seaborn as sns
import scipy.stats

# One row per subject, one column per SubjectData field annotated as int or float.
_scalar_fields = [
    field for field, annotation in SubjectData.__annotations__.items()
    if annotation in (int, float)
]
df = pd.DataFrame(
    {field: [getattr(sd, field) for sd in subjects_data] for field in _scalar_fields},
    index=[sd.name for sd in subjects_data],
)
print(df)
def corr_sig(df: pd.DataFrame, drop=['nb']) -> Tuple[np.ndarray, np.ndarray]:
    """Pairwise Pearson correlations and p-values between the columns of `df`.

    Columns listed in `drop` are left out (the default list is only read,
    never modified). Returns ``(corr_matrix, p_matrix)``, two square arrays
    ordered like the remaining columns.
    """
    kept = df.columns.drop(drop).to_list()
    size = len(kept)
    corr_matrix = np.zeros(shape=(size, size))
    p_matrix = np.ones_like(corr_matrix)
    for i, first in enumerate(kept):
        for j, second in enumerate(kept):
            corr_matrix[i, j], p_matrix[i, j] = scipy.stats.pearsonr(df[first], df[second])
    return corr_matrix, p_matrix


In [None]:
def get_cd_on_range(cone_density_fd, left, right):
    """Mean of `cone_density_fd` per row over the columns whose eccentricity
    (global `eccs`) lies in the closed interval [left, right]."""
    inside = (eccs >= left) & (eccs <= right)
    selected = np.flatnonzero(inside)
    return np.mean(cone_density_fd[:, selected], axis=1)
    
def plot_subjects_data(df):
    """
    Draw one line per subject on a new 12x6 figure.

    Parameters:
    - df: pandas DataFrame where each row corresponds to a subject,
        and each column represents a specific eccentricity range.

    The columns are placed at x = 0, 1, 2, ... in their DataFrame order.
    The figure is neither shown nor given a legend here.
    """
    plt.figure(figsize=(12, 6))

    # Evenly spaced x positions, one per column.
    positions = np.arange(len(df.columns))

    for subject_id in df.index:
        plt.plot(
            positions,
            df.loc[subject_id],
            marker='o',
            linestyle='-',
            label=f"Subject {subject_id}",
        )


##### Feature Preprocessing

In [None]:
from src.shared.helpers.direction import Direction


def preprocess_functional_feature(data: np.ndarray, standardization: str = 'inter') -> np.ndarray:
    '''
    Z-standardize a functional feature (a feature that is a function of
    eccentricity, such as cone density or layer thickness).

    `data` should have shape (n_subjects, n_eccentricities).

    - `standardization='intra'` standardizes within subjects (each row),
      which removes inter-subject variability.
    - `standardization='inter'` standardizes across subjects,
      eccentricity-wise (each column), which removes eccentricity-level
      variability and focuses on between-subject trends.
    - Any other value returns `data` unchanged.

    Both modes ignore NaN entries when computing the mean and standard
    deviation, so a missing value stays NaN in the output without turning the
    rest of its row or column into NaN.
    '''
    axis_by_mode = {'inter': 0, 'intra': 1}
    if standardization not in axis_by_mode:
        return data

    axis = axis_by_mode[standardization]
    mean = np.nanmean(data, axis=axis, keepdims=True)
    std = np.nanstd(data, axis=axis, keepdims=True)
    return (data - mean) / std

def preprocess_functional_data(direction: Direction, standardization: str = 'inter', toLog : bool = True) -> Dict[str, np.ndarray]:
    '''
    Gather and standardize every functional feature of all subjects along one
    direction.

    Returns a dict holding, for each name in `layer_names`, the standardized
    layer profiles, plus 'cones' (fitted cone density, log-transformed when
    `toLog` is True) and "nonfit" (measured cone density). Each entry is the
    output of `preprocess_functional_feature` with one row per subject.
    '''
    axis = direction.value

    def _standardized(attribute, transform=None):
        # Stack the attribute of every subject, optionally transform each
        # profile, then standardize the resulting matrix.
        profiles = [getattr(s, attribute) for s in subjects_data]
        if transform is not None:
            profiles = [transform(profile) for profile in profiles]
        return preprocess_functional_feature(np.array(profiles), standardization)

    layer_fds = {layer: _standardized(f'{layer}_{axis}') for layer in layer_names}
    cone_density_fd = _standardized(f'density_fit_{axis}', np.log if toLog else None)
    cone_density_nonfit = _standardized(f'density_{axis}')

    return {'cones': cone_density_fd, "nonfit": cone_density_nonfit, **layer_fds}

##### P-values

In [None]:
from scipy.stats import kendalltau, pearsonr, spearmanr


def kendall_pval(x,y):
    """P-value of Kendall's tau between `x` and `y`."""
    _, pvalue = kendalltau(x, y)
    return pvalue

def pearsonr_pval(x,y):
    """P-value of the Pearson correlation between `x` and `y`."""
    _, pvalue = pearsonr(x, y)
    return pvalue

def spearmanr_pval(x,y):
    """P-value of the Spearman correlation between `x` and `y`, NaNs omitted."""
    _, pvalue = spearmanr(x, y, nan_policy="omit")
    return pvalue


def calculate_pvalues(df):
    """Pairwise Spearman p-values between the columns of `df`.

    For each pair of columns, only the rows where both values are non-null
    are used. The result is a square float DataFrame indexed and labelled by
    `df.columns`; the p-value for columns ``r`` and ``c`` is found at
    ``pvalues[r][c]`` (column ``r``, row ``c``), rounded to 4 decimals.
    """
    columns = df.columns
    pvalues = pd.DataFrame(index=columns, columns=columns, dtype=float)
    for r in columns:
        for c in columns:
            both_present = df[r].notnull() & df[c].notnull()
            pval = spearmanr(df.loc[both_present, r], df.loc[both_present, c])[1]
            # Single `.loc` write: chained `pvalues[r][c] = ...` assigns to a
            # temporary and is not guaranteed to update the frame.
            pvalues.loc[c, r] = round(pval, 4)
    return pvalues

# Add significance asterisks
def significance_marker(pval):
    """Map a p-value to its asterisk marker ("" when not below 0.05)."""
    # Thresholds are checked from the strictest to the loosest.
    for threshold, stars in ((0.001, "***"), (0.01, "**"), (0.05, "*")):
        if pval < threshold:
            return stars
    return ""  # No significance

#### Analysis

In [None]:
# Pearson correlations / p-values between the scalar features ('nb' excluded).
corr, pv = corr_sig(df)
sig_mask = pv < 0.05  # significance mask (not applied to the heatmap below)

with plt.style.context(matplotx.styles.pacoty):
    plt.figure(figsize=(14, 10), dpi=400)
    # Heatmap of the pandas correlation matrix over all columns of `df`.
    sns.heatmap(
        df.corr(),
        annot=True,
        cmap='coolwarm',
        annot_kws={"fontsize": 8},
    )
    plt.show()

In [None]:
for direction in Direction:
    # Fitted cone density, neither standardized nor log-transformed.
    cone_density_fd = preprocess_functional_data(direction, standardization='none', toLog=False)['cones']

    pids = np.array([s.nb for s in subjects_data])

    # Region label -> (left, right) eccentricity bounds.
    # NOTE(review): the labels say temporal/nasal for both directions -
    # confirm whether the Y axis should read superior/inferior instead.
    region_bounds = {
        'Perifovea,\n Temporal': (-10, -4.16),
        'Parafovea,\n Temporal': (-4.16, -2.8),
        'Fovea,\n temporal': (-2.8, -0.8),
        'FAZ,\n temporal': (-0.8, -0.6),
        'Foveola': (-0.6, 0.6),
        'FAZ,\n nasal': (0.6, 0.8),
        'Fovea,\n nasal': (0.8, 2.8),
        'Parafovea,\n nasal': (2.8, 4.16),
        'Perifovea,\n nasal': (4.16, 10),
    }
    _df = pd.DataFrame(
        {
            label: get_cd_on_range(cone_density_fd, left, right)
            for label, (left, right) in region_bounds.items()
        },
        index=pids,
    ).sort_values(by='Foveola')

    pvalues = calculate_pvalues(_df)
    spearman_corr = _df.corr(method="spearman")

    # Annotation matrix: correlation value plus its significance marker.
    # Built cell by cell with label-based look-ups; chained updates such as
    # `annot[r][c] += ...` are not guaranteed to modify the frame.
    annot = pd.DataFrame(
        [
            [
                f"{spearman_corr.loc[row, col]:.2f}\n{significance_marker(pvalues.loc[row, col])}"
                for col in spearman_corr.columns
            ]
            for row in spearman_corr.index
        ],
        index=spearman_corr.index,
        columns=spearman_corr.columns,
    )

    # Plot the heatmap with annotations including significance
    sns.set_theme(font_scale=0.8)
    # Hide the strict upper triangle; the diagonal stays visible.
    mask = np.triu(np.ones_like(spearman_corr, dtype=bool), k=1)
    sns.heatmap(spearman_corr, annot=annot, fmt="", cmap='coolwarm', square=True, center=0, mask=mask,
                annot_kws={'fontsize': 12, 'fontstyle': 'italic', 'color': 'w', 'alpha': 1.0})
    plt.title(f'Correlation matrix of Cone Density at different eccentricities, {direction.value}-axis', fontsize=14)
    plt.show()

    # Per-subject regional profiles for this direction.
    plot_subjects_data(_df)


In [None]:
# Show the shared eccentricity grid (global `eccs`) used to select ranges.
print(eccs)

In [None]:

# Fitted cone density per axis, neither standardized nor log-transformed.
cone_density_fd_X, cone_density_fd_Y = (
    preprocess_functional_data(axis_direction, standardization='none', toLog = False)['cones']
    for axis_direction in (Direction.X, Direction.Y)
)

# NOTE: this replaces the earlier get_cd_on_range(cone_density_fd, left, right)
# defined above in this file; cells relying on that signature must be run
# before this one.
def get_cd_on_range(left, right, direction: Direction):
    """Mean cone density per subject over the eccentricities in [left, right],
    taken from the X-axis table for `Direction.X` and the Y-axis one otherwise."""
    profiles = cone_density_fd_X if direction == Direction.X else cone_density_fd_Y
    selected = np.flatnonzero((eccs >= left) & (eccs <= right))
    return np.mean(profiles[:, selected], axis=1)



# Fovea/Perifovea etc equivalent for x/y axis

# Region template -> (left, right) bounds; {neg}/{pos} are replaced by the
# side names of each axis.
_region_templates = [
    ('Perifoveal {neg}', -10, -4.16),
    ('Parafoveal {neg}', -4.16, -2.8),
    ('Foveal {neg}', -2.8, -0.8),
    ('FAZ {neg}', -0.8, -0.6),
    ('Foveola', -0.6, 0.6),
    ('FAZ {pos}', 0.6, 0.8),
    ('Foveal {pos}', 0.8, 2.8),
    ('Parafoveal {pos}', 2.8, 4.16),
    ('Perifoveal {pos}', 4.16, 10),
]
# (axis label, direction, name of the negative side, name of the positive side)
_axis_sides = [
    ('X', Direction.X, 'Temporal', 'Nasal'),
    ('Y', Direction.Y, 'Superior', 'Inferior'),
]

# One column per (axis, region): all X-axis regions first, then the Y-axis ones.
_df = pd.DataFrame({
    f'{_axis}-axis, {_template.format(neg=_neg, pos=_pos)}': get_cd_on_range(_left, _right, _direction)
    for _axis, _direction, _neg, _pos in _axis_sides
    for _template, _left, _right in _region_templates
})


sns.set_theme(font_scale=0.6)
sns.heatmap(
    _df.corr(method='pearson'),
    annot=True,
    fmt=".2f",
    cmap='coolwarm',
    square=True,
    center=0,
)
plt.title('Correlation matrix of Cone Density at different eccentricities.', fontsize=14)
plt.show()

In [None]:
key1 = 'Y-axis, Foveola'
key2 = 'Y-axis, Perifoveal Inferior'

# key1 goes on the vertical axis, key2 on the horizontal one.
y_data, x_data = _df[key1], _df[key2]

plt.figure(figsize=(8, 8))
plt.scatter(x_data, y_data, color='darkblue', label='Data Points')

# Least-squares straight line y = m*x + b through the points.
coeffs = np.polyfit(x_data, y_data, 1)
linear_fit = np.poly1d(coeffs)
slope, intercept = coeffs[0], coeffs[1]

# Evaluate the line over the observed x range and draw it.
x_fit = np.linspace(x_data.min(), x_data.max(), 100)
y_fit = linear_fit(x_fit)
plt.plot(x_fit, y_fit, color='red', label=f'Fit: y = {slope:.2f}x + {intercept:.2f}')

plt.xlabel(key2)
plt.ylabel(key1)
plt.title(f'Scatter Plot: {key1} vs {key2}')

# No grid on this figure.
plt.grid(False)

plt.legend()
plt.show()

### Correlation with foveal parameters

#### Imports

In [None]:
import os 

from turtle import color
from cv2 import mean


import numpy as np
import pandas as pd



from scipy import integrate, interpolate, stats
import matplotlib.pyplot as plt


from dataclasses import dataclass
from typing import List, Optional


#### Function definition

##### Integration/CD parameters

In [None]:
def integrate_cone_density_circle(eccs_in_MM: np.ndarray,
                                  cone_density_fd_X: np.ndarray,
                                  cone_density_fd_Y: np.ndarray,
                                  radius: float = 1.0,
                                  num_r: int = 400,
                                  num_theta: int = 400,
                                  exclude_center: bool = False,
                                  center_radius: float = 0.3):
    """
    Integrates cone density over a circular region by blending the horizontal (X)
    and vertical (Y) density profiles of each subject.

    For every subject the two profiles are linearly interpolated onto a radial grid
    (extrapolating outside the sampled eccentricities) and combined as
    density(r, theta) = density_x(r)*cos²(theta) + density_y(r)*sin²(theta).
    The field is multiplied by r (polar area element) and integrated over theta and r.

    Parameters:
        eccs_in_MM (array-like): 1D eccentricities (radial positions in MM).
        cone_density_fd_X (array-like): Cone densities along the horizontal meridian,
                                        shape (n_subjects, len(eccs_in_MM)). A 1D profile
                                        is treated as a single subject.
        cone_density_fd_Y (array-like): Cone densities along the vertical meridian,
                                        same layout as cone_density_fd_X.
        radius (float): Outer radius (in MM) of the integration region.
        num_r (int): Number of radial grid points.
        num_theta (int): Number of angular grid points.
        exclude_center (bool): If True, integration starts at center_radius instead of 0.
        center_radius (float): Inner radius (in MM) used when exclude_center is True.

    Returns:
        mean_int (float): Mean integrated density across subjects.
        std_int (float): Standard deviation of the integrated densities.
        min_int (float): Minimum integrated density.
        max_int (float): Maximum integrated density.
        cov_int (float): Coefficient of variation (std/mean * 100), NaN when the mean is 0.
        int_results (np.ndarray): Integrated density per subject (Simpson's rule).
        max_x_results (np.ndarray): Maximum interpolated X density on the radial grid per subject.
        max_y_results (np.ndarray): Maximum interpolated Y density on the radial grid per subject.
        r_grid (np.ndarray): The radial grid used for integration.
        cumulative_integrations (np.ndarray): (n_subjects x num_r) cumulative integral from the
                                              inner radius up to each grid radius. It uses the
                                              trapezoidal rule, so its last column can differ
                                              slightly from int_results.

    Raises:
        ValueError: If no subject rows are supplied.
    """
    # Coerce once so that lists, DataFrames and single 1D profiles are accepted too
    eccs = np.asarray(eccs_in_MM, dtype=float)
    densities_x = np.atleast_2d(np.asarray(cone_density_fd_X, dtype=float))
    densities_y = np.atleast_2d(np.asarray(cone_density_fd_Y, dtype=float))

    n_subjects = densities_x.shape[0]
    if n_subjects == 0:
        raise ValueError("cone_density_fd_X contains no subjects")

    # Set the lower bound for integration in r
    r_min = center_radius if exclude_center else 0.0

    # Polar grid; the angular weights and the Jacobian do not depend on the subject
    r = np.linspace(r_min, radius, num_r)
    theta = np.linspace(0, 2 * np.pi, num_theta)
    cos_sq = np.cos(theta)**2
    sin_sq = np.sin(theta)**2
    jacobian = r[:, None]

    int_results = []
    max_x_results = []
    max_y_results = []
    cumulative_integrations_list = []

    for i in range(n_subjects):
        # Interpolate both meridians onto the radial grid
        f_x = interpolate.interp1d(eccs, densities_x[i, :],
                                   bounds_error=False, fill_value="extrapolate")
        f_y = interpolate.interp1d(eccs, densities_y[i, :],
                                   bounds_error=False, fill_value="extrapolate")
        density_x = f_x(r)  # shape: (num_r,)
        density_y = f_y(r)  # shape: (num_r,)

        # Peak density along each meridian within the integration region
        max_x_results.append(np.max(density_x))
        max_y_results.append(np.max(density_y))

        # Blend the meridians over the angle and apply the polar area element
        density_field = density_x[:, None] * cos_sq + density_y[:, None] * sin_sq
        density_field_weighted = density_field * jacobian

        # Integrate first over theta (axis=1) then over r using Simpson's rule
        integral_theta = integrate.simpson(density_field_weighted, x=theta, axis=1)
        int_results.append(integrate.simpson(integral_theta, x=r))

        # Running integral from r_min up to each radius (trapezoidal rule)
        cumulative_integrations_list.append(
            integrate.cumulative_trapezoid(integral_theta, r, initial=0))

    # Convert lists to NumPy arrays for the summary statistics
    int_results = np.array(int_results)
    max_x_results = np.array(max_x_results)
    max_y_results = np.array(max_y_results)
    cumulative_integrations = np.array(cumulative_integrations_list)

    mean_int = np.mean(int_results)
    std_int = np.std(int_results)
    min_int = np.min(int_results)
    max_int = np.max(int_results)
    cov_int = (std_int / mean_int * 100) if mean_int != 0 else np.nan

    return (mean_int, std_int, min_int, max_int, cov_int, int_results,
            max_x_results, max_y_results, r, cumulative_integrations)

In [None]:
def _threshold_crossing(x1, y1, x2, y2, threshold):
    """Position between x1 and x2 where the line through (x1, y1)-(x2, y2) reaches threshold."""
    if y2 == y1:  # Flat segment: avoid division by zero
        return x1
    fraction = (threshold - y1) / (y2 - y1)
    # Stay inside the segment; never extrapolate beyond the two samples
    fraction = min(max(fraction, 0.0), 1.0)
    return x1 + fraction * (x2 - x1)


def calculate_peak_width(density_values, positions, threshold_percent=50):
    """
    Calculate the width of a peak at a specified percentage of its maximum value.

    The peak is the global maximum of density_values. On each side of it, the
    nearest sample at or below the threshold is located and the crossing position
    is refined by linear interpolation with its neighbour towards the peak. If a
    side never drops to the threshold, the first/last position is used instead.

    Args:
        density_values: Array of density values
        positions: Array of corresponding positions
        threshold_percent: Percentage of maximum to measure width at (default: 50 for FWHM)

    Returns:
        width: The width at the specified threshold, or None for empty input
    """
    if len(density_values) == 0 or len(positions) == 0:
        return None

    density_values = np.asarray(density_values, dtype=float)
    positions = np.asarray(positions, dtype=float)

    # Locate the peak and derive the threshold level from it
    max_idx = int(np.argmax(density_values))
    threshold = density_values[max_idx] * (threshold_percent / 100.0)

    # Left flank: last sample before the peak that is at or below the threshold
    below_left = np.flatnonzero(density_values[:max_idx] <= threshold)
    if below_left.size:
        i = below_left[-1]
        left_pos = _threshold_crossing(positions[i], density_values[i],
                                       positions[i + 1], density_values[i + 1],
                                       threshold)
    else:
        left_pos = positions[0]

    # Right flank: first sample from the peak onwards at or below the threshold
    below_right = np.flatnonzero(density_values[max_idx:] <= threshold)
    if below_right.size == 0:
        right_pos = positions[-1]
    elif below_right[0] == 0:
        # The peak itself is not above the threshold (e.g. threshold_percent >= 100)
        right_pos = positions[max_idx]
    else:
        j = below_right[0] + max_idx
        right_pos = _threshold_crossing(positions[j - 1], density_values[j - 1],
                                        positions[j], density_values[j],
                                        threshold)

    return abs(right_pos - left_pos)


##### Foveal Data Extraction

In [None]:
@dataclass
class FoveaParams:
    """Fitted 3D fovea model parameters for one subject/session.

    The height functions in this notebook evaluate the surface as a quadratic
    background minus a 2D Gaussian pit:

        z(x, y) = A00 + A10*x + A01*y - A20*x**2 - A02*y**2 + A11*x*y
                  - foveal_depth * exp(-((x - foveal_center_X)**2 / (2*foveal_width_X**2)
                                         + (y - foveal_center_Y)**2 / (2*foveal_width_Y**2)))

    Only the four identification fields are required; every fitted value
    defaults to None until it is filled in.
    """
    # Patient information
    subject: str          # display label, e.g. "Subject12"
    patient_id: str       # subject number kept as a string
    subject_folder: str   # name of the subject's directory
    trial_name: str       # name of the session directory
    age: Optional[int] = None  # NOTE(review): units/source not shown in this section - confirm

    # Fitted parameters
    # Coefficients of the quadratic background (see formula in the class docstring)
    A00: Optional[float] = None
    A10: Optional[float] = None
    A01: Optional[float] = None
    A20: Optional[float] = None  # enters the model with a negative sign
    A02: Optional[float] = None  # enters the model with a negative sign
    A11: Optional[float] = None
    # Gaussian pit: amplitude, centre and per-axis width
    foveal_depth: Optional[float] = None
    foveal_center_X: Optional[float] = None
    foveal_width_X: Optional[float] = None
    foveal_center_Y: Optional[float] = None
    foveal_width_Y: Optional[float] = None
    # Summary values stored as provided; how they are derived is not shown here
    foveal_max_slope: Optional[float] = None
    foveal_flatness: Optional[float] = None
    foveal_volume: Optional[float] = None

def _apply_fitted_param(fovea_obj: FoveaParams, param_name, param_value,
                        patient_id: str, verbose: bool = False) -> None:
    """Store one ``param;value`` CSV row on ``fovea_obj`` if it names a known parameter.

    A row is accepted when ``fovea_obj`` has an attribute called ``param_name`` or
    ``foveal_<param_name>``. The value is then stored under the bare name and,
    when the prefixed attribute exists, under the prefixed name as well, so both
    spellings (e.g. ``depth`` and ``foveal_depth``) can be read afterwards.
    """
    # Blank rows are read as NaN names; rows whose value is "params" are skipped
    # (presumably the file's header row).
    if not isinstance(param_name, str) or param_value == "params":
        return

    param_name = param_name.strip()
    prefixed_name = f"foveal_{param_name}"
    has_prefixed = hasattr(fovea_obj, prefixed_name)
    if not (has_prefixed or hasattr(fovea_obj, param_name)):
        return

    try:
        value = float(param_value)
    except (TypeError, ValueError):
        print(f"Error setting {param_name} for patient {patient_id}: {param_value}")
        return

    setattr(fovea_obj, param_name, value)
    if has_prefixed:
        setattr(fovea_obj, prefixed_name, value)
    if verbose:
        print(f"Set {param_name} = {value} for patient {patient_id}")


def extract_fovea_data(base_path: str, verbose: bool = False) -> List[FoveaParams]:
    """
    Extract fovea parameters from CSV files with known structure.

    Scans ``base_path`` for directories whose name starts with "Subject" and
    contains ``Subject<digits>``, then every "Session*" directory inside them,
    and reads ``layer_new/fovea_3d_fitted_params.csv`` (semicolon separated,
    one ``param;value`` pair per line) when it exists.

    Args:
        base_path: Path to the base directory containing subject folders
        (subjfolder/trialfolder/layer_new/fovea_3d_fitted_params.csv)
        verbose: If True, print every parameter that gets stored

    Returns:
        List of FoveaParams objects, one for each found CSV file, ordered by
        subject number and then session name. Empty if base_path does not exist.
    """
    import re  # local import so the function does not depend on a file-level one

    subject_pattern = re.compile(r'Subject(\d+)')

    try:
        candidates = os.listdir(base_path)
    except FileNotFoundError:
        print(f"Base path not found: {base_path}")
        return []

    # Keep only subject directories and order them by their numeric id so the
    # result does not depend on the file system's listing order.
    subjects = []
    for name in candidates:
        if not name.startswith("Subject") or not os.path.isdir(os.path.join(base_path, name)):
            continue
        subject_match = subject_pattern.search(name)
        if subject_match:
            subjects.append((int(subject_match.group(1)), name, subject_match.group(1)))
    subjects.sort()

    fovea_data = []
    for _, subject_dir, patient_id in subjects:
        subject_path = os.path.join(base_path, subject_dir)

        try:
            # Session directories (directories starting with "Session")
            session_dirs = sorted(
                d for d in os.listdir(subject_path)
                if d.startswith("Session") and os.path.isdir(os.path.join(subject_path, d))
            )
        except FileNotFoundError:
            continue

        for session_dir in session_dirs:
            csv_path = os.path.join(subject_path, session_dir,
                                    "layer_new", "fovea_3d_fitted_params.csv")
            if not os.path.isfile(csv_path):
                continue

            df = pd.read_csv(csv_path, sep=';', header=None, names=['param', 'value'])

            # Start from the identification fields; fitted values are added below
            fovea_obj = FoveaParams(
                patient_id=str(patient_id),
                subject=f"Subject{patient_id}",
                subject_folder=subject_dir,
                trial_name=session_dir
            )
            print(f"Processing file: {csv_path} for patient {patient_id}")

            for param_name, param_value in zip(df['param'], df['value']):
                _apply_fitted_param(fovea_obj, param_name, param_value, patient_id, verbose)

            fovea_data.append(fovea_obj)

    return fovea_data

def save_to_dataframe(fovea_data: List[FoveaParams], output_file: str = "fovea_parameters.csv") -> pd.DataFrame:
    """
    Tabulate FoveaParams objects and write the table to a CSV file.

    Each object contributes one row built from its instance attributes
    (``vars(obj)``), so attributes attached after construction become
    columns as well.

    Args:
        fovea_data: List of FoveaParams objects
        output_file: Destination path of the CSV file (written without the index)

    Returns:
        The DataFrame that was written
    """
    records = list(map(vars, fovea_data))
    table = pd.DataFrame(records)
    table.to_csv(output_file, index=False)
    return table

##### Foveal Data Analysis

In [None]:
def calculate_fovea_volume(params: FoveaParams, radius_start: float, radius_end: float, num_points: int = 1000) -> float:
    """
    Integrate the fitted fovea surface over an annulus around the pit centre.

    The surface height (quadratic background minus Gaussian pit) is integrated
    in polar coordinates centred on (foveal_center_X, foveal_center_Y) for
    radii between radius_start and radius_end and all angles from 0 to 2π.

    Parameters:
    - params: FoveaParams object containing the fitted parameters
    - radius_start: Inner radius of the circular region
    - radius_end: Outer radius of the circular region
    - num_points: Not referenced by the implementation; kept so existing calls
      that pass it keep working

    Returns:
    - volume: Value of the double integral returned by scipy's dblquad
    """
    print(f"Calculating fovea volume for subject: {params.subject}, from radius {radius_start} to {radius_end} from center ({params.foveal_center_X}, {params.foveal_center_Y})")

    center_x, center_y = params.foveal_center_X, params.foveal_center_Y

    def weighted_height(r, theta):
        # Absolute surface coordinates of the polar sample around the pit centre
        x = r * np.cos(theta) + center_x
        y = r * np.sin(theta) + center_y

        # Quadratic background
        background = (params.A00 + params.A10*x + params.A01*y -
                      params.A20*(x**2) - params.A02*(y**2) + params.A11*x*y)

        # Gaussian pit
        x_term = ((x - center_x)**2) / (2 * (params.foveal_width_X**2))
        y_term = ((y - center_y)**2) / (2 * (params.foveal_width_Y**2))
        pit = params.foveal_depth * np.exp(-(x_term + y_term))

        # The factor r is the Jacobian of the polar transformation
        return (background - pit) * r

    def inner_radius(_theta):
        return radius_start

    def outer_radius(_theta):
        return radius_end

    # dblquad integrates the first argument (r) innermost and the second (theta) outermost
    integral, _error_estimate = integrate.dblquad(
        weighted_height,
        0, 2*np.pi,
        inner_radius, outer_radius
    )

    return integral

def calculate_all_regional_volumes(fovea_data: List[FoveaParams], retinal_regions: List[dict],
                                   infinite_radius_limit: float = 3.0) -> dict:
    """
    Calculate volumes for all subjects across all specified retinal regions,
    measuring from the center (0) to the absolute boundary of each region.

    Parameters:
    - fovea_data: List of FoveaParams objects
    - retinal_regions: List of dictionaries with 'name', 'start' and 'end' keys
    - infinite_radius_limit: Radius substituted for regions with an infinite bound

    Returns:
    - Dictionary keyed by subject label, then by region name (plus a "total"
      entry holding the subject's stored volume with an infinite radius). Each
      entry is {'foveal_volume': ..., 'foveal_radius': ...}. Objects sharing
      the same subject label overwrite each other.
    """
    foveal_integration_data = {}

    for subject in fovea_data:
        print(f"Calculating volumes for subject: {subject.subject}")

        # The stored volume may live under the bare name 'volume' (attached
        # dynamically when parameters are loaded) or under the declared field
        # 'foveal_volume'; accept either instead of failing on a missing attribute.
        total_volume = getattr(subject, 'volume', None)
        if total_volume is None:
            total_volume = getattr(subject, 'foveal_volume', None)

        subject_volumes = {
            "total": {
                'foveal_volume': total_volume,
                'foveal_radius': np.inf
            }
        }

        # Several regions can share an outer radius (e.g. mirrored regions);
        # integrate each distinct radius only once per subject.
        volume_by_radius = {}

        for region in retinal_regions:
            # Integrate from the centre out to the farthest bound of the region
            region_radius = max(abs(region['start']), abs(region['end']))

            # For regions with infinite bounds, use the practical limit
            if np.isinf(region_radius):
                region_radius = infinite_radius_limit

            if region_radius not in volume_by_radius:
                volume_by_radius[region_radius] = calculate_fovea_volume(subject, 0, region_radius)

            subject_volumes[region['name']] = {
                'foveal_volume': volume_by_radius[region_radius],
                'foveal_radius': region_radius
            }

        foveal_integration_data[subject.subject] = subject_volumes

    return foveal_integration_data

# Function to visualize the model and regions
def visualize_fovea_model(params: FoveaParams, retinal_regions: List[dict]):
    """
    Create a visualization of the fovea model with marked regions.

    The left panel shows the fitted surface on a 4 x 4 grid centred on the pit;
    the right panel shows the profile along Y through the pit centre with each
    uniquely named region shaded. Region bounds are offsets from the centre's
    Y position; infinite bounds are drawn at -3.0 / 3.0.

    Parameters:
    - params: FoveaParams object containing the fitted parameters
    - retinal_regions: List of dictionaries with 'name', 'start' and 'end' keys

    Returns:
    - The matplotlib Figure holding both panels
    """
    center_x = params.foveal_center_X
    center_y = params.foveal_center_Y

    def surface_height(x, y):
        # Quadratic background minus Gaussian pit
        background = (params.A00 + params.A10*x + params.A01*y -
                      params.A20*(x**2) - params.A02*(y**2) + params.A11*x*y)
        x_term = ((x - center_x)**2) / (2 * (params.foveal_width_X**2))
        y_term = ((y - center_y)**2) / (2 * (params.foveal_width_Y**2))
        return background - params.foveal_depth * np.exp(-(x_term + y_term))

    # Create a grid of points around the pit centre
    x = np.linspace(center_x - 2, center_x + 2, 100)
    y = np.linspace(center_y - 2, center_y + 2, 100)
    X, Y = np.meshgrid(x, y)
    Z = surface_height(X, Y)

    # Create 3D plot
    fig = plt.figure(figsize=(12, 10))
    ax1 = fig.add_subplot(121, projection='3d')
    ax1.plot_surface(X, Y, Z, cmap='viridis', alpha=0.8)
    ax1.set_xlabel('X')
    ax1.set_ylabel('Y')
    ax1.set_zlabel('Z')
    ax1.set_title('3D Fovea Model')

    # 2D cross-section along Y, evaluated exactly at the centre's X position
    # (an even-sized grid has no column at the centre itself)
    ax2 = fig.add_subplot(122)
    ax2.plot(y, surface_height(center_x, y))

    # Mark the regions, shading each region name only once
    colors = ['red', 'green', 'blue', 'purple', 'orange', 'brown', 'pink']
    seen_names = set()
    for i, region in enumerate(retinal_regions):
        if region['name'] in seen_names:
            continue
        seen_names.add(region['name'])

        # Handle infinite bounds for visualization
        start = -3.0 if region['start'] == -np.inf else region['start']
        end = 3.0 if region['end'] == np.inf else region['end']

        ax2.axvspan(center_y + start, center_y + end,
                    alpha=0.2, color=colors[i % len(colors)], label=region['name'])

    ax2.set_xlabel('Y position')
    ax2.set_ylabel('Z height')
    ax2.set_title('Cross-section with Retinal Regions')
    if seen_names:  # a legend without labelled artists only triggers a warning
        ax2.legend()

    plt.tight_layout()
    return fig


In [None]:
def calculate_directional_slopes(params, distance=0.1):
    """
    Finite-difference slopes of the fitted fovea surface around the pit centre.

    For each of the four axis directions the surface height is sampled at
    ``distance`` from the centre and the slope is
    (height at sample - height at centre) / distance.

    Args:
        params: Object exposing the fitted parameters (A00..A11, foveal_depth,
            foveal_center_X/Y, foveal_width_X/Y)
        distance: Distance from center to the sample point (in mm)

    Returns:
        dict: 'foveal_temporal_slope' (-x), 'foveal_nasal_slope' (+x),
            'foveal_superior_slope' (-y), 'foveal_inferior_slope' (+y).
            NOTE(review): the anatomical labels assume this axis orientation
            for every eye - confirm against the acquisition convention.
    """
    cx = params.foveal_center_X
    cy = params.foveal_center_Y

    def surface(x, y):
        # Quadratic background
        background = (params.A00 + params.A10*x + params.A01*y -
                      params.A20*(x**2) - params.A02*(y**2) + params.A11*x*y)

        # Gaussian pit
        x_term = ((x - cx)**2) / (2 * (params.foveal_width_X**2))
        y_term = ((y - cy)**2) / (2 * (params.foveal_width_Y**2))
        pit = params.foveal_depth * np.exp(-(x_term + y_term))

        return background - pit

    height_at_center = surface(cx, cy)

    # (result key, sample x, sample y) for each direction
    probes = (
        ('foveal_temporal_slope', cx - distance, cy),
        ('foveal_nasal_slope', cx + distance, cy),
        ('foveal_superior_slope', cx, cy - distance),
        ('foveal_inferior_slope', cx, cy + distance),
    )

    return {
        key: (surface(px, py) - height_at_center) / distance
        for key, px, py in probes
    }

def _integration_value_to_float(value):
    """Reduce one integration-data entry to a float (NaN when it cannot be converted)."""
    if isinstance(value, np.ndarray):
        # Arrays contribute their first element only; empty arrays become NaN
        flat = value.ravel()
        if flat.size == 0:
            return np.nan
        value = flat[0]
    if isinstance(value, (int, float, np.integer, np.floating, str)):
        try:
            return float(value)
        except ValueError:
            return np.nan
    return np.nan


def _plot_foveal_vs_integration(ax, merged_df, foveal_param, integration_param):
    """Draw one scatter panel with a least-squares line and Spearman statistics."""
    try:
        # Keep only rows where both values are numeric and present
        pair = (merged_df[[foveal_param, integration_param]]
                .apply(pd.to_numeric, errors='coerce')
                .dropna())
    except (TypeError, ValueError) as e:
        print(f"Error converting data to numeric for {foveal_param} vs {integration_param}: {e}")
        ax.text(0.5, 0.5, f'Data conversion error',
                ha='center', va='center', transform=ax.transAxes)
        ax.set_title(integration_param)
        return

    x_data = pair[foveal_param]
    y_data = pair[integration_param]

    if len(pair) < 3:
        ax.text(0.5, 0.5, f'Insufficient data\n({len(pair)} points)',
                ha='center', va='center', transform=ax.transAxes)
        ax.set_title(integration_param)
        return

    ax.scatter(x_data, y_data, alpha=0.6, s=50)

    try:
        # Spearman rank correlation for the title
        r, p = stats.spearmanr(x_data, y_data)

        # The least-squares trend line is always drawn, whatever the p-value
        coefficients = np.polyfit(x_data, y_data, 1)
        trend = np.poly1d(coefficients)
        x_sorted = x_data.sort_values()
        ax.plot(x_sorted, trend(x_sorted),
                "r--", alpha=0.8, linewidth=2,
                label=f'β = {coefficients[0]:.3f}')
        ax.legend(loc='best', fontsize=8, frameon=True,
                  fancybox=True, shadow=True)

        # Significance stars appended to the correlation in the title
        significance = ""
        if p < 0.001:
            significance = "***"
        elif p < 0.01:
            significance = "**"
        elif p < 0.05:
            significance = "*"

        ax.set_title(f'{integration_param}\nr={r:.3f}{significance} (n={len(x_data)})',
                     fontsize=10)
    except (TypeError, ValueError, np.linalg.LinAlgError) as e:
        ax.set_title(f'{integration_param}\n(correlation error)')
        print(f"Error calculating correlation for {foveal_param} vs {integration_param}: {e}")

    ax.set_xlabel(foveal_param.replace('_', ' ').title())
    ax.set_ylabel(integration_param.replace('_', ' ').title())
    ax.grid(True, alpha=0.3)


def create_foveal_integration_scatterplots(fovea_params_list, integration_data, save_path=None):
    """
    Create scatter plots of foveal parameters vs integration parameters.

    One figure is produced per foveal parameter, with one panel per integration
    parameter (three panels per row). Each panel shows the data, a least-squares
    line and the Spearman correlation. Foveal parameters with fewer than five
    values are skipped; panels with fewer than three paired values show a notice.

    Args:
        fovea_params_list: List of FoveaParams objects
        integration_data: Dictionary {subject label: {parameter name: value}}.
            Array values are reduced to their first element, non-numeric
            values become NaN
        save_path: Directory for the PNG files (created if missing); nothing is
            saved when omitted
    """
    if not fovea_params_list or not integration_data:
        print("No foveal or integration data supplied - nothing to plot")
        return

    if save_path:
        Path(save_path).mkdir(parents=True, exist_ok=True)

    # Convert fovea data to DataFrame (one row per FoveaParams object)
    fovea_df = pd.DataFrame([vars(f) for f in fovea_params_list])

    # Directional slopes, kept row-aligned with fovea_df. Joining them back on
    # 'subject' would multiply rows for subjects that appear more than once and
    # fail when no object has a complete set of parameters.
    slope_inputs = ['A00', 'foveal_center_X', 'foveal_center_Y',
                    'foveal_width_X', 'foveal_width_Y', 'foveal_depth']
    slope_rows = [
        calculate_directional_slopes(params)
        if all(getattr(params, attr, None) is not None for attr in slope_inputs)
        else {}
        for params in fovea_params_list
    ]
    slopes_df = pd.DataFrame(slope_rows, index=fovea_df.index)
    extended_fovea_df = pd.concat([fovea_df, slopes_df], axis=1)

    # Integration data as a numeric table with one row per subject
    integration_df = pd.DataFrame([
        {'subject': subject,
         **{key: _integration_value_to_float(value) for key, value in data.items()}}
        for subject, data in integration_data.items()
    ])
    for col in integration_df.columns:
        if col != 'subject':
            integration_df[col] = pd.to_numeric(integration_df[col], errors='coerce')

    # Merge datasets
    merged_df = pd.merge(extended_fovea_df, integration_df, on='subject', how='inner')

    print(f"Successfully merged data for {len(merged_df)} subjects")

    # Define foveal parameters to plot
    foveal_params = [
        'foveal_max_slope', 'foveal_temporal_slope', 'foveal_nasal_slope', 'foveal_superior_slope', 'foveal_inferior_slope',
        'A00', 'A10', 'A01', 'A20', 'A02', 'A11', 'foveal_depth',
        'foveal_center_X', 'foveal_width_X', 'foveal_center_Y', 'foveal_width_Y', 'foveal_flatness', 'volume'
    ]
    # 'volume' only exists when it was attached to the objects dynamically;
    # otherwise use the declared field.
    if 'volume' not in merged_df.columns and 'foveal_volume' in merged_df.columns:
        foveal_params[-1] = 'foveal_volume'

    # Integration columns that survived the merge under their own name
    integration_params = [col for col in integration_df.columns
                          if col != 'subject' and col in merged_df.columns]

    print(f"Foveal parameters: {foveal_params}")
    print(f"Integration parameters: {integration_params}")

    if not integration_params:
        print("No integration parameters available - nothing to plot")
        return

    n_cols = 3
    n_rows = (len(integration_params) + n_cols - 1) // n_cols

    for foveal_param in foveal_params:
        if foveal_param not in merged_df.columns:
            print(f"Skipping {foveal_param} - not found in data")
            continue

        # Check if we have valid data for this parameter
        valid_data = merged_df[foveal_param].dropna()
        if len(valid_data) < 5:
            print(f"Skipping {foveal_param} - insufficient data ({len(valid_data)} points)")
            continue

        # squeeze=False always yields a 2D array, so one flatten covers every layout
        fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 5*n_rows), squeeze=False)
        axes = axes.ravel()
        fig.suptitle(f'{foveal_param.replace("_", " ").title()} vs Integration Parameters',
                     fontsize=16, y=0.995)

        for ax, integration_param in zip(axes, integration_params):
            _plot_foveal_vs_integration(ax, merged_df, foveal_param, integration_param)

        # Hide unused subplots
        for unused_ax in axes[len(integration_params):]:
            unused_ax.set_visible(False)

        plt.tight_layout(rect=[0, 0, 1, 0.96])  # Leave space for suptitle

        # Save plot if path is provided
        if save_path:
            filename = f"{foveal_param}_vs_integration_parameters.png"
            filepath = os.path.join(save_path, filename)
            plt.savefig(filepath, dpi=300, bbox_inches='tight')
            print(f"Saved: {filepath}")

        plt.show()

def create_summary_correlation_matrix(fovea_params_list, integration_data, save_path=None):
    """
    Create a summary correlation matrix focusing on key relationships.

    Spearman correlations are computed between the key foveal parameters
    (maximum slope, the four directional slopes, depth and volume) and every
    integration parameter, then drawn as a lower-triangle heatmap.

    Args:
        fovea_params_list: List of FoveaParams objects
        integration_data: Dictionary {subject label: {parameter name: value}}.
            Array values are reduced to their first element, non-numeric
            values become NaN
        save_path: Directory for the PNG file (created if missing); nothing is
            saved when omitted

    Returns:
        DataFrame with the Spearman correlation matrix
    """
    import seaborn as sns

    def to_scalar(value):
        # Arrays contribute their first element only; empty arrays become NaN
        if isinstance(value, np.ndarray):
            flat = value.ravel()
            return flat[0] if flat.size else np.nan
        return value

    fovea_df = pd.DataFrame([vars(f) for f in fovea_params_list])

    # Directional slopes, kept row-aligned with fovea_df. Joining them back on
    # 'subject' would multiply rows for subjects that appear more than once and
    # fail when no object has a complete set of parameters. Completeness is
    # checked on the declared FoveaParams fields.
    slope_inputs = ['A00', 'foveal_center_X', 'foveal_center_Y',
                    'foveal_width_X', 'foveal_width_Y', 'foveal_depth']
    slope_rows = [
        calculate_directional_slopes(params)
        if all(getattr(params, attr, None) is not None for attr in slope_inputs)
        else {}
        for params in fovea_params_list
    ]
    slopes_df = pd.DataFrame(slope_rows, index=fovea_df.index)
    extended_fovea_df = pd.concat([fovea_df, slopes_df], axis=1)

    integration_df = pd.DataFrame([
        {'subject': subject, **{key: to_scalar(value) for key, value in data.items()}}
        for subject, data in integration_data.items()
    ])

    merged_df = pd.merge(extended_fovea_df, integration_df, on='subject', how='inner')

    # Key parameters may be stored under the bare name or with the 'foveal_'
    # prefix (the directional slopes only exist with the prefix); use whichever
    # column is present, preferring the bare name.
    key_names = ['max_slope', 'temporal_slope', 'nasal_slope',
                 'superior_slope', 'inferior_slope', 'depth', 'volume']
    key_foveal_params = []
    for name in key_names:
        for candidate in (name, f'foveal_{name}'):
            if candidate in merged_df.columns:
                key_foveal_params.append(candidate)
                break

    integration_params = [col for col in integration_df.columns
                          if col != 'subject' and col in merged_df.columns]

    # Coerce to numbers so that object-typed columns do not break the correlation
    selected_columns = key_foveal_params + integration_params
    numeric_df = merged_df[selected_columns].apply(pd.to_numeric, errors='coerce')
    correlation_matrix = numeric_df.corr(method='spearman')

    # Plot correlation matrix
    plt.figure(figsize=(12, 10))

    # Mask the upper triangle (including the diagonal)
    mask = np.triu(np.ones_like(correlation_matrix, dtype=bool))

    sns.heatmap(correlation_matrix, mask=mask, annot=True, cmap='coolwarm',
                center=0, square=True, linewidths=0.5, fmt='.2f',
                annot_kws={'size': 8})

    plt.title('Correlation Matrix: Foveal Parameters vs Integration Parameters', fontsize=14)
    plt.xticks(rotation=45, ha='right')
    plt.yticks(rotation=0)
    plt.tight_layout()

    if save_path:
        os.makedirs(save_path, exist_ok=True)
        filepath = os.path.join(save_path, "foveal_integration_correlation_matrix.png")
        plt.savefig(filepath, dpi=300, bbox_inches='tight')
        print(f"Saved correlation matrix: {filepath}")

    plt.show()

    return correlation_matrix

##### Correlations

In [None]:
def correlate_foveal_volumes_with_cone_density(foveal_volumes, integration_data):
    """
    Correlate regional foveal volumes with cone density integration data.

    One row per subject is built from each input, the two tables are
    inner-joined on ``subject`` and a Spearman correlation matrix is computed
    over every remaining column.

    Args:
        foveal_volumes: Dictionary ``{subject: {region_name: data}}``. Only
            regions whose ``data`` is a dict containing ``'foveal_volume'``
            contribute a ``foveal_volume_<region_name>`` column.
        integration_data: Dictionary ``{subject: {key: value}}``. Every key
            except ``max_X``/``max_Y`` becomes a ``cone_density_<key>``
            column; ``max_X``/``max_Y`` keep their names. NumPy arrays are
            reduced to their first element.

    Returns:
        correlation_df: DataFrame with Spearman correlation coefficients
        merged_df: The merged data DataFrame
        annot: DataFrame of strings (coefficient, newline, p-value) meant to
            be used as heatmap annotations

        ``(None, None, None)`` is returned when either input is empty or when
        the two inputs share no subject.
    """
    print("Starting correlation analysis between foveal volumes and cone density...")

    # An empty input yields a DataFrame without a 'subject' column, which
    # would otherwise surface as a KeyError further down.
    if not foveal_volumes or not integration_data:
        print("ERROR: No matching subjects found between datasets!")
        print("At least one of the two input dictionaries is empty.")
        return None, None, None

    # Step 1: Create a DataFrame from foveal volumes
    print(f"Processing foveal volume data for {len(foveal_volumes)} subjects")

    foveal_data = []
    for subject_id, regions in foveal_volumes.items():
        print(f"Processing subject {subject_id} with regions: {list(regions.keys())}")
        row = {'subject': subject_id}
        for region_name, data in regions.items():
            # Entries that do not carry a volume are ignored
            if isinstance(data, dict) and 'foveal_volume' in data:
                row[f"foveal_volume_{region_name}"] = data['foveal_volume']
        foveal_data.append(row)

    foveal_df = pd.DataFrame(foveal_data)
    print(f"Created foveal volume DataFrame with {len(foveal_df)} rows and {len(foveal_df.columns)} columns")

    print("\nSample of foveal volume data (first 3 rows):")
    print(foveal_df.head(3))

    foveal_subjects = foveal_df['subject'].unique()
    print(f"\nFound {len(foveal_subjects)} unique subjects in foveal data:")
    print(foveal_subjects[:5], "..." if len(foveal_subjects) > 5 else "")

    # Step 2: Create a DataFrame for cone density data
    print(f"\nProcessing cone density integration data for {len(integration_data)} subjects")

    special_keys = ("max_X", "max_Y")
    density_data = []
    for subject_id, regions in integration_data.items():
        row = {'subject': subject_id}

        for region, value in regions.items():
            if region not in special_keys:
                row[f"cone_density_{region}"] = value[0] if isinstance(value, np.ndarray) else value

        # max_X / max_Y keep their names but get the same array unwrapping as
        # every other entry; a raw array here would create an object column
        # that the correlation step cannot handle.
        for key in special_keys:
            if key in regions:
                value = regions[key]
                row[key] = value[0] if isinstance(value, np.ndarray) else value

        density_data.append(row)

    density_df = pd.DataFrame(density_data)
    print(f"Created cone density DataFrame with {len(density_df)} rows and {len(density_df.columns)} columns")

    print("\nSample of cone density data (first 3 rows):")
    print(density_df.head(3))

    density_subjects = density_df['subject'].unique()
    print(f"\nFound {len(density_subjects)} unique subjects in density data:")
    print(density_subjects[:5], "..." if len(density_subjects) > 5 else "")

    # Step 3: Merge the DataFrames on subject
    print("\nMerging datasets on 'subject' column...")
    merged_df = pd.merge(foveal_df, density_df, on='subject', how='inner')
    print(f"Merged DataFrame has {len(merged_df)} rows")

    if len(merged_df) == 0:
        print("ERROR: No matching subjects found between datasets!")
        print("Check that subject names match exactly between the two datasets.")
        return None, None, None

    matched_subjects = merged_df['subject'].unique()
    print(f"Successfully matched {len(matched_subjects)} subjects:")
    print(matched_subjects[:5], "..." if len(matched_subjects) > 5 else "")

    # Step 4: Select the columns to correlate (everything except 'subject')
    print("\nSelecting numeric columns for correlation...")
    numeric_cols = [col for col in merged_df.columns if col != 'subject']
    print(f"Numeric columns: {numeric_cols}")

    numeric_df = merged_df[numeric_cols]
    print(f"Selected {len(numeric_df.columns)} numeric columns for correlation")

    missing_data = numeric_df.isna().sum()
    if missing_data.sum() > 0:
        print("\nWARNING: Missing data detected in these columns:")
        print(missing_data[missing_data > 0])

    # Step 5: Calculate correlation matrix
    print("\nCalculating correlation matrix...")
    correlation_df = numeric_df.corr(method="spearman")

    # NOTE(review): calculate_pvalues is defined elsewhere in this file and
    # receives the correlation matrix, not the underlying data. If it
    # computes pairwise tests on the columns of its argument, it should
    # presumably be given numeric_df instead - confirm against its definition.
    pvalues = calculate_pvalues(correlation_df)
    print("Calculated p-values for correlation matrix")

    # Build the annotation table in one go: coefficient on the first line,
    # p-value on the second.
    labels = correlation_df.columns
    annot = pd.DataFrame(
        [
            [
                f"{correlation_df.loc[r, c]:.2f}\n(p={pvalues.loc[r, c]:.3f})"
                for c in labels
            ]
            for r in labels
        ],
        index=labels,
        columns=labels,
    )

    return correlation_df, merged_df, annot




##### Correlation Matrices

In [None]:
def create_correlation_matrix(fovea_params_list, region_dict):
    """
    Create a correlation matrix between fovea parameters and region density data.

    The attributes of every fovea object (via ``vars``) and the per-subject
    entries of ``region_dict`` are turned into two tables, inner-joined on
    ``subject`` and correlated with Spearman's method.

    Args:
        fovea_params_list: List of FoveaParams objects; each must expose a
            ``subject`` attribute.
        region_dict: Dictionary ``{subject: {metric: value}}``. NumPy array
            values are reduced to their first element.

    Returns:
        correlation_df: DataFrame with correlation coefficients
        merged_df: The merged data DataFrame
        annot: DataFrame of strings (coefficient, newline, significance
            marker) meant to be used as heatmap annotations

        ``(None, None, None)`` is returned when either input is empty or when
        no subject is present in both inputs.
    """
    print("Starting correlation analysis...")

    # Empty inputs produce DataFrames without a 'subject' column; treat them
    # as "nothing to correlate" rather than failing with a KeyError.
    if not fovea_params_list or not region_dict:
        print("ERROR: No matching subjects found between datasets!")
        print("At least one of the two inputs is empty.")
        return None, None, None

    # Step 1: Create a DataFrame from fovea parameters
    print(f"Processing {len(fovea_params_list)} fovea parameter objects")
    fovea_df = pd.DataFrame([vars(f) for f in fovea_params_list])
    print(f"Created fovea DataFrame with {len(fovea_df)} rows and {len(fovea_df.columns)} columns")

    # The preview is informational only, so show whichever of the usual
    # columns exist instead of failing on a missing one.
    preview_cols = [col for col in ['subject', 'patient_id', 'A00', 'foveal_depth', 'foveal_volume']
                    if col in fovea_df.columns]
    print("\nSample of fovea data (first 3 rows):")
    print(fovea_df[preview_cols].head(3))

    fovea_subjects = fovea_df['subject'].unique()
    print(f"\nFound {len(fovea_subjects)} unique subjects in fovea data:")
    print(fovea_subjects[:5], "..." if len(fovea_subjects) > 5 else "")

    # Step 2: Numeric parameters of interest. 'foveal_volume' matches the
    # naming of the other foveal_* attributes; the bare 'volume' is kept for
    # objects that still use that attribute name.
    param_cols = ['A00', 'A10', 'A01', 'A20', 'A02', 'A11', 'foveal_depth',
                  'foveal_center_X', 'foveal_width_X', 'foveal_center_Y', 'foveal_width_Y',
                  'foveal_max_slope', 'foveal_flatness', 'foveal_volume', 'volume']

    present_params = [col for col in param_cols if col in fovea_df.columns]
    print(f"\nFound {len(present_params)} parameters in fovea data: {present_params}")

    # Step 3: Create a DataFrame for region data
    print(f"\nProcessing region dictionary with {len(region_dict)} subjects")
    region_data = []
    for subj_id, metrics in region_dict.items():
        row = {'subject': subj_id}
        for region, value_array in metrics.items():
            # Use the first value if it's an array
            row[str(region)] = value_array[0] if isinstance(value_array, np.ndarray) else value_array
        region_data.append(row)
        print(f"Processed subject {subj_id} with {len(row) - 1} region values")

    region_df = pd.DataFrame(region_data)
    print(f"Created region DataFrame with {len(region_df)} rows and {len(region_df.columns)} columns")

    print("\nSample of region data (first 3 rows):")
    print(region_df.head(3))

    region_subjects = region_df['subject'].unique()
    print(f"\nFound {len(region_subjects)} unique subjects in region data:")
    print(region_subjects[:5], "..." if len(region_subjects) > 5 else "")

    # Step 4: Merge the DataFrames on subject
    print("\nMerging datasets on 'subject' column...")
    merged_df = pd.merge(fovea_df, region_df, on='subject', how='inner')
    print(f"Merged DataFrame has {len(merged_df)} rows")

    if len(merged_df) == 0:
        print("ERROR: No matching subjects found between datasets!")
        print("Check that subject names match exactly between the two datasets.")
        # Three values, like the success path, so callers can always unpack
        return None, None, None

    matched_subjects = merged_df['subject'].unique()
    print(f"Successfully matched {len(matched_subjects)} subjects:")
    print(matched_subjects[:5], "..." if len(matched_subjects) > 5 else "")

    # Step 5: Select only numeric columns for correlation
    print("\nSelecting numeric columns for correlation...")

    region_cols = [col for col in region_df.columns if col != 'subject']
    print(f"Region columns: {region_cols}")

    # Keep only the fovea parameters that exist and hold at least one value
    valid_param_cols = [col for col in param_cols
                        if col in merged_df.columns and not merged_df[col].isna().all()]
    print(f"Valid parameter columns: {valid_param_cols}")

    numeric_df = merged_df[valid_param_cols + region_cols]
    print(f"Selected {len(numeric_df.columns)} numeric columns for correlation")

    missing_data = numeric_df.isna().sum()
    if missing_data.sum() > 0:
        print("\nWARNING: Missing data detected in these columns:")
        print(missing_data[missing_data > 0])

    # Step 6: Calculate correlation matrix
    print("\nCalculating correlation matrix...")
    correlation_df = numeric_df.corr(method="spearman")

    # NOTE(review): calculate_pvalues is defined elsewhere in this file and
    # receives the correlation matrix, not the underlying data. If it runs
    # pairwise tests on the columns of its argument, numeric_df is presumably
    # what it should be given - confirm against its definition.
    pvalues = calculate_pvalues(correlation_df)
    print("Calculated p-values for correlation matrix")
    print(f"Created correlation matrix with shape {correlation_df.shape}")

    # Build the annotations directly instead of formatting and then patching
    # cells through chained indexing, which pandas warns about and which is
    # silently ineffective under Copy-on-Write.
    labels = correlation_df.columns
    annot = pd.DataFrame(
        [
            [
                f"{correlation_df.loc[row, col]:.2f}\n{significance_marker(pvalues[col][row])}"
                for col in labels
            ]
            for row in labels
        ],
        index=labels,
        columns=labels,
    )

    return correlation_df, merged_df, annot

def plot_correlation_matrix(corr_df, annot,  title="Correlation Matrix"):
    """
    Plot a correlation matrix heatmap.

    Args:
        corr_df: Correlation coefficients to draw. A full (symmetric) matrix
            is shown as its lower triangle; a DataFrame whose row labels
            differ from its column labels (for example a parameters-by-metrics
            slice) is shown completely.
        annot: Pre-formatted strings, same shape as ``corr_df``, written into
            the cells.
        title: Figure title.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Function-scope import, mirroring how seaborn is imported elsewhere in
    # this file.
    import seaborn as sns

    print(f"\nPlotting correlation matrix: {title}")

    plt.figure(figsize=(12, 10))

    # Hiding the upper triangle only removes redundant cells when rows and
    # columns are the same variables. For a rectangular cross-correlation
    # table every cell is distinct, so nothing may be masked.
    if isinstance(corr_df, pd.DataFrame) and not corr_df.index.equals(corr_df.columns):
        mask = None
    else:
        mask = np.triu(np.ones_like(corr_df, dtype=bool))

    # fmt="s" because the annotations are already strings
    sns.heatmap(corr_df, mask=mask, annot=annot, cmap="coolwarm", fmt="s",
                vmin=-1, vmax=1, center=0, square=True, linewidths=.5,
                annot_kws={'fontstyle': 'italic', 'color': 'w', 'alpha': 1.0})

    plt.title(title)
    plt.tight_layout()
    plt.show()
    print("Plot complete!")


##### Wrapper

In [None]:
def analyze_foveal_shape_and_cone_density(fovea_data, retinal_regions, integration_data):
    """
    Main function to run the complete analysis workflow.

    Computes the regional foveal volumes, correlates them with the cone
    density integration data and plots the resulting matrix.

    Args:
        fovea_data: List of FoveaParams objects
        retinal_regions: List of dictionaries defining regions
        integration_data: Dictionary with cone density integration data

    Returns:
        correlation_df: The correlation matrix, or None when the correlation
            step found no subject common to both datasets
        merged_df: The merged data for further analysis, or None in that
            same case
        foveal_volumes: The calculated foveal volumes
    """
    # Step 1: Calculate foveal volumes
    print("Step 1: Calculating foveal volumes for all subjects and regions...")
    foveal_volumes = calculate_all_regional_volumes(fovea_data, retinal_regions)

    # Step 2: Correlate with cone density data
    print("\nStep 2: Correlating foveal volumes with cone density data...")
    corr_df, merged_df, annot = correlate_foveal_volumes_with_cone_density(
        foveal_volumes, integration_data)

    # The correlation step signals "no matching subjects" with None values;
    # there is nothing to plot then, and the plotter cannot handle None.
    if corr_df is None:
        print("\nStep 3: Skipping plot - no correlation matrix was produced.")
        return corr_df, merged_df, foveal_volumes

    # Step 3: Plot correlation matrix
    print("\nStep 3: Plotting correlation matrix...")
    plot_correlation_matrix(corr_df, annot=annot,
                            title="Correlation between Foveal Volume and Cone Density")

    return corr_df, merged_df, foveal_volumes


#### Fovea-CD correlation Analysis

##### Definitions

In [None]:
# Retinal regions used for the cone-density integration. Each entry has a
# name plus 'start' and 'end' limits; np.inf marks a region without an
# outer limit.
# NOTE(review): the limits are divided by MM_PER_DEGREE when printed in
# degrees elsewhere in this notebook, so they are presumably in mm - confirm.
retinal_regions = [
    dict(name='Perifovea', start=1, end=np.inf),
    dict(name='Parafovea', start=0.75, end=1),
    dict(name='Fovea', start=0.175, end=0.75),
    dict(name='Faz', start=0.175, end=0.25),
    dict(name='Foveola', start=0, end=0.175),
]

# One (initially empty) result dictionary per subject, keyed by subject name
integration_data = {sd.name: {} for sd in subjects_data}

# Eccentricity axis scaled by MM_PER_DEGREE
eccs_in_MM = eccs * MM_PER_DEGREE

In [None]:
# Print every region's limits divided by MM_PER_DEGREE (i.e. in degrees)
for region in retinal_regions:
    start_deg = region['start'] / MM_PER_DEGREE
    end_deg = region['end'] / MM_PER_DEGREE
    print(f"Region: {region['name']}, Start: {start_deg} deg, End: {end_deg} deg")

##### Data Extraction

In [None]:
# Fovea
# Build `fovea_data` by running extract_fovea_data on the results folder.
# NOTE(review): `base_path` is an absolute path on a Windows P: drive, so
# this cell only works on machines where that drive is available.

base_path = r"P:\AOSLO\_automation\_PROCESSED\Photoreceptors\Healthy\_Results"

# presumably a list of FoveaParams objects, as the analysis functions in this
# notebook document for their fovea input - verify against extract_fovea_data
fovea_data = extract_fovea_data(base_path)

In [None]:
# CD (integration) data
#
# For every subject and every retinal region, run
# integrate_cone_density_circle on the subject's fitted density profiles
# with a radius equal to the region's 'end' limit, and store the result in
# integration_data[subject].
#
# NOTE(review): only the region's 'end' is passed to the integration call;
# its 'start' is not used here. Also, "max_X" / "max_Y" are rewritten on
# every region, so the values that remain are those of the last region
# processed - confirm both are intended.
for sd in subjects_data:
    sd_name = sd.name
    for region in retinal_regions:
        # Regions without a positive outer limit are skipped
        if not region['end'] > 0:
            continue

        print (f"Integrating for subject {sd.name} in Region: {region['name']}, Start: {region['start']}, End: {region['end']}")

        radius = region['end']
        if radius == np.inf:
            # An unbounded region is integrated out to a fixed radius instead
            print(f"changing radius to 3.00 for {region['name']}")
            radius = 3.0

        # The fits are passed as single-row 2-D arrays
        (mean_int, std_int, min_int, max_int, cov_int,
         int_results, max_x_results,
         max_y_results, r_grid,
         cumulative_integrations) = integrate_cone_density_circle(
            eccs_in_MM,
            np.array(sd.density_fit_X).reshape(1, -1),
            np.array(sd.density_fit_Y).reshape(1, -1),
            radius=radius,
            exclude_center=False,
        )

        integration_data[sd_name].update({
            f"n_cones_at_{region['name']}": int_results,
            "max_X": max_x_results,
            "max_Y": max_y_results,
        })

##### Filling in extra data fields

In [None]:
# Peak width of each subject's fitted cone-density profiles, evaluated once
# per threshold in `threshold_percents` and stored in integration_data as
# "peak_width_X_<threshold>p" / "peak_width_Y_<threshold>p".

threshold_percents = [20]  # Thresholds (in percent) to evaluate
for threshold_percent in threshold_percents:
    for sd in subjects_data:
        sd_name = sd.name

        # Create the subject's entry if it does not exist yet
        integration_data.setdefault(sd_name, {})

        # Both profiles share the same position axis
        x_positions = eccs_in_MM
        y_positions = eccs_in_MM

        # Fitted density values along each axis
        x_values = np.array(sd.density_fit_X)
        y_values = np.array(sd.density_fit_Y)

        width_x = calculate_peak_width(x_values, x_positions, threshold_percent=threshold_percent)
        width_y = calculate_peak_width(y_values, y_positions, threshold_percent=threshold_percent)

        integration_data[sd_name].update({
            f"peak_width_X_{threshold_percent}p": width_x,
            f"peak_width_Y_{threshold_percent}p": width_y,
        })

        # Store the peak positions if needed
        # integration_data[sd_name][f"peak_position_right_X_{threshold_percent}p"] = posrightx
        # integration_data[sd_name][f"peak_position_left_X_{threshold_percent}p"] = posleftx
        # integration_data[sd_name][f"peak_position_right_Y_{threshold_percent}p"] = posrighty
        # integration_data[sd_name][f"peak_position_left_Y_{threshold_percent}p"] = poslefty

        # # Store Alex's peak width calculations

        # integration_data[sd_name][f"peak_width_X_alex"] = sd.width_nas + sd.width_tem
        # integration_data[sd_name][f"peak_width_Y_alex"] = sd.width_sup + sd.width_inf

##### Diagnostics

In [None]:
# Diagnostic scatter plot: peak cone density of the X fit against the peak
# cone density of the Y fit, one point per subject.
# The two values should be the same, as both fits infer the CD for the same
# point at x = 0 and y = 0.
import scipy.stats  # make sure the stats submodule is loaded for pearsonr

plt.figure(figsize=(10, 6))

x_data = []
y_data = []
labels = []

for sd in subjects_data:
    subject_name = sd.name
    if subject_name not in integration_data:
        continue

    x_max = np.nanmax(sd.density_fit_X)
    y_max = np.nanmax(sd.density_fit_Y)

    # np.nanmax yields NaN for an all-NaN fit; a single NaN would make both
    # the regression line and the correlation below NaN as well.
    if not (np.isfinite(x_max) and np.isfinite(y_max)):
        print(f"Skipping {subject_name}: no finite peak cone density")
        continue

    x_data.append(x_max)
    y_data.append(y_max)
    labels.append(subject_name)

# Create scatter plot
plt.scatter(x_data, y_data, alpha=0.7, s=80)

# Label each point with the subject name (without the 'Subject' prefix)
for label, x_val, y_val in zip(labels, x_data, y_data):
    plt.annotate(label.replace('Subject', ''), (x_val, y_val),
                 fontsize=8, alpha=0.7,
                 xytext=(5, 5), textcoords='offset points')

# Regression line and correlation need at least two points
if len(x_data) > 1:
    z = np.polyfit(x_data, y_data, 1)
    p = np.poly1d(z)
    x_line = np.linspace(min(x_data), max(x_data), 100)
    plt.plot(x_line, p(x_line), 'r--', alpha=0.8)

    # Calculate correlation and add to plot
    r, p_val = scipy.stats.pearsonr(x_data, y_data)
    plt.text(0.05, 0.95, f'r = {r:.2f}, p = {p_val:.4f}\ny = {z[0]:.2f}x + {z[1]:.2f}',
             transform=plt.gca().transAxes, fontsize=10,
             verticalalignment='top', bbox=dict(facecolor='white', alpha=0.5))

plt.xlabel('Peak CD on x axis (cones/mm²)')
plt.ylabel('Peak CD on y axis (cones/mm²)')
plt.title('Comparison of peak CD measurements in x and y directions')
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()

##### Analysis

In [None]:
# Run the fovea-parameter vs. cone-density correlation analysis and plot
# both the full matrix and a focused parameters-by-metrics view.
print("Starting analysis with:")
print(f"- {len(fovea_data)} fovea parameter objects")
print(f"- {len(integration_data)} subjects in integration data dictionary")

# Calculate correlation matrix
corr_matrix, merged_data, annot = create_correlation_matrix(fovea_data, integration_data)

if corr_matrix is not None:
    # Plot full correlation matrix
    plot_correlation_matrix(corr_matrix, annot=annot, title = "Full Correlation Matrix")

    # For a focused view, plot correlations between fovea params and regions only
    print("\nCreating focused correlation matrix (fovea params vs. region metrics)...")
    fovea_params = ['A00', 'A10', 'A01', 'A20', 'A02', 'A11', 'foveal_depth',
                    'foveal_center_X', 'foveal_width_X', 'foveal_center_Y', 'foveal_width_Y',
                    'foveal_max_slope', 'foveal_flatness', 'foveal_volume']
    # The data-extraction cell stores the peak entries under "max_X" and
    # "max_Y"; the older "max_cone_density_*" names are kept so previously
    # built dictionaries still match.
    region_metrics = ['n_cones_at_Perifovea', 'n_cones_at_Parafovea', 'n_cones_at_Fovea',
                      'n_cones_at_Foveola', 'max_X', 'max_Y',
                      'max_cone_density_X', 'max_cone_density_Y']

    # Filter for only valid columns that exist in the data
    valid_fovea_params = [p for p in fovea_params if p in corr_matrix.index]
    valid_region_metrics = [r for r in region_metrics if r in corr_matrix.columns]

    print(f"Using {len(valid_fovea_params)} fovea parameters and {len(valid_region_metrics)} region metrics")

    if valid_fovea_params and valid_region_metrics:
        # Get only the correlations between parameters and regions
        focused_corr = corr_matrix.loc[valid_fovea_params, valid_region_metrics]
        focused_annot = annot.loc[valid_fovea_params, valid_region_metrics]

        plot_correlation_matrix(focused_corr, focused_annot, "Fovea Parameters vs Region Metrics")

        # Print descriptive statistics for the merged data
        print("\nDescriptive Statistics:")
        print(merged_data[valid_fovea_params + valid_region_metrics].describe())
    else:
        # An empty selection cannot be drawn as a heatmap
        print("\nWARNING: No fovea parameters or region metrics available for the focused view.")

    print("\nAnalysis complete!")
else:
    print("\nERROR: Could not create correlation matrix due to no matching data.")
    print("Please check that subject identifiers match between the two datasets.")