In [None]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import matplotlib.pyplot as plt

# Start from the ggplot theme, then override the defaults shared by every figure
# in this notebook with a single rcParams update.
plt.style.use('ggplot')
plt.rcParams.update({
    'figure.figsize': (10, 6),   # default figure size
    'figure.dpi': 300,           # default figure dpi
    'font.size': 12,             # default font size
    'lines.linewidth': 2,        # default line width
    'axes.labelsize': 14,        # default axis-label size
    'axes.titlesize': 16,        # default axes-title size
    'xtick.labelsize': 12,       # default x-tick label size
    'ytick.labelsize': 12,       # default y-tick label size
    'legend.fontsize': 12,       # default legend font size
    'figure.titlesize': 18,      # default figure-title (suptitle) size
})

## Gathering subjects' data

data structure to store subject data; feel free to add more fields if needed

In [None]:
from dataclasses import dataclass
import pandas as pd
import numpy as np

@dataclass
class SubjectData:
    """Per-subject container for everything this notebook gathers about one subject/session.

    All fields default to ``None``; an empty instance is created first and the
    fields are then filled in one by one from the spreadsheet and the per-session
    CSV files, and later by the derived-feature cells.

    NOTE: the annotations are read at runtime (``SubjectData.__annotations__``) to
    select the fields annotated exactly ``int`` or ``float`` as columns of the
    scalar correlation table, so changing an annotation changes that table.
    """

    # --- identification ---
    name: str = None     # f'Subject{n}'
    pid: str = None      # f'AOHC_{n}'
    nb: int = None       # subject number n
    session: str = None  # f'Session{m}'

    # --- cone density fit parameters, read from the density CSV
    #     (columns `width_<direction>` / `max_slope_<direction>`, first row) ---
    width_nas: float = None
    width_tem: float = None
    width_inf: float = None
    width_sup: float = None
    max_slope_nas: float = None
    max_slope_tem: float = None
    max_slope_inf: float = None
    max_slope_sup: float = None

    # --- foveal shape parameters, read from `fovea_3d_fitted_params.csv` ---
    oct_bump_X: float = None     # row 'A20'
    oct_bump_Y: float = None     # row 'A02'
    oct_width_X: float = None    # row 'width_X', rescaled and converted to degrees
    oct_width_Y: float = None    # row 'width_Y', rescaled and converted to degrees
    oct_max_slope: float = None  # row 'max_slope'
    oct_depth: float = None      # row 'depth', in mm
    oct_flatness: float = None   # row 'flatness'

    # --- metadata from the cohort spreadsheet ---
    age: float = None              # (date of visit - 'DDN') in days / 365
    axial_length: float = None     # 'AL D (mm)' when Laterality is 'OD', else 'AL G (mm)'
    spherical_equiv: float = None  # 'Equi Sph D' when Laterality is 'OD', else 'Equi Sph G'
    sex: int = None                # 1 if 'Sexe' == 'F', else 0

    # --- cone density profiles (indexed like the density CSV) ---
    eccs: np.ndarray = None         # index of the density CSV (eccentricities)
    density_X: pd.Series = None     # column 'dens_smthd_X'
    density_Y: pd.Series = None     # column 'dens_smthd_Y'
    density_fit_X: pd.Series = None # column 'dens_fit_X'
    density_fit_Y: pd.Series = None # column 'dens_fit_Y'

    # --- layer profiles from `results.csv`, rows with index in [-10, 10] ---
    cvi_X: pd.Series = None      # 'CVI_X'
    cvi_Y: pd.Series = None      # 'CVI_Y'
    gcl_ipl_X: pd.Series = None  # 'GCL+IPL_X'
    gcl_ipl_Y: pd.Series = None  # 'GCL+IPL_Y'
    onl_X: pd.Series = None      # 'ONL_X'
    onl_Y: pd.Series = None      # 'ONL_Y'
    inl_opl_X: pd.Series = None  # 'INL+OPL_X'
    inl_opl_Y: pd.Series = None  # 'INL+OPL_Y'
    rnfl_X: pd.Series = None     # 'RNFL_X'
    rnfl_Y: pd.Series = None     # 'RNFL_Y'
    chrd_X: pd.Series = None     # 'Choroid_X'
    chrd_Y: pd.Series = None     # 'Choroid_Y'
    pr_rpe_X: pd.Series = None   # 'PhotoR+RPE_X'
    pr_rpe_Y: pd.Series = None   # 'PhotoR+RPE_Y'
    os_X: pd.Series = None       # 'OS_X'
    os_Y: pd.Series = None       # 'OS_Y'

    # --- derived: cone counts inside a disk around the density peak (see `get_nb_cones`) ---
    nb_cones: float = None      # from the smoothed density profiles
    nb_cones_fit: float = None  # from the fitted density profiles

    # --- derived: GCL+IPL pit descriptors (see `get_gcl_width`) ---
    width_gcl_X: float = None
    width_gcl_Y: float = None
    min_thick_gcl: float = None  # smaller of the X and Y minimum-thickness estimates

Gian Notes 

ONL we have mostly Muller Cells 
More cones present -> Superior in the PhotoR: the cones are standing right up, the shape might be thicker because of the infrastructure change
Why would the INL be thicker at the Superior and thinner at the Inferior?

populate the data structure by gathering data from different sources

In [None]:

# Debug switch: work on a small subset of subjects so the whole pipeline does not
# have to be rerun for every subject each time something is tested on the model.
 
# Subject names sort as strings (Subject10, Subject100, Subject101, ...), so the
# "first five" subjects are listed explicitly instead of being sliced.

# Both names are used further down to filter `subjects_data`.

take_first_five = False
first_five_subjects = ["Subject10","Subject100","Subject101","Subject104","Subject105"]

In [None]:
from pathlib import Path
from typing import List, Tuple, Dict
import sys

sys.path.append(str(Path.cwd().parent))
from src.cell.analysis.constants import MM_PER_DEGREE
from src.cell.layer.helpers import gaussian_filter_nan
from src.configs.parser import Parser


In [None]:

Parser.initialize()

# Cohort spreadsheet: one row per subject, re-indexed as 'Subject<n>' so the
# look-ups below can use SubjectData.name as key.
# pd.read_excel opens and closes the workbook itself (the previous bare
# pd.ExcelFile(...) was never closed).
sheet = pd.read_excel(
    r'V:\Studies\AOSLO\data\cohorts\AOSLO healthy\DATA_HC+DM.xlsx',
    sheet_name='Healthy',
    header=0,
    nrows=45,
    index_col=0,
)
sheet.index = sheet.index.map(lambda x: f'Subject{x}')

# age in years at the visit (days between visit and 'DDN', divided by 365)
age_dict = ((sheet['Date of visit'] - sheet['DDN']).dt.days / 365).to_dict()
# take the 'D' column when Laterality is 'OD', the 'G' column otherwise
axial_dict = sheet['AL D (mm)'].where(sheet['Laterality'] == 'OD', sheet['AL G (mm)']).to_dict()
spherical_dict = sheet['Equi Sph D'].where(sheet['Laterality'] == 'OD', sheet['Equi Sph G']).to_dict()
# 1 for 'F', 0 for anything else
sex_dict = sheet['Sexe'].map(lambda x: 1 if x == 'F' else 0).to_dict()

base_path = Path(r'P:\AOSLO\_automation\_PROCESSED\Photoreceptors\Healthy\_Results')

# look-up table for subject and session numbers: one whitespace-separated
# "<subject> <session>" pair per line. The file is closed deterministically and
# blank lines are skipped so they cannot break the 2-tuple unpacking downstream.
with open(r'P:\AOSLO\_automation\_PROCESSED\Photoreceptors\Healthy\processed.txt') as processed_file:
    subjects_sessions = [
        [int(n) for n in line.split()]
        for line in processed_file
        if line.strip()
    ]

# subject for which OCTs are tilted (white dot is not well aligned with PR+RPE peak)
# see explanation in `PRxRLT_expmanual.ipynb`
oct_to_exclude = {
    13, 18, 20, 25, 26, 30, 35, 42, 46, 66, 100, 105,
}


# Build one SubjectData record per (subject, session) pair of the look-up table,
# skipping the subjects listed in `oct_to_exclude`.
subjects_data: List[SubjectData] = []
for subject_n, session_n in subjects_sessions:
    if subject_n in oct_to_exclude:
        continue

    sd = SubjectData()
    sd.name = f'Subject{subject_n}'
    sd.pid = f'AOHC_{subject_n}'
    sd.nb = subject_n
    sd.session = f'Session{session_n}'

    # per-session results directory: <base_path>/Subject<n>/Session<m>
    path = base_path / sd.name / sd.session
    print(f'Loading {sd.name} {sd.session}...')

    # record subject's metadata from the excel sheet
    # (a subject missing from the sheet raises KeyError here)
    sd.age = age_dict[sd.name]
    sd.axial_length = axial_dict[sd.name]
    sd.spherical_equiv = spherical_dict[sd.name]
    sd.sex = sex_dict[sd.name]

    # record foveal shape parameters (populated by `src/save_layer_features.ipynb`)
    df_oct = pd.read_csv(path / Parser.get_layer_thickness_dir() / 'fovea_3d_fitted_params.csv', sep=';', index_col=0)
    sd.oct_bump_X = df_oct.loc['A20', 'params']
    sd.oct_bump_Y = df_oct.loc['A02', 'params']
    # NOTE(review): the sqrt(2 * 2.8) factor is not explained anywhere in this notebook;
    # presumably it rescales the fitted width parameter - confirm against the fitting code.
    sd.oct_width_X = df_oct.loc['width_X', 'params'] * np.sqrt(2 * 2.8) / MM_PER_DEGREE # in °
    sd.oct_width_Y = df_oct.loc['width_Y', 'params'] * np.sqrt(2 * 2.8) / MM_PER_DEGREE # in °
    sd.oct_max_slope = df_oct.loc['max_slope', 'params']
    sd.oct_depth = df_oct.loc['depth', 'params'] # in mm
    sd.oct_flatness = df_oct.loc['flatness', 'params']
    # sd.oct_volume = df_oct.loc['volume', 'params']

    # record cone density and fitted parameters (populated by `src/cell/analysis/density_analysis_pipeline_manager.py`)
    df_density = pd.read_csv(path / Parser.get_density_analysis_dir() / 'densities_test2907.csv', sep=';', index_col=0)
    # NOTE(review): the two raw-density tables are loaded but not used in this cell;
    # the reads still fail if the files are missing.
    df_raw_density_x = pd.read_csv(path / Parser.get_density_analysis_dir() / 'densities_raw_x.csv', sep=';', index_col=0)
    df_raw_density_y = pd.read_csv(path / Parser.get_density_analysis_dir() / 'densities_raw_y.csv', sep=';', index_col=0)
    
    # scalar fit parameters: taken from the first row of their column
    sd.width_nas = df_density['width_nasal'].iloc[0]
    sd.width_tem = df_density['width_temporal'].iloc[0]
    sd.width_inf = df_density['width_inferior'].iloc[0]
    sd.width_sup = df_density['width_superior'].iloc[0]
    sd.max_slope_nas = df_density['max_slope_nasal'].iloc[0]
    sd.max_slope_tem = df_density['max_slope_temporal'].iloc[0]
    sd.max_slope_inf = df_density['max_slope_inferior'].iloc[0]
    sd.max_slope_sup = df_density['max_slope_superior'].iloc[0]
    # full profiles: smoothed and fitted densities along each axis
    sd.density_X = df_density['dens_smthd_X']
    sd.density_Y = df_density['dens_smthd_Y']
    sd.density_fit_X = df_density['dens_fit_X']
    sd.density_fit_Y = df_density['dens_fit_Y']
    
    # the index of the density table is used as the eccentricity axis
    sd.eccs = df_density.index.to_numpy()

    # record layer thicknesses (populated by `src/save_layer_features.ipynb`)
    # NOTE(review): this file is read from the density-analysis directory (not the
    # layer-thickness directory used for the foveal parameters above) - confirm this is intended.
    # Only rows whose index lies in [-10, 10] are kept.
    df_thick = pd.read_csv(path / Parser.get_density_analysis_dir() / 'results.csv', sep=',', index_col=0, skiprows=1).query('-10 <= index <= 10')
    sd.cvi_X = df_thick['CVI_X']
    sd.cvi_Y = df_thick['CVI_Y']
    sd.gcl_ipl_X = df_thick['GCL+IPL_X']
    sd.gcl_ipl_Y = df_thick['GCL+IPL_Y']
    sd.onl_X = df_thick['ONL_X']
    sd.onl_Y = df_thick['ONL_Y']
    sd.inl_opl_X = df_thick['INL+OPL_X']
    sd.inl_opl_Y = df_thick['INL+OPL_Y']
    sd.rnfl_X = df_thick['RNFL_X']
    sd.rnfl_Y = df_thick['RNFL_Y']
    sd.chrd_X = df_thick['Choroid_X']
    sd.chrd_Y = df_thick['Choroid_Y']
    sd.pr_rpe_X = df_thick['PhotoR+RPE_X']
    sd.pr_rpe_Y = df_thick['PhotoR+RPE_Y']
    sd.os_X = df_thick['OS_X']
    sd.os_Y = df_thick['OS_Y']

    subjects_data.append(sd)

## Diagnostics / Exploratory Data Analysis

The following cells are used to visualize the data just extracted, mainly in terms of densities and layer thicknesses, to serve as a first analysis and as debugging for the data extracted from the pipeline 

In [None]:
#plots density for a subject

from sympy import plot


def plot_density(subject_data: SubjectData):
    """Plot the smoothed cone density profiles (X and Y) of one subject against eccentricity."""
    _, ax = plt.subplots()
    profiles = (
        ('X', subject_data.density_X),
        ('Y', subject_data.density_Y),
    )
    for axis_label, profile in profiles:
        ax.plot(subject_data.eccs, profile, label=axis_label)
    ax.set_xlabel('Eccentricity (°)')
    ax.set_ylabel('Density (cells/deg²)')
    ax.set_title(subject_data.name)
    ax.legend()
    plt.show()

plot_density(subjects_data[0])

#plots thickness for a subject

In [None]:
# plots the OS thickness profile (X axis) for a subject

from sympy import plot


def plot_density(subject_data: SubjectData):
    """Plot the OS thickness profile along X of one subject against eccentricity.

    The name `plot_density` is kept for the callers in this cell, but the curve
    drawn is `os_X` (a layer-thickness profile), so the axis label now says so
    instead of claiming a cone density. Defining it rebinds the name of the
    density plotter defined earlier in the notebook.

    `os_X` is plotted against `subject_data.eccs`, which requires both to have
    the same length.
    """
    plt.figure()
    plt.plot(subject_data.eccs, subject_data.os_X, label='X')
    plt.xlabel('Eccentricity (°)')
    # unit deliberately omitted: it is not established in this cell
    plt.ylabel('OS thickness')
    plt.title(subject_data.name)
    plt.legend()
    plt.show()


# plot the second-to-last subject of the list
plot_density(subjects_data[-2])

#plots thickness for a subject

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation

def animate_and_save_images(subjects_data):
    """Animate, subject by subject, the fitted cone density and the OS profile along X.

    Subjects are ordered by decreasing peak of `os_X` (NaNs ignored). Both panels
    use axis limits computed over all subjects (plus a 10 % margin) so that frames
    are directly comparable.

    Side effects:
        - writes one PNG per frame to ``subject_images/<frame>-<name>.png``
          (rewritten every time the frame is drawn);
        - writes ``subjects_animation_os.gif`` in the working directory
          (needs Pillow);
        - shows the figure.

    Parameters:
        subjects_data: sequence of objects exposing `name`, `eccs`, `density_fit_X`
            and `os_X`; the two profiles must have the same length as `eccs`.

    Raises:
        ValueError: if `subjects_data` is empty.
    """
    if len(subjects_data) == 0:
        raise ValueError("subjects_data is empty: nothing to animate")

    # Largest OS peak first; np.nanmax ignores NaN samples.
    subjects_sorted = sorted(subjects_data, key=lambda s: np.nanmax(s.os_X), reverse=True)

    def _padded_limits(arrays):
        """Return (min - margin, max + margin) over all non-NaN values, margin = 10 % of the span."""
        values = np.concatenate([np.asarray(a, dtype=float) for a in arrays])
        values = values[~np.isnan(values)]
        low, high = np.min(values), np.max(values)
        margin = (high - low) * 0.1
        return low - margin, high + margin

    x_limits = _padded_limits([s.eccs for s in subjects_sorted])
    density_limits = _padded_limits([s.density_fit_X for s in subjects_sorted])
    os_limits = _padded_limits([s.os_X for s in subjects_sorted])

    # Create output folder for images.
    output_folder = "subject_images"
    os.makedirs(output_folder, exist_ok=True)

    # One figure with two side-by-side panels, reused for every frame.
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))

    def _draw_panel(ax, subject, profile, y_limits, ylabel, title_suffix, **plot_kwargs):
        """Clear `ax` and draw the non-NaN samples of `profile` against eccentricity."""
        ax.cla()
        eccs = np.asarray(subject.eccs, dtype=float)
        values = np.asarray(profile, dtype=float)
        keep = ~np.isnan(eccs) & ~np.isnan(values)
        ax.set_xlim(*x_limits)
        ax.set_ylim(*y_limits)
        ax.plot(eccs[keep], values[keep], marker='o', **plot_kwargs)
        ax.set_xlabel("Eccentricity (°)")
        ax.set_ylabel(ylabel)
        ax.set_title(f"{subject.name} - {title_suffix}")
        ax.legend()

    def update(frame):
        subject = subjects_sorted[frame]

        _draw_panel(ax1, subject, subject.density_fit_X, density_limits,
                    "Density (cells/deg²)", "Density", label='Density')
        _draw_panel(ax2, subject, subject.os_X, os_limits,
                    "OS Thickness (µm)", "OS", label='OS Thickness', color='orange')

        fig.suptitle(f"Subject {frame+1}/{len(subjects_sorted)}: {subject.name}", fontsize=16)

        # Save the current frame; the file name carries the rank in the sorted order.
        image_filename = os.path.join(output_folder, f"{frame}-{subject.name}.png")
        fig.savefig(image_filename)

    # `interval` only drives the on-screen playback; the GIF frame rate is the
    # `fps` passed to `save`.
    ani = animation.FuncAnimation(fig, update, frames=len(subjects_sorted), interval=2000, repeat=True)

    # Save the animation as a GIF file (requires Pillow installed).
    ani.save("subjects_animation_os.gif", writer="pillow", fps=2)
    plt.show()

# Usage:
# animate_and_save_images(subjects_data)


In [None]:
import PIL
import svgpathtools


def debug_visualize_segmentation(oct_file, seg_file, dims, bscan_id):
    """
    Display the original B-scan and overlay the vertices of the segmentation paths from the SVG file.

    Parameters:
        oct_file: path of the B-scan image (anything `PIL.Image.open` accepts).
        seg_file: path of the SVG file holding the segmentation paths.
        dims: unused; kept so existing calls keep working.
        bscan_id: identifier shown in the figure title.

    Every segment vertex of every path is plotted (the start of each segment plus
    the end of the last one), not only the first point of each path.
    """
    # `import PIL` alone does not load the `PIL.Image` submodule; import it
    # explicitly instead of relying on another library having done so.
    from PIL import Image

    # Load the OCT image and release the file handle as soon as it is decoded.
    with Image.open(oct_file) as image:
        oct_img = np.array(image)

    # svg2paths returns (paths, attributes); each path is a sequence of segments.
    paths, _attributes = svgpathtools.svg2paths(str(seg_file))

    # Points are complex numbers: real part -> horizontal (x), imaginary part -> vertical (z).
    x_coords, z_coords = [], []
    for path in paths:
        if len(path) == 0:
            continue
        vertices = [segment.start for segment in path]
        vertices.append(path[-1].end)
        x_coords.extend(vertex.real for vertex in vertices)
        z_coords.extend(vertex.imag for vertex in vertices)

    # Display
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.imshow(oct_img, cmap='gray', origin='upper')
    ax.scatter(x_coords, z_coords, s=5, c='red', marker='o')
    ax.set_title(f"B-scan {bscan_id} with raw SVG points overlay")
    # NOTE(review): with origin='upper' the y axis already increases downwards, so this
    # call flips image and points together - confirm it is wanted for these SVGs.
    ax.invert_yaxis()
    plt.show()


In [None]:
print(subjects_data[0].density_X)

In [None]:

from sklearn.metrics import r2_score

def goodness_of_fit(subject_data: SubjectData, min_ecc, max_ecc):
    """
    Computes R² for model fit within a specified eccentricity range.

    Only samples whose eccentricity lies in [min_ecc, max_ecc] (both inclusive)
    and for which both the measured and the fitted density are not NaN are used.
    (The range arguments used to be accepted but ignored.)

    Parameters:
        subject_data: An object containing `eccs`, `density_X`, `density_fit_X`, `density_Y`, and `density_fit_Y`,
            all of the same length.
        min_ecc: Minimum eccentricity for filtering.
        max_ecc: Maximum eccentricity for filtering.

    Returns:
        r2_X: R² for X densities within the eccentricity range (NaN if fewer than two valid samples).
        r2_Y: R² for Y densities within the eccentricity range (NaN if fewer than two valid samples).
    """
    eccs = np.asarray(subject_data.eccs, dtype=float)
    in_range = (eccs >= min_ecc) & (eccs <= max_ecc)

    def _r2(observed, fitted):
        """R² of `fitted` against `observed` on the in-range, non-NaN samples."""
        observed = np.asarray(observed, dtype=float)
        fitted = np.asarray(fitted, dtype=float)
        keep = in_range & ~np.isnan(observed) & ~np.isnan(fitted)
        # R² is undefined with fewer than two samples
        if np.count_nonzero(keep) <= 1:
            return np.nan
        return r2_score(observed[keep], fitted[keep])

    r2_X = _r2(subject_data.density_X, subject_data.density_fit_X)
    r2_Y = _r2(subject_data.density_Y, subject_data.density_fit_Y)

    return r2_X, r2_Y

# Example usage for all subjects with an eccentricity range of 0 to 10
min_ecc, max_ecc = 0, 10
r2s = [goodness_of_fit(sd, min_ecc, max_ecc) for sd in subjects_data]

print(r2s)



In [None]:
# Mean R² over subjects, per axis. `goodness_of_fit` returns NaN for a subject
# without enough valid samples, so a NaN-aware mean is used: a single such
# subject no longer turns the whole cohort mean into NaN.
# reshape(-1, 2) keeps the (n_subjects, 2) layout even when `r2s` is empty.
mean_r2_X, mean_r2_Y = np.nanmean(np.asarray(r2s, dtype=float).reshape(-1, 2), axis=0)

print(mean_r2_X, mean_r2_Y)

### additional fields based on the previously gathered data

In [None]:
def _refine_peak_location(ecc: np.ndarray, profile: np.ndarray) -> float:
    '''Locate the peak of `profile` with sub-sample precision (vertex of a parabola fitted around the maximum).'''
    idx = int(np.nanargmax(profile))
    # up to 5 samples centred on the maximum, clamped so a peak near the array
    # start cannot produce a negative (wrapping) slice start
    lo, hi = max(idx - 2, 0), min(idx + 3, len(profile))
    x, y = ecc[lo:hi], profile[lo:hi]
    finite = np.isfinite(x) & np.isfinite(y)
    if np.count_nonzero(finite) >= 3:
        p = np.polyfit(x[finite], y[finite], 2)
        # the vertex is a maximum only for a concave parabola
        if p[0] < 0:
            return -p[1] / (2 * p[0])
    # not enough points or degenerate fit: fall back to the sampled maximum
    return ecc[idx]


def get_nb_cones(ecc: np.ndarray, dens_X: pd.Series, dens_Y: pd.Series, radius: float, smoothen: bool = True) -> float:
    '''
    Given the cone density profiles along the X and Y axes, compute the total number of cones within a disk of radius `radius` (in degree) centered at the fovea by linearly interpolating (radially) the density profiles and integrating over the disk.

    The density at distance R from the peak is the NaN-aware mean of the four half-profiles
    (X+, X-, Y+, Y-), each measured from the peak of its own axis; the count is
    2*pi * MM_PER_DEGREE**2 * integral(density(R) * R dR) over (0, radius].

    Parameters:
        ecc: eccentricities (degrees) at which both profiles are sampled, in increasing order.
        dens_X, dens_Y: density profiles along X and Y, same length as `ecc`.
        radius: radius of the disk, in degrees.
        smoothen: if True, both profiles go through `gaussian_filter_nan(sigma=4)` first.

    Returns:
        The integrated number of cones.
    '''
    ecc = np.asarray(ecc, dtype=float)
    if smoothen:
        smthd_x = np.asarray(gaussian_filter_nan(dens_X, sigma=4), dtype=float)
        smthd_y = np.asarray(gaussian_filter_nan(dens_Y, sigma=4), dtype=float)
    else:
        smthd_x = np.asarray(dens_X, dtype=float)
        smthd_y = np.asarray(dens_Y, dtype=float)

    x_amax = _refine_peak_location(ecc, smthd_x)
    y_amax = _refine_peak_location(ecc, smthd_y)

    R = np.linspace(0.0001, radius, 500) # radius in degrees
    # One ROW per half-profile, shape (4, 500). np.r_ used to concatenate them into a
    # single 1-D vector, which made the mean below a scalar and lost the radial
    # dependence of the density.
    disk = np.vstack([
        np.interp(x_amax + R, ecc, smthd_x),
        np.interp(x_amax - R, ecc, smthd_x),
        np.interp(y_amax + R, ecc, smthd_y),
        np.interp(y_amax - R, ecc, smthd_y),
    ])
    radial_density = np.nanmean(disk, axis=0)  # shape (500,)

    # np.trapz is deprecated in NumPy 2; prefer np.trapezoid when it exists
    trapezoid = getattr(np, 'trapezoid', None) or np.trapz

    norm_coef = MM_PER_DEGREE**2 * 2 * np.pi
    # integrate cone density over disk to get total nb of cones
    return norm_coef * trapezoid(radial_density * R, R)

RADIUS = 3 # degree
for sd in subjects_data:
    sd.nb_cones = get_nb_cones(sd.eccs, sd.density_X, sd.density_Y, radius = RADIUS)
    sd.nb_cones_fit = get_nb_cones(sd.eccs, sd.density_fit_X, sd.density_fit_Y, radius = RADIUS, smoothen=False)

In [None]:
from scipy.signal import find_peaks

def adjust_flat(gcl_data: np.ndarray, peak_left: int, peak_right: int) -> np.ndarray:
    '''Remove the linear tilt between the two peaks so that both end up at the same height (the left peak keeps its value).'''
    rise = gcl_data[peak_right] - gcl_data[peak_left]
    run = peak_right - peak_left
    slope = rise / run
    # sample positions relative to the left peak
    offsets = np.arange(len(gcl_data)) - peak_left
    return gcl_data - slope * offsets

def get_gcl_width(gcl: pd.Series) -> Tuple[float, float]:
    '''
    Given the GCL+IPL thickness profile, compute the width of the pit as well as the minimum thickness of the layer. 
    Here, the width of the pit is defined as the distance between the two points where the thickness is 20% of the depth of the pit. The depth of the pit is defined as the difference between the thickness surrounding the pit and the thickness at the pit's bottom.

    Only the samples with |eccentricity| <= 6 are used; the profile needs more than 10 of them.

    Returns:
        (width_pit_gcl, min_thickness_gcl): the width is in the units of the index of `gcl`,
        the minimum thickness in the units of its values.

    Raises:
        AssertionError: if no peak can be found on both sides of the pit.
    '''
    # remember the name now: `gcl` is rebound to a bare ndarray just below and
    # would no longer have a `.name` when the error message is built
    profile_name = gcl.name
    eccs = gcl[np.abs(gcl.index) <= 6].index.to_numpy()
    gcl = gcl.interpolate(method='polynomial', order=1)[eccs].to_numpy()

    # Look for one rim peak in the left third and one in the right third of the
    # profile, smoothing more and more (sigma 3..9) until both are found.
    smooth_param = 3
    peak_left, peak_right = [], []
    while not (len(peak_left) >= 1 and len(peak_right) >= 1) and smooth_param < 10:
        smoothed_gcl = gaussian_filter_nan(gcl, smooth_param)
        peaks = find_peaks(smoothed_gcl)[0]
        peak_left  = [peak for peak in peaks if peak < len(smoothed_gcl) / 3]
        peak_right = [peak for peak in peaks if peak > 2 * len(smoothed_gcl) / 3]
        smooth_param += 1
    if not (len(peak_left) >= 1 and len(peak_right) >= 1):
        raise AssertionError(f'No peaks found for {profile_name}')
    peak_left = round(np.mean(peak_left))
    peak_right = round(np.mean(peak_right))

    # level the two rims, then smooth lightly before measuring the pit
    adjusted_gcl = adjust_flat(gcl, peak_left, peak_right)
    smoothed_aj_gcl = gaussian_filter_nan(adjusted_gcl, 2)

    # width: distance between the outermost crossings of the level located at
    # 20 % of the pit depth above its bottom
    y_min = np.nanmin(smoothed_aj_gcl[peak_left:peak_right])
    y_target = y_min + (smoothed_aj_gcl[peak_left] - y_min) / 5
    intercepts = np.where(np.diff(np.sign(smoothed_aj_gcl - y_target)))[0]
    leftmost = eccs[intercepts[0]]
    rightmost = eccs[intercepts[-1]+1]
    width_pit_gcl = rightmost - leftmost

    # minimum thickness: vertex of a parabola fitted through the 10 thinnest samples
    indicies = np.argpartition(gcl, 10)[:10]
    p = np.polyfit(eccs[indicies], gcl[indicies], 2)
    if p[0] > 0:
        min_thickness_gcl = np.polyval(p, -p[1] / (2 * p[0]))
    else:
        # Flat or concave fit: the parabola has no minimum (and p[0] == 0 would
        # divide by zero), so use the thinnest of the selected samples instead.
        min_thickness_gcl = np.nanmin(gcl[indicies])
    return width_pit_gcl, min_thickness_gcl

for sd in subjects_data:
    width_gcl_x, min_thick_x = get_gcl_width(sd.gcl_ipl_X)
    width_gcl_y, min_thick_y = get_gcl_width(sd.gcl_ipl_Y)
    sd.width_gcl_X = width_gcl_x
    sd.width_gcl_Y = width_gcl_y
    sd.min_thick_gcl = min(min_thick_x, min_thick_y)
    # print(f'{sd.name:>10}: {width_gcl_x:.2f}°, {depth_gcl_x:.4f}, {width_gcl_y:.2f}°, {depth_gcl_y:.4f}')
    # plt.xlim(-6, 6)
    # plt.legend()
    # plt.title(sd.name)
    # plt.show()

In [None]:
# Optionally restrict `subjects_data` to the subjects named in `first_five_subjects`.
# NOTE(review): 'Subject100' and 'Subject105' are in `oct_to_exclude`, so they never
# enter `subjects_data`; the filter can therefore return fewer than five subjects.

if take_first_five:
    first_five_subjects_data = [sd for sd in subjects_data if sd.name in first_five_subjects]

    # keep a reference to the full list so it is not lost
    # (re-running this cell replaces it with the already-filtered list)
    old_subjects_data = subjects_data

    # from here on `subjects_data` is the reduced list
    subjects_data = first_five_subjects_data

    # keep in mind that `subjects_data` now holds only this small subset, which
    # affects every statistic computed later on

## Data analysis

In [None]:
import seaborn as sns
import scipy.stats

df = pd.DataFrame({k: [getattr(sd, k) for sd in subjects_data] for k,t in SubjectData.__annotations__.items() if t in (int, float)}, index=[sd.name for sd in subjects_data])

def corr_sig(df: pd.DataFrame, drop=['nb']) -> Tuple[np.ndarray, np.ndarray]:
    """Pairwise Pearson correlation between the columns of `df`, except those listed in `drop`.

    Returns two square arrays ordered like the remaining columns: the correlation
    coefficients and the matching p-values.
    """
    names = df.columns.drop(drop).to_list()
    size = len(names)
    coefficients = np.zeros(shape=(size, size))
    p_values = np.ones_like(coefficients)
    for row, row_name in enumerate(names):
        for col, col_name in enumerate(names):
            coefficient, p_value = scipy.stats.pearsonr(df[row_name], df[col_name])
            coefficients[row, col] = coefficient
            p_values[row, col] = p_value
    return coefficients, p_values

corr, pv = corr_sig(df)
sig_mask = pv < 0.05
plt.figure(figsize=(14, 10), dpi=400)
# sns.heatmap(corr, mask=~sig_mask, annot=True, cmap='coolwarm', annot_kws={"fontsize":8}, xticklabels=df.columns, yticklabels=df.columns)
sns.heatmap(df.corr(), annot=True, cmap='coolwarm', annot_kws={"fontsize":8})
plt.show()

In [None]:
eccs = subjects_data[0].eccs
layer_names = ['rnfl', 'gcl_ipl', 'inl_opl', 'onl', 'pr_rpe', 'os', 'chrd']
names_r = {'rnfl': 'RNFL', 'gcl_ipl': 'GCL+IPL', 'inl_opl': 'INL+OPL', 'onl': 'ONL', 'pr_rpe': 'PhotoR+RPE', 'os': 'OS', 'chrd': 'Choroid', 'cones': 'Cone Density'}

In [None]:
from src.shared.helpers.direction import Direction


def preprocess_functional_feature(data: np.ndarray, standardization: str = 'inter') -> np.ndarray:
    '''
    Preprocess a functional feature (functional feature such as cone density or layer thickness, for which the feature is a function of eccentricity) by (Z-)standardizing it.
    Given `data` matrix should have shape (n_subjects, n_eccentricities).
    
    - For an intra-indivual analysis, use `standardization='intra'` to standardize within subjects (i.e. within each row). This removes inter-subject variability.
    - For an inter-individual analysis, use `standardization='inter'` to standardize across subjects, eccentricity-wise (i.e. within each column). Removes eccentricity-level variability, focuses on between-patient trends
    - Any other value (callers pass `'none'`) returns `data` unchanged.

    Both modes ignore NaNs when computing the mean and standard deviation, so one
    missing sample no longer turns a whole column into NaN in 'inter' mode (it
    already did not for rows in 'intra' mode); the missing samples themselves stay NaN.
    A row/column with zero standard deviation yields NaN or inf, as before.
    '''
    # axis along which the statistics are taken: across subjects or within a subject
    axis_by_mode = {'inter': 0, 'intra': 1}
    if standardization not in axis_by_mode:
        return data
    axis = axis_by_mode[standardization]
    mean = np.nanmean(data, axis=axis, keepdims=True)
    std = np.nanstd(data, axis=axis, keepdims=True)
    return (data - mean) / std

def preprocess_functional_data(direction: Direction, standardization: str = 'inter', toLog : bool = True) -> Dict[str, np.ndarray]:
    '''
    Stack, for every subject of `subjects_data`, the functional features along the given direction (X or Y)
    into (n_subjects, n_eccentricities) matrices and pass each through `preprocess_functional_feature`.

    Returns a dict with:
    - 'cones': the fitted cone density (its natural log when `toLog` is True),
    - 'nonfit': the non-fitted cone density,
    - one entry per name in `layer_names`.
    '''
    def _stacked(attribute: str, transform=None) -> np.ndarray:
        # one row per subject, optionally transformed before standardization
        rows = [getattr(s, attribute) for s in subjects_data]
        if transform is not None:
            rows = [transform(row) for row in rows]
        return preprocess_functional_feature(np.array(rows), standardization)

    layer_fds = {}
    for layer in layer_names:
        layer_fds[layer] = _stacked(f'{layer}_{direction.value}')

    cone_density_fd = _stacked(f'density_fit_{direction.value}', np.log if toLog else None)
    cone_density_nonfit = _stacked(f'density_{direction.value}')

    result = {'cones': cone_density_fd, "nonfit": cone_density_nonfit}
    result.update(layer_fds)
    return result

### cone density specific analysis

In [None]:
from scipy.stats import kendalltau, pearsonr, spearmanr
import seaborn as sns


def kendall_pval(x,y):
    """Return only the p-value of Kendall's tau test between `x` and `y`."""
    _statistic, p_value = kendalltau(x, y)
    return p_value

def pearsonr_pval(x,y):
    """Return only the p-value of the Pearson correlation test between `x` and `y`."""
    _statistic, p_value = pearsonr(x, y)
    return p_value

def spearmanr_pval(x,y):
    """Return only the p-value of the Spearman rank correlation test, NaN samples omitted."""
    _statistic, p_value = spearmanr(x, y, nan_policy = "omit")
    return p_value


In [None]:
0.630 /MM_PER_DEGREE

In [None]:
2.070 / MM_PER_DEGREE

In [None]:
for s in subjects_data:
    print(s.name)

In [None]:

# For each direction: average the non-fitted cone density over 2°-wide eccentricity
# bins, show how the bins correlate across subjects, then plot every subject's binned profile.
for direction in Direction:
    cone_density_fd = preprocess_functional_data(direction, standardization='none', toLog=False)['nonfit']

    def get_cd_on_range(left, right):
        """Per-subject mean of `cone_density_fd` over the eccentricities in [left, right] (both inclusive).

        Uses the notebook-level `eccs` array to select the columns. `np.mean` is not
        NaN-aware: a NaN inside the range makes that subject's value NaN.
        """
        range_eccs = np.argwhere((left <= eccs) & (eccs <= right)).flatten()
        return np.mean(cone_density_fd[:, range_eccs], axis=1)
    
    def plot_subjects_data(df):
        """
        Plots the cone density values for each subject from the given DataFrame.
        
        Parameters:
        - df: pandas DataFrame where each row corresponds to a subject, 
            and each column represents a specific eccentricity range.
        """
        plt.figure(figsize=(12, 6))
        
        # Get the x-axis labels (eccentricity ranges)
        x_labels = df.columns
        x_values = np.arange(len(x_labels))  # Numerical representation for plotting

        # One line per subject (row of df)
        for subject_id in df.index:
            plt.plot(x_values, df.loc[subject_id], marker='o', linestyle='-', label=f"Subject {subject_id}")

        # Formatting the plot
        plt.xticks(ticks=x_values, labels=x_labels, rotation=90)  # Rotate x-axis labels for readability
        plt.xlabel("Eccentricity Range (°)")
        plt.ylabel("Cone Density")
        plt.title("Cone Density Across Eccentricity Ranges for Each Subject")
        plt.legend()
        plt.grid(True, linestyle='--', alpha=0.6)

        # Show the plot
        plt.show()

    # subject numbers, used as row index of the binned table
    pids = np.array([s.nb for s in subjects_data])
    # Bin layout: [start_val, end_val) cut into `step`-wide bins

    start_val = -10
    end_val = 10   # with step = 2 the last bin is 8.0° to 10.0°
    step = 2

    # Left edge of every bin: start_val, start_val + step, ..., end_val - step
    bin_starts = np.arange(start_val, end_val, step)

    # One column per bin; both bin edges are inclusive in get_cd_on_range, so a sample
    # lying exactly on an edge contributes to the two neighbouring bins
    data = {
        f"{bin_start:.1f}° to {bin_start + step:.1f}°": get_cd_on_range(bin_start, bin_start + step)
        for bin_start in bin_starts
    }

    # Create the DataFrame with the subject numbers (pids) as the index
    _df = pd.DataFrame(data, index=pids)

    # Sort the subjects by their mean density in the -2.0° to 0.0° bin
    print (_df.keys())
    _df = _df.sort_values(by="-2.0° to 0.0°")


    # Pearson correlation between bins, computed across subjects
    sns.set_theme(font_scale=0.8)
    sns.heatmap(_df.corr(method = "pearson"), annot=True, fmt=".2f", cmap='coolwarm', square=True, center=0, annot_kws={"size": 5}, cbar_kws={"shrink": 0.8})
    plt.title(f'Correlation matrix of Cone Density (Not fitted) at different eccentricities, {direction.value}-axis', fontsize=14)
    plt.show()


    # Call the function to plot the data
    plot_subjects_data(_df)


In [None]:
import seaborn as sns

# For each direction: average the fitted cone density over coarse eccentricity
# ranges, show the Spearman correlation between ranges across subjects, then
# plot every subject's binned profile.
for direction in Direction:
    cone_density_fd = preprocess_functional_data(direction, standardization='none', toLog=False)['cones']

    def get_cd_on_range(left, right):
        """Per-subject mean of `cone_density_fd` over the eccentricities in [left, right] (both inclusive)."""
        range_eccs = np.argwhere((left <= eccs) & (eccs <= right)).flatten()
        return np.mean(cone_density_fd[:, range_eccs], axis=1)

    def plot_subjects_data(df):
        """
        Plots the cone density values for each subject from the given DataFrame.

        Parameters:
        - df: pandas DataFrame where each row corresponds to a subject,
            and each column represents a specific eccentricity range.
        """
        plt.figure(figsize=(12, 6))

        x_labels = df.columns
        x_values = np.arange(len(x_labels))  # numerical positions of the ranges

        # one line per subject (row of df)
        for subject_id in df.index:
            plt.plot(x_values, df.loc[subject_id], marker='o', linestyle='-', label=f"Subject {subject_id}")

        plt.xticks(ticks=x_values, labels=x_labels, rotation=90)  # rotated for readability
        plt.xlabel("Eccentricity Range (°)")
        plt.ylabel("Cone Density")
        plt.title("Cone Density Across Eccentricity Ranges for Each Subject")
        plt.legend()
        plt.grid(True, linestyle='--', alpha=0.6)
        plt.show()

    pids = np.array([s.nb for s in subjects_data])
    _df = pd.DataFrame({
        '-10° to -6°': get_cd_on_range(-10, -6),
        '-6° to -3°': get_cd_on_range(-6, -3),
        '-3° to -0.5°': get_cd_on_range(-3, -0.5),
        '-0.5° to 0.5°': get_cd_on_range(-0.5, 0.5),
        # lower bound was 1, which contradicted the label and the mirrored
        # '-3° to -0.5°' range
        '0.5° to 3°': get_cd_on_range(0.5, 3),
        '3° to 6°': get_cd_on_range(3, 6),
        '6° to 10°': get_cd_on_range(6, 10),
    }, index=pids).sort_values(by='-0.5° to 0.5°')

    sns.set_theme(font_scale=0.8)
    sns.heatmap(_df.corr(method = "spearman"), annot=True, fmt=".2f", cmap='coolwarm', square=True, center=0)
    plt.title(f'Correlation matrix of Cone Density at different eccentricities, {direction.value}-axis', fontsize=14)
    plt.show()

    # Call the function to plot the data
    plot_subjects_data(_df)


In [None]:

cone_density_fd_X = preprocess_functional_data(Direction.X, standardization='none', toLog = False)['cones']
cone_density_fd_Y = preprocess_functional_data(Direction.Y, standardization='none', toLog = False)['cones']

def get_cd_on_range(left, right, direction: Direction):
    """Per-subject mean cone density over the eccentricities in [left, right] (both inclusive) along `direction`."""
    # X uses cone_density_fd_X; any other direction uses cone_density_fd_Y
    if direction == Direction.X:
        densities = cone_density_fd_X
    else:
        densities = cone_density_fd_Y
    inside = (left <= eccs) & (eccs <= right)
    columns = np.argwhere(inside).flatten()
    return np.mean(densities[:, columns], axis=1)

# One row per subject, one column per (axis, eccentricity range), plus a few
# ratio / single-range columns for the X axis.
pids = np.array([s.nb for s in subjects_data])
_df = pd.DataFrame({
    'X-axis,\n-10° to -6°': get_cd_on_range(-10, -6, Direction.X),
    'X-axis,\n-6° to -3°': get_cd_on_range(-6, -3, Direction.X),
    'X-axis,\n-3° to -0.5°': get_cd_on_range(-3, -0.5, Direction.X),
    'X-axis,\n-0.5° to 0.5°': get_cd_on_range(-0.5, 0.5, Direction.X),
    # lower bound was 1, which contradicted the label and the mirrored '-3° to -0.5°' range
    'X-axis,\n0.5° to 3°': get_cd_on_range(0.5, 3, Direction.X),
    'X-axis,\n3° to 6°': get_cd_on_range(3, 6, Direction.X),
    'X-axis,\n6° to 10°': get_cd_on_range(6, 10, Direction.X),
    'Y-axis,\n-10° to -6°': get_cd_on_range(-10, -6, Direction.Y),
    'Y-axis,\n-6° to -3°': get_cd_on_range(-6, -3, Direction.Y),
    'Y-axis,\n-3° to -0.5°': get_cd_on_range(-3, -0.5, Direction.Y),
    'Y-axis,\n-0.5° to 0.5°': get_cd_on_range(-0.5, 0.5, Direction.Y),
    # same fix as on the X axis
    'Y-axis,\n0.5° to 3°': get_cd_on_range(0.5, 3, Direction.Y),
    'Y-axis,\n3° to 6°': get_cd_on_range(3, 6, Direction.Y),
    'Y-axis,\n6° to 10°': get_cd_on_range(6, 10, Direction.Y),
    # NOTE(review): the numerator range is 0.4-0.5 mm converted to degrees while the
    # label says "4um", and the denominator range (0 to 0.100) is NOT divided by
    # MM_PER_DEGREE - confirm the intended units.
    'X-axis, \ncone density ratio 4um/fovea': (
        get_cd_on_range(0.4 / MM_PER_DEGREE, 0.5 / MM_PER_DEGREE, Direction.X)
        / get_cd_on_range(0, 0.100, Direction.X)
    ),
    # density at 2.070-2.100 mm over density at 0.630-0.650 mm (mm converted to degrees)
    'X-axis, \ncone density ratio 2070um/630um microm': (
        get_cd_on_range(2.070 / MM_PER_DEGREE, 2.100 / MM_PER_DEGREE, Direction.X)
        / get_cd_on_range(0.630 / MM_PER_DEGREE, 0.650 / MM_PER_DEGREE, Direction.X)
    ),
    'X-axis, \n630um': get_cd_on_range(0.630 / MM_PER_DEGREE, 0.650 / MM_PER_DEGREE, Direction.X),
}, index=pids)#.sort_values(by='-0.5° to 0.5°')

sns.set_theme(font_scale=0.6)
sns.heatmap(_df.corr(method='pearson'), annot=True, fmt=".2f", cmap='coolwarm', square=True, center=0)
plt.title(f'Correlation matrix of Cone Density at different eccentricities.', fontsize=14)
plt.show()

In [None]:
import seaborn as sns

# One correlation heatmap per axis: how the mean cone density of each
# eccentricity band co-varies with the other bands across subjects.
for direction in Direction:
    cone_density_fd = preprocess_functional_data(direction, standardization='none', toLog=False)['cones']

    # NOTE: this rebinds the module-level name `get_cd_on_range` to a
    # two-argument version that closes over the current `cone_density_fd`.
    def get_cd_on_range(left, right):
        """Row-wise mean of `cone_density_fd` over eccentricities in [left, right] (inclusive)."""
        range_eccs = np.argwhere((left <= eccs) & (eccs <= right)).flatten()
        return np.mean(cone_density_fd[:, range_eccs], axis=1)

    pids = np.array([s.nb for s in subjects_data])
    _df = pd.DataFrame({
        '-10° to -6°': get_cd_on_range(-10, -6),
        '-6° to -3°': get_cd_on_range(-6, -3),
        '-3° to -0.5°': get_cd_on_range(-3, -0.5),
        '-0.5° to 0.5°': get_cd_on_range(-0.5, 0.5),
        # Lower bound is 0.5° so the band matches its label and mirrors -3° to -0.5°.
        '0.5° to 3°': get_cd_on_range(0.5, 3),
        '3° to 6°': get_cd_on_range(3, 6),
        '6° to 10°': get_cd_on_range(6, 10),
    }, index=pids).sort_values(by='-0.5° to 0.5°')

    sns.set_theme(font_scale=0.8)
    sns.heatmap(_df.corr(), annot=True, fmt=".2f", cmap='coolwarm', square=True, center=0)
    plt.title(f'Correlation matrix of Cone Density at different eccentricities, {direction.value}-axis', fontsize=14)
    plt.show()

# Unstandardized, non-log cone-density matrices, X-axis first, then Y-axis.
cone_density_fd_X, cone_density_fd_Y = (
    preprocess_functional_data(axis, standardization='none', toLog = False)['cones']
    for axis in (Direction.X, Direction.Y)
)

def get_cd_on_range(left, right, direction: Direction):
    """Row-wise mean cone density over the eccentricities in [left, right].

    Both bounds are inclusive. The X-axis matrix is used for `Direction.X`,
    the Y-axis matrix for any other direction.
    """
    # Column positions inside the closed interval
    # (assumes `eccs` is 1-D — TODO confirm).
    selected = np.flatnonzero((eccs >= left) & (eccs <= right))
    density = cone_density_fd_X if direction == Direction.X else cone_density_fd_Y
    return np.mean(density[:, selected], axis=1)

# Rows of the table are labelled with the subject numbers.
pids = np.array([s.nb for s in subjects_data])

# Mean cone density per eccentricity band (bounds in degrees) on each axis,
# followed by three X-axis summary columns.
_df = pd.DataFrame({
    'X-axis,\n-10° to -6°': get_cd_on_range(-10, -6, Direction.X),
    'X-axis,\n-6° to -3°': get_cd_on_range(-6, -3, Direction.X),
    'X-axis,\n-3° to -0.5°': get_cd_on_range(-3, -0.5, Direction.X),
    'X-axis,\n-0.5° to 0.5°': get_cd_on_range(-0.5, 0.5, Direction.X),
    # Lower bound is 0.5° so the band matches its label and mirrors -3° to -0.5°.
    'X-axis,\n0.5° to 3°': get_cd_on_range(0.5, 3, Direction.X),
    'X-axis,\n3° to 6°': get_cd_on_range(3, 6, Direction.X),
    'X-axis,\n6° to 10°': get_cd_on_range(6, 10, Direction.X),
    'Y-axis,\n-10° to -6°': get_cd_on_range(-10, -6, Direction.Y),
    'Y-axis,\n-6° to -3°': get_cd_on_range(-6, -3, Direction.Y),
    'Y-axis,\n-3° to -0.5°': get_cd_on_range(-3, -0.5, Direction.Y),
    'Y-axis,\n-0.5° to 0.5°': get_cd_on_range(-0.5, 0.5, Direction.Y),
    'Y-axis,\n0.5° to 3°': get_cd_on_range(0.5, 3, Direction.Y),
    'Y-axis,\n3° to 6°': get_cd_on_range(3, 6, Direction.Y),
    'Y-axis,\n6° to 10°': get_cd_on_range(6, 10, Direction.Y),
    # Bounds written in mm are divided by MM_PER_DEGREE to obtain degrees.
    # NOTE(review): the denominator range (0, 0.100) is passed as-is, i.e. it is
    # interpreted in degrees unlike the other bounds — confirm this is intended.
    'X-axis, \ncone density ratio 4um/fovea': (
        get_cd_on_range(0.4 / MM_PER_DEGREE, 0.5 / MM_PER_DEGREE, Direction.X)
        / get_cd_on_range(0, 0.100, Direction.X)
    ),
    'X-axis, \ncone density ratio 2070um/630um microm': (
        get_cd_on_range(2.070 / MM_PER_DEGREE, 2.100 / MM_PER_DEGREE, Direction.X)
        / get_cd_on_range(0.630 / MM_PER_DEGREE, 0.650 / MM_PER_DEGREE, Direction.X)
    ),
    'X-axis, \n630um': get_cd_on_range(0.630 / MM_PER_DEGREE, 0.650 / MM_PER_DEGREE, Direction.X),
}, index=pids)

# Pearson correlation between every pair of columns, across subjects.
sns.set_theme(font_scale=0.6)
sns.heatmap(_df.corr(method='pearson'), annot=True, fmt=".2f", cmap='coolwarm', square=True, center=0)
plt.title('Correlation matrix of Cone Density at different eccentricities.', fontsize=14)
plt.show()

In [None]:

# Foveal (-0.5° to 0.5°) against 0.5° to 3° mean cone density along the
# X-axis; one point per row of `_df` (i.e. per subject number).
y_data = _df['X-axis,\n-0.5° to 0.5°']
x_data = _df['X-axis,\n0.5° to 3°']

# Create the scatter plot
plt.figure(figsize=(8, 6))
plt.scatter(x_data, y_data, color='blue', label='Data Points')

# Labels and title name the columns that are actually plotted.
plt.ylabel('Mean cone density, X-axis, -0.5° to 0.5°')
plt.xlabel('Mean cone density, X-axis, 0.5° to 3°')
plt.title('Scatter Plot: Cone Density at -0.5° to 0.5° vs 0.5° to 3° (X-axis)')

# Show grid
plt.grid(True)

# Display the plot
plt.legend()
plt.show()


### Density fit analysis  - Ratios 

In [None]:
# Debug dump of the whole cone-density matrix. `cone_density_fd` is only bound
# inside the per-direction heatmap loop, so this shows the matrix left over
# from that loop's last iteration and requires that cell to have run first.
print(cone_density_fd)

In [None]:
import numpy as np
import matplotlib.pyplot as plt


# Eccentricities converted from degrees to millimetres.
eccs_in_MM = eccs*MM_PER_DEGREE

# Normalize each row (one density profile) by its own maximum, so that every
# curve peaks at 1.
normalized_data_x = cone_density_fd_X / np.max(cone_density_fd_X, axis=1, keepdims=True)
normalized_data_y = cone_density_fd_Y / np.max(cone_density_fd_Y, axis=1, keepdims=True)

# Mean profile across rows, for the normalized and the raw data.
mean_data_x = np.mean(normalized_data_x, axis=0)
mean_data_y = np.mean(normalized_data_y, axis=0)

mean_data_x_nonnorm = np.mean(cone_density_fd_X, axis=0)
mean_data_y_nonnorm = np.mean(cone_density_fd_Y, axis=0)

# Figure 1: raw profiles, one faint line per row and axis.
# The row count comes from the data instead of a hard-coded 33.
plt.figure(figsize=(12, 6))
for i in range(cone_density_fd_X.shape[0]):
    plt.plot(eccs_in_MM, cone_density_fd_X[i, :], color='blue', alpha=0.2)  # Low opacity
    plt.plot(eccs_in_MM, cone_density_fd_Y[i, :], color='blue', alpha=0.2)  # Low opacity

plt.xlabel("Eccentricity (mm)")
plt.ylabel("Value")
plt.title("Data Across Patients")
# No legend here: none of the lines carries a label.
plt.show()

# Figure 2: normalized profiles with the mean profile of each axis on top.
plt.figure(figsize=(12, 6))
for i in range(normalized_data_x.shape[0]):
    plt.plot(eccs_in_MM, normalized_data_x[i, :], color='blue', alpha=0.2)  # Low opacity
    plt.plot(eccs_in_MM, normalized_data_y[i, :], color='blue', alpha=0.2)  # Low opacity
# Mean lines with higher opacity, labelled per axis so the legend is unambiguous.
plt.plot(eccs_in_MM, mean_data_x, color='red', linewidth=2, label="Mean (X-axis)", alpha=0.9)
plt.plot(eccs_in_MM, mean_data_y, color='green', linewidth=2, label="Mean (Y-axis)", alpha=0.9)

plt.xlabel("Eccentricity (mm)")
plt.ylabel("Normalized Value")
plt.title("Normalized Data Across Patients with Mean Line")
plt.legend()
plt.show()


In [None]:
# Derivative of every density profile (row) with respect to eccentricity in mm.
derivative_x = np.gradient(cone_density_fd_X, eccs_in_MM, axis=1)
derivative_y = np.gradient(cone_density_fd_Y, eccs_in_MM, axis=1)

# Relative (logarithmic) derivative f'(x) / f(x). Zero densities give inf/NaN,
# which are replaced by 0 right below, so the warnings are silenced here.
with np.errstate(divide='ignore', invalid='ignore'):
    gx_derivative_x = derivative_x / cone_density_fd_X
    gx_derivative_y = derivative_y / cone_density_fd_Y

gx_derivative_x[~np.isfinite(gx_derivative_x)] = 0
gx_derivative_y[~np.isfinite(gx_derivative_y)] = 0

print(gx_derivative_x.shape)

# Figure 1: relative derivative, one faint line per row and axis.
# The row count comes from the data instead of a hard-coded 33.
plt.figure(figsize=(12, 6))
for i in range(gx_derivative_x.shape[0]):
    plt.plot(eccs_in_MM, gx_derivative_x[i, :], color='blue', alpha=0.2)  # Low opacity
    plt.plot(eccs_in_MM, gx_derivative_y[i, :], color='blue', alpha=0.2)  # Low opacity

plt.xlabel("Eccentricity (mm)")
plt.ylabel("Relative derivative f'/f (1/mm)")
plt.title("Relative Derivative of Cone Density Across Patients")
# No legend: none of the lines carries a label.
plt.show()

# Figure 2: plain derivative of the (non-normalized) density.
plt.figure(figsize=(12, 6))
for i in range(derivative_x.shape[0]):
    plt.plot(eccs_in_MM, derivative_x[i, :], color='blue', alpha=0.2)  # Low opacity
    plt.plot(eccs_in_MM, derivative_y[i, :], color='blue', alpha=0.2)  # Low opacity

plt.xlabel("Eccentricity (mm)")
plt.ylabel("Derivative of cone density (per mm)")
plt.title("Derivative of Cone Density Across Patients")
plt.show()

In [None]:
# print(cone_density_fd)

In [None]:
# Mean normalized density at +/- each radius (mm), averaged over both axes and
# both sides of the fovea.
radii = [0.150, 0.300, 1.2]
indices = []
ratio = []

for i, radius in enumerate(radii):
    # First sample at or beyond +radius / -radius on the mm eccentricity axis.
    index_pos = np.searchsorted(eccs_in_MM, radius)
    index_neg = np.searchsorted(eccs_in_MM, -radius)
    # Order: X(+r), Y(+r), X(-r), Y(-r).
    ratio_values = [
        profile[idx]
        for idx in (index_pos, index_neg)
        for profile in (mean_data_x, mean_data_y)
    ]
    print(ratio_values)
    ratio.append(np.mean(ratio_values))
    print(f"Ratio at {radius} mm, for eccentricity of radius {eccs_in_MM[index_pos]} mm is {ratio[-1]}")


### Zhang 2015 - assessing whether the total-number-of-cones results are reproducible by our model

#### Integrating the density function

In [None]:
# Inspect the eccentricity axis in mm (eccs * MM_PER_DEGREE) that is used as
# the integration variable in the cells below.
print(eccs_in_MM)

In [None]:
# Sanity check of the X-axis density matrix dimensions before integrating.
print(cone_density_fd_X.shape)

In [None]:
from turtle import color
from cv2 import mean
import scipy.integrate as integrate

import numpy as np
import scipy.integrate as integrate

def integrate_cone_density(eccs_in_MM : np.ndarray, cone_density_fd_X : np.ndarray,
                            cone_density_fd_Y : np.ndarray, radius: float = 0.1, exclude_center: bool = False,
                            center_radius: float = 0.3):
    """Integrate every cone-density profile along eccentricity within +/- `radius`.

    The X and Y matrices are stacked row-wise and each row is integrated with
    Simpson's rule over the samples whose eccentricity lies strictly inside
    (-radius, radius). This is a 1-D integral along the eccentricity axis.

    Parameters
    ----------
    eccs_in_MM : 1-D array of eccentricities, one per column of the matrices.
    cone_density_fd_X, cone_density_fd_Y : 2-D arrays, one profile per row.
    radius : half-width of the integration window, same unit as `eccs_in_MM`.
    exclude_center : if True, samples with |ecc| <= `center_radius` are left
        out. The two remaining sides are integrated separately and summed, so
        the excluded gap contributes nothing to the result.
    center_radius : half-width of the excluded centre (default 0.3).

    Returns
    -------
    (mean, std, min, max, cov_percent, per_profile_integrals), where the last
    item holds one integral per stacked row (X rows first, then Y rows).
    """
    eccs_in_MM = np.asarray(eccs_in_MM)

    if exclude_center:
        # One mask per side: integrating over a single combined mask would make
        # Simpson's rule bridge the gap and add the centre back in.
        segments = [
            (eccs_in_MM < -center_radius) & (eccs_in_MM > -radius),
            (eccs_in_MM > center_radius) & (eccs_in_MM < radius),
        ]
    else:
        segments = [(eccs_in_MM > -radius) & (eccs_in_MM < radius)]

    cone_density_fd_XY = np.concatenate((cone_density_fd_X, cone_density_fd_Y), axis=0)

    int_results = np.zeros(cone_density_fd_XY.shape[0])
    for segment in segments:
        # Fewer than two samples span no interval: contributes zero.
        if np.count_nonzero(segment) < 2:
            continue
        int_results += integrate.simpson(y=cone_density_fd_XY[:, segment], x=eccs_in_MM[segment], axis=1)

    # Summary statistics over all profiles.
    mean_int = np.mean(int_results)
    std_int = np.std(int_results)
    min_int = np.min(int_results)
    max_int = np.max(int_results)

    # Coefficient of variation, in percent.
    cov_int = (std_int / mean_int) * 100

    return mean_int, std_int, min_int, max_int, cov_int, int_results



# Integrate every profile within +/- 1 mm of the fovea, centre included.
mean_int, std_int, min_int, max_int, cov_int, int_results = integrate_cone_density(
    eccs_in_MM, cone_density_fd_X, cone_density_fd_Y, radius = 1, exclude_center=False
)

# Summary statistics, one per line; the COV is a percentage.
print(
    f"Mean integrated cone density: {mean_int}",
    f"Standard deviation: {std_int}",
    f"Minimum: {min_int}",
    f"Maximum: {max_int}",
    f"Coefficient of Variation (COV): {cov_int:.2f}%",
    sep="\n",
)

# Distribution of the per-profile integrals.
plt.figure(figsize=(8, 6))
plt.hist(int_results, bins=100, edgecolor='black', alpha=0.7)
plt.title("Histogram of Integrated Cone Density Values")
plt.xlabel("Integrated Cone Density")
plt.ylabel("Frequency")
plt.show()



In [None]:
# Sweep the integration radius and record the coefficient of variation of the
# integrated density at each radius.
radii = np.linspace(0.1, 1.5, 100)

COV_results = []
for radius in radii:
    mean_int, std_int, min_int, max_int, cov_int, int_results = integrate_cone_density(
        eccs_in_MM, cone_density_fd_X, cone_density_fd_Y, radius = radius
    )
    COV_results.append(cov_int)
COV_results = np.array(COV_results)

# COV as a function of the integration radius.
plt.figure(figsize=(8, 6))
plt.plot(radii, COV_results, color='blue', linewidth=2)
plt.title("COV as a Function of Radius")
plt.xlabel("Radius (mm)")
plt.ylabel("COV")
plt.grid(True)
plt.show()


#### Correlation between peak and [?]

### Functions for intra-/inter-individual analysis

In [None]:
from typing import Iterable
import warnings
from scipy.stats import spearmanr, pearsonr, binomtest
import statsmodels.formula.api as smf

def mixedlm(cd: np.ndarray, lt: np.ndarray, pids: np.ndarray, eccs: np.ndarray, standardization: str = 'inter') -> Tuple[float, float]:
    """Fit a linear mixed model of layer thickness on cone density, grouped by subject.

    The inputs are parallel 1-D arrays (one entry per observation). With
    `standardization == 'intra'` the model is given the random-effects formula
    "~Eccentricity"; for any other value no `re_formula` is passed. Fit
    warnings are suppressed.

    Returns the fixed-effect coefficient of cone density and its p-value.
    """
    observations = pd.DataFrame({'Subject': pids, 'Eccentricity': eccs, 'LayerThickness': lt, 'ConeDensity': cd})
    random_effects = {'re_formula': "~Eccentricity"} if standardization == 'intra' else {}
    model = smf.mixedlm("LayerThickness ~ ConeDensity", observations, groups="Subject", **random_effects)
    with warnings.catch_warnings():
        # Convergence and similar warnings are deliberately silenced.
        warnings.simplefilter("ignore")
        fitted = model.fit()
    return fitted.fe_params['ConeDensity'], fitted.pvalues['ConeDensity']

def plot_slice_correlation(layer_name: str, degree: float | Iterable | None, direction: Direction, eccs: np.ndarray, comp_to = 'cones', restrict_to_os = False, standardization: str = 'inter'):
    """Scatter a layer against another feature over selected eccentricities, with statistics.

    Data come from `preprocess_functional_data(direction, standardization)`.
    The values of `layer_name` (y) and `comp_to` (x) at the selected
    eccentricities are pooled over all subjects, NaN pairs are dropped, and
    the plot shows the points split by sign agreement, a trend line and a
    statistics box (Spearman, Pearson, binomial sign test and, when more than
    one eccentricity is selected, a mixed linear model).

    Parameters
    ----------
    layer_name : key of the feature plotted on the y-axis.
    degree : eccentricity selection in degrees: None for all of `eccs`, an
        iterable of values, or a single value (rounded to one decimal).
    direction : axis to analyse.
    eccs : sorted eccentricity axis matching the columns of the data.
    comp_to : key of the feature plotted on the x-axis (default 'cones').
    restrict_to_os : if True, keep only samples where the 'os' layer is not NaN.
    standardization : forwarded to the preprocessing and to `mixedlm`.
    """
    fd = preprocess_functional_data(direction, standardization)

    n_subject = fd['cones'].shape[0]

    if degree is None:
        degree = eccs
        ecc_str = 'across all eccs'
    elif isinstance(degree, Iterable):
        degree = np.round(np.array(degree), 1)
        ecc_str = f'on {np.min(degree)}° to {np.max(degree)}°'
    else:
        degree = np.round(float(degree), 1)
        ecc_str = f'at {degree}°'

    # atleast_1d: a scalar degree would otherwise yield a 0-d index, on which
    # len() below fails.
    indices = np.atleast_1d(np.searchsorted(eccs, degree))

    # Flatten subject x eccentricity into parallel 1-D observation arrays.
    cd = fd[comp_to][:, indices].flatten()
    lt = fd[layer_name][:, indices].flatten()
    pids = np.repeat(np.arange(n_subject), len(indices))
    eccentricities = np.tile(eccs[indices], n_subject)
    if restrict_to_os:
        os_mask = ~np.isnan(fd['os'][:, indices].flatten())
        cd = cd[os_mask]
        lt = lt[os_mask]
        pids = pids[os_mask]
        eccentricities = eccentricities[os_mask]
    valid = ~np.isnan(cd) & ~np.isnan(lt)
    if not valid.any():
        print(f'No valid data for {layer_name} {ecc_str}.')
        return
    cd = cd[valid]
    lt = lt[valid]
    pids = pids[valid]
    eccentricities = eccentricities[valid]

    # Sign agreement of each pair: opposite signs fall in the upper-left /
    # lower-right quadrants, same signs in the lower-left / upper-right ones.
    opposite_sign = (lt > 0) != (cd > 0)
    same_sign = (lt > 0) == (cd > 0)

    spearman_corr = spearmanr(cd, lt)
    pearson_corr = pearsonr(cd, lt)
    # A mixed model needs more than one eccentricity per subject.
    perform_mlm = len(indices) > 1
    # The standardization mode is forwarded so that mixedlm's 'intra' variant
    # is actually used for intra-individually standardized data.
    mixedlm_corr = mixedlm(cd, lt, pids, eccentricities, standardization) if perform_mlm else None
    # One-sided sign test on the quadrants that agree with the Pearson sign.
    binom_corr = binomtest(
        opposite_sign.sum() if pearson_corr[0] < 0 else same_sign.sum(),
        opposite_sign.sum() + same_sign.sum(),
        p=0.5, alternative='greater'
    )

    plot_limit = max(3, 0.1 + np.ceil(np.max(np.abs([lt, cd])) * 10) / 10)
    plt.scatter(cd[opposite_sign], lt[opposite_sign], 5, color='blue', alpha=0.6, label=f'n = {opposite_sign.sum()}')
    plt.scatter(cd[same_sign], lt[same_sign], 5, color='red', alpha=0.6, label=f'n = {same_sign.sum()}')

    # Axes through the origin and lightly tinted quadrants (red: same sign,
    # blue: opposite sign).
    plt.axhline(0, color='black', linewidth=0.5)
    plt.axvline(0, color='black', linewidth=0.5)
    plt.fill_between([-plot_limit, 0], -plot_limit, 0, color='red', alpha=0.05)
    plt.fill_between([0, plot_limit], 0, plot_limit, color='red', alpha=0.05)
    plt.fill_between([-plot_limit, 0], 0, plot_limit, color='blue', alpha=0.05)
    plt.fill_between([0, plot_limit], -plot_limit, 0, color='blue', alpha=0.05)

    # Trend line: the mixed-model slope when it was fitted, otherwise an
    # ordinary least-squares line.
    x_p = np.linspace(-plot_limit, plot_limit, 100)
    if perform_mlm:
        slope_mlm = mixedlm_corr[0]
        plt.plot(x_p, slope_mlm * x_p, color='olive', label=f'MLM: y = {slope_mlm:.4g}x')
    else:
        p = np.polyfit(cd, lt, 1)
        plt.plot(x_p, np.polyval(p, x_p), color='olive', label=f'fit: y = {p[0]:.4g}x')

    plt.ylim(-plot_limit, plot_limit)
    plt.xlim(-plot_limit, plot_limit)
    plt.gca().set_aspect('equal', adjustable='box')
    comp_to_str = 'Cone density' if comp_to=='cones' else f'{names_r[comp_to]} thickness'
    plt.xlabel(f'{comp_to_str} (Z-Score)')
    plt.ylabel(f'{names_r[layer_name]} thickness, (Z-Score)')
    # Legend on the right side, outside the axes.
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    # Statistics box next to the legend; the MixedLM line is appended only when
    # the model was fitted, the other statistics are always shown.
    stats_text = (
        f'Spearman: {spearman_corr[0]:.3f}, p={spearman_corr[1]:.2g}'
        f'\nPearson: {pearson_corr[0]:.3f}, p={pearson_corr[1]:.2g}'
        f'\nBinomial test: p={binom_corr.pvalue:.2g}'
    )
    if perform_mlm:
        stats_text += f'\nMixedLM: {mixedlm_corr[0]:.3f}, p={mixedlm_corr[1]:.2g}'
    plt.text(1.02, 0.7, stats_text, fontsize=12, ha='left', transform=plt.gca().transAxes)

    plt.title(f'{names_r[layer_name]} thickness vs {comp_to_str} across subjects & {ecc_str}, {direction.value}-axis\nStandardized {standardization}-individually')
    plt.show()

### intra-individual analysis

Have a look at the violin plots as well: `P:\AOSLO\_automation\_PROCESSED\Photoreceptors\Healthy\_Results\all_stats_new\spearman_correlation_for_*.png`.
The following plots are just another way to visualize the same thing, since both are intra-individual analyses.

In [None]:
# Each listed layer against cone density, pooled over all eccentricities of
# the X-axis, with intra-individual standardization.
features = ['rnfl', 'pr_rpe', 'onl', 'gcl_ipl']
for i in range(len(features)):
    layer = features[i]
    comp_to = 'cones'
    plot_slice_correlation(layer, None, Direction.X, eccs, comp_to=comp_to, standardization='intra')

### inter-individual analysis

In [None]:
# Every unordered pair of features, compared over -1° to 1° on the X-axis
# with inter-individual standardization.
features = ['rnfl', 'pr_rpe', 'os', 'onl', 'cones']

step = 0.1
deg = np.arange(-1, 1+step, step)

n_features = len(features)
for i in range(n_features):
    # Start at i + 1 so each pair is plotted once and never against itself.
    for j in range(i+1, n_features):
        layer = features[i]
        comp_to = features[j]
        plot_slice_correlation(layer, deg, Direction.X, eccs, comp_to=comp_to, standardization='inter')

In [None]:
from typing import Callable

def compute_correlations_eccwise(direction: Direction, eccs: np.ndarray, correlation_fun: Callable[[np.ndarray, np.ndarray], Tuple[float, float]] = lambda cd, lt: spearmanr(cd, lt, nan_policy='omit')
) -> Dict[str, Tuple[np.ndarray]]:
    """
    Correlate cone density with every other layer, one eccentricity at a time.

    The data are preprocessed with inter-individual standardization. For each
    layer other than 'cones' and each index of `eccs`, `correlation_fun` is
    applied to the cone-density column and the layer column at that index
    (i.e. across subjects). By default this is a Spearman correlation that
    omits NaNs.

    Returns a dict mapping layer name to a pair of arrays
    (correlation coefficients, p-values), each of length `len(eccs)`.
    """
    fd = preprocess_functional_data(direction, standardization='inter')
    n_eccs = len(eccs)

    results = {}
    for layer, layer_fd in fd.items():
        # Cone density is the reference, never correlated with itself.
        if layer == 'cones':
            continue
        pointwise_corr = np.zeros(n_eccs)
        pointwise_pvalues = np.zeros(n_eccs)
        for col in range(n_eccs):
            pointwise_corr[col], pointwise_pvalues[col] = correlation_fun(fd['cones'][:, col], layer_fd[:, col])
        results[layer] = (pointwise_corr, pointwise_pvalues)
    return results


def plot_correlations_eccwise(results: Dict[str, Tuple[np.ndarray]], eccs: np.ndarray, direction: Direction, layers_to_plot: List[str] | None = None, abs_: bool = False, pv_threshold: float | None = None, corr_name: str = 'Spearman'):
    """
    Plot the pointwise correlations of cone density with each layer, for each eccentricity.

    Each layer is drawn as markers joined by a line. Opacity, marker size and
    line width increase as the p-value decreases; NaN p-values are fully
    transparent. With `pv_threshold` set, points whose p-value exceeds it are
    fully transparent as well.

    Parameters
    ----------
    results : mapping layer name -> (correlations, p-values), one value per
        entry of `eccs`.
    eccs : eccentricities in degrees (x-axis).
    direction : axis the results belong to; only used in the title.
    layers_to_plot : subset of layers to draw; all layers of `results` if None or empty.
    abs_ : plot absolute correlation values.
    pv_threshold : optional significance cut-off.
    corr_name : correlation name used in the title and the y-axis label.
    """
    def f(x):
        return np.abs(x) if abs_ else x

    colors = plt.get_cmap('Accent_r', len(results))
    plt.figure(figsize=(10, 6), dpi=300)

    def _plot_smooth_alpha(xs, ys, alphas, color, linewidth_fun):
        """Draw one two-point segment as short pieces whose alpha fades linearly between its ends."""
        assert len(xs) == len(ys) == len(alphas) == 2
        # Roughly one piece per 0.01 of alpha difference, at least one piece.
        n_steps = 2 + int(np.abs(np.diff(alphas))[0] / 0.01)
        x = np.linspace(xs[0], xs[1], n_steps)
        y = np.interp(x, xs, ys)
        alpha = np.linspace(alphas[0], alphas[1], n_steps)

        for j in range(n_steps - 1):
            # Fully transparent pieces are invisible; skip the draw call.
            if alpha[j] == 0:
                continue
            plt.plot(
                x[j:j+2],
                y[j:j+2],
                color=color,
                alpha=alpha[j],
                linewidth=linewidth_fun(alpha[j]),
                label=None
            )

    min_alpha = 0.15
    for i, layer in enumerate(layers_to_plot or results.keys()):
        correlations, pvalues = results[layer]
        alphas = np.minimum(
            1,
            np.where(np.isnan(pvalues), 0, min_alpha + (1 - min_alpha) * (1 - pvalues) ** 4)
        )  # Higher alpha for smaller p-values, lower alpha for larger p-values
        if pv_threshold is not None:
            alphas = np.where(pvalues <= pv_threshold, alphas, 0)

        for j in range(len(eccs) - 1):
            _plot_smooth_alpha(
                eccs[j:j+2],
                f(correlations[j:j+2]),
                color=colors(i),
                alphas=alphas[j:j+2],
                linewidth_fun=lambda a: 0.3 + a * 0.7,
            )

        label = names_r[layer] if layer != 'nonfit' else "nonfit"
        plt.scatter(
            eccs, f(correlations),
            label=label,
            color=colors(i),
            alpha=alphas,
            edgecolors='none',
            s=20 + alphas * 30
        )

    legend = plt.legend(loc='best')

    # Legend markers inherit the per-point alpha of their scatter; make them
    # fully opaque. A scalar is enough and does not rely on loop variables.
    for lh in legend.legend_handles:
        lh.set_alpha(1.0)

    lim = 0.8
    plt.ylim(0 if abs_ else -lim, lim)
    plt.xlim(-10, 10)

    plt.title(f'Pointwise {corr_name} Correlation of Cone Density with Layers, {direction.value}-Axis')
    plt.xlabel('Eccentricity [°]')
    ylabel = f'{corr_name} correlation coefficient'
    plt.ylabel(f'|{ylabel}|' if abs_ else ylabel)
    plt.grid(True)
    plt.show()
    
# print(eccs)
# X-axis: pointwise correlation of cone density with every layer (default
# correlation function); only points with p <= 0.05 are drawn visibly.
direction = Direction.X
results = compute_correlations_eccwise(direction, eccs)
plot_correlations_eccwise(results, eccs, direction, abs_=False, pv_threshold=0.05)

In [None]:

# Y-axis: same analysis as for the X-axis.
# NOTE(review): the p-value threshold here is 0.1, whereas the X-axis plot
# uses 0.05 — confirm the difference is intended.
direction = Direction.Y
results = compute_correlations_eccwise(direction, eccs)
plot_correlations_eccwise(results, eccs, direction, abs_=False, pv_threshold=0.1)

### Cone spacing analysis

reproducing Figure 3 B. from [Foote et al. 2019](https://doi.org/10.1167/iovs.18-25688), which compares cone spacing and OS thickness in healthy subjects

In [None]:
def density_to_spacing(cd: np.ndarray) -> np.ndarray:
    """
    Convert cone density [cells/mm²] to cone spacing [arcmin].

    The spacing in mm is sqrt(2 / (sqrt(3) * density)); it is then converted
    to degrees with MM_PER_DEGREE and to arcminutes (x 60).
    """
    spacing_mm = np.sqrt(2 / cd / np.sqrt(3))
    spacing_deg = spacing_mm / MM_PER_DEGREE
    return spacing_deg * 60

def get_data_from_range(layer_name, left, right):
    """Pooled X-axis cone density and layer values for eccentricities in [left, right).

    Reads `density_fit_X` and `<layer_name>_X` from every subject in
    `subjects_data`, keeps the columns whose eccentricity lies in the half-open
    interval, flattens them over subjects, and drops every pair in which
    either value is NaN.

    Returns (cone_density, layer_values) as two 1-D arrays of equal length.
    """
    # Upper bound is exclusive, unlike the lower one.
    columns = np.flatnonzero((eccs >= left) & (eccs < right))
    density = np.array([s.density_fit_X for s in subjects_data])[:, columns].flatten()
    layer_values = np.array([getattr(s, f'{layer_name}_X') for s in subjects_data])[:, columns].flatten()
    keep = ~(np.isnan(density) | np.isnan(layer_values))
    return density[keep], layer_values[keep]

# Pooled cone density vs layer thickness for all subjects and all
# eccentricities between `start` and `end` (inclusive), with one linear fit.
start, end, step = -0.5, 0.5, 0.1
layer_name = 'os'
# `end + step` because the upper bound of get_data_from_range is exclusive.
cd, lt = get_data_from_range(layer_name, start, end+step)
icd = density_to_spacing(cd)

# Thickness is scaled by 1000 for plotting (axis label: µm).
lt_scaled = lt * 1000
p = np.polyfit(cd, lt_scaled, 1)
# Fitted values and root-mean-square error of the linear fit.
lt_fitted = np.polyval(p, cd)
rmse = np.sqrt(np.mean((lt_scaled - lt_fitted) ** 2))

fit_summary = f'{start:.1f}° to {end:.1f}°, β={p[0]:.2f}, RMSE={rmse:.2f}'
cd_sorted = np.sort(cd)
plt.scatter(cd, lt_scaled, 5, color='gray', alpha=0.6, label=fit_summary)
plt.plot(cd_sorted, np.polyval(p, cd_sorted), color='black', alpha=1, label=f'All data, {fit_summary}')

plt.title(f'Cone density vs {names_r[layer_name]} thickness across all patients and {start:.1f}° to {end:.1f}° eccs', fontsize=14)
plt.xlabel('Cone density (cones/mm^2)', fontsize=14)
plt.ylabel(f'{names_r[layer_name]} thickness (µm)', fontsize=14)
plt.legend()
plt.show()

Interpretation of above figure: the lower the cone spacing (i.e. the higher the cone density), the thicker the OS layer. Note that it ***does not*** contradict the previous analysis, where we found that subjects with higher cone density have thinner foveal OS layer. Indeed, what is shown here (which is a mere aggregation of intra-individual analysis) only confirms that the OS layer is thicker in the fovea, where cone density peaks; there is no surprise here. The previous analysis, on the other hand, really was inter-individual, and showed that subjects with higher foveal cone density tend to have thinner foveal OS layer, which in itself is a more interesting finding.

Note that the two observations are not mutually exclusive: this is in fact an instance of [Simpson's paradox](https://en.wikipedia.org/wiki/Simpson%27s_paradox#/media/File:Simpsons_paradox_-_animation.gif). Indeed, when stratifying the data by eccentricity (see next figure, for PR+RPE from -0.5° to 0.5° to make it more obvious), we recover what we found in the previous inter-individual analysis: around the fovea, cone spacing is positively correlated with OS thickness (i.e. the higher the cone density, the thinner the OS layer).

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import linregress

start = -0.5
end = 0.5
step = 0.1
layer_name = 'os'
spacing = True  # plot cone spacing (arcmin) instead of cone density on the x-axis

# Explicit figure/axes so the legend can be placed outside when needed.
fig, ax = plt.subplots(figsize=(8, 6))

degrees_range = np.round(np.arange(start, end, step), 1)
colors = plt.get_cmap('Accent', len(degrees_range))

# One regression per eccentricity bin [deg, deg + step).
for i, deg in enumerate(degrees_range):
    # Round the upper edge as well: e.g. 0.2 + 0.1 == 0.30000000000000004,
    # which would pull an eccentricity of exactly 0.3 into this bin AND the next.
    deg_end = np.round(deg + step, 1)
    cd, lt = get_data_from_range(layer_name, deg, deg_end)

    icd = density_to_spacing(cd)
    if spacing:
        cd = icd

    # Drop NaN / inf pairs (np.isfinite rejects both).
    mask = np.isfinite(cd) & np.isfinite(lt)
    cd = cd[mask]
    lt = lt[mask]

    # A regression needs at least two points.
    if len(cd) < 2:
        continue

    # Thickness is scaled by 1000 for plotting (axis label: µm).
    slope, intercept, r_value, p_value, std_err = linregress(cd, lt * 1000)
    lt_fitted = slope * cd + intercept
    rmse = np.sqrt(np.mean((lt * 1000 - lt_fitted) ** 2))

    ax.scatter(cd, lt * 1000, s=5, color=colors(i), alpha=0.6,
               label=(f'{deg:.1f}° to {deg_end:.1f}°, '
                      f'β={slope:.2f}, p={p_value:.3g}'))

    # Regression line, sorted by x for a clean line.
    idx_sorted = np.argsort(cd)
    ax.plot(cd[idx_sorted], lt_fitted[idx_sorted], color=colors(i), alpha=0.6)

# Regression over all data combined (upper bound exclusive, hence end + step).
cd_all, lt_all = get_data_from_range(layer_name, start, end + step)
if spacing:
    cd_all = density_to_spacing(cd_all)

mask_all = np.isfinite(cd_all) & np.isfinite(lt_all)
cd_all = cd_all[mask_all]
lt_all = lt_all[mask_all]

if len(cd_all) >= 2:
    slope_all, intercept_all, r_value_all, p_value_all, std_err_all = linregress(cd_all, lt_all * 1000)
    lt_fitted_all = slope_all * cd_all + intercept_all
    rmse_all = np.sqrt(np.mean((lt_all * 1000 - lt_fitted_all) ** 2))

    idx_all_sorted = np.argsort(cd_all)
    ax.plot(cd_all[idx_all_sorted],
            lt_fitted_all[idx_all_sorted],
            color='black', alpha=1,
            label=(f'All data, {start:.1f}° to {end:.1f}°, '
                   f'β={slope_all:.2f}, p={p_value_all:.3g}'))

# The x-axis label and title follow the quantity actually plotted.
if spacing:
    x_quantity = 'Cone spacing'
    ax.set_xlabel('Cone spacing (arcmin)', fontsize=14)
else:
    x_quantity = 'Cone density'
    ax.set_xlabel('Cone density (cones/mm^2)', fontsize=14)
ax.set_ylabel(f'{names_r[layer_name]} thickness (µm)', fontsize=14)
ax.set_title(f'{x_quantity} vs {names_r[layer_name]} thickness '
             f'across all patients\n{start:.1f}° to {end:.1f}° eccentricities',
             fontsize=14)

# Place the legend outside the axes when it has many entries.
if len(degrees_range) > 10:
    ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
else:
    ax.legend()
plt.tight_layout()
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import linregress

start = -2.5
end = 2.5
step = 0.1
layer_name = 'os'
spacing = True  # plot cone spacing (arcmin) instead of cone density on the x-axis

degrees_range = np.round(np.arange(start, end, step), 1)
colors = plt.get_cmap('Accent', len(degrees_range))

# One regression per eccentricity bin [deg, deg + step).
for i, deg in enumerate(degrees_range):
    # Round the upper edge as well: e.g. 0.2 + 0.1 == 0.30000000000000004,
    # which would pull an eccentricity of exactly 0.3 into this bin AND the next.
    deg_end = np.round(deg + step, 1)
    cd, lt = get_data_from_range(layer_name, deg, deg_end)
    icd = density_to_spacing(cd)

    if spacing:
        cd = icd

    # Drop NaN / inf pairs (np.isfinite rejects both).
    mask = np.isfinite(cd) & np.isfinite(lt)
    cd = cd[mask]
    lt = lt[mask]

    # A regression needs at least two points.
    if len(cd) < 2:
        continue

    # Thickness is scaled by 1000 for plotting (axis label: µm).
    slope, intercept, r_value, p_value, std_err = linregress(cd, lt * 1000)

    lt_fitted = slope * cd + intercept

    plt.scatter(
        cd, lt * 1000, s=5, color=colors(i), alpha=0.6,
        label=(
            f'{deg:.1f}° to {deg_end:.1f}°, '
            f'β={slope:.2f}, p={p_value:.3g}'
        )
    )

    # Regression line, sorted by x for a clean line.
    sorted_idx = np.argsort(cd)
    cd_sorted = cd[sorted_idx]
    lt_fit_sorted = lt_fitted[sorted_idx]

    plt.plot(cd_sorted, lt_fit_sorted, color=colors(i), alpha=0.6)

# Regression over all data combined (upper bound exclusive, hence end + step).
cd_all, lt_all = get_data_from_range(layer_name, start, end + step)
icd_all = density_to_spacing(cd_all)

if spacing:
    cd_all = icd_all

mask_all = np.isfinite(cd_all) & np.isfinite(lt_all)
cd_all = cd_all[mask_all]
lt_all = lt_all[mask_all]

if len(cd_all) >= 2:
    slope_all, intercept_all, r_value_all, p_value_all, std_err_all = linregress(cd_all, lt_all * 1000)
    lt_fitted_all = slope_all * cd_all + intercept_all

    sorted_idx_all = np.argsort(cd_all)
    cd_sorted_all = cd_all[sorted_idx_all]
    lt_fit_sorted_all = lt_fitted_all[sorted_idx_all]

    plt.plot(
        cd_sorted_all, lt_fit_sorted_all,
        color='black', alpha=1,
        label=(
            f'All data, {start:.1f}° to {end:.1f}°, '
            f'β={slope_all:.2f}, p={p_value_all:.3g}'
        )
    )

# Axis label and title follow the quantity actually plotted.
plt.ylabel(f'{names_r[layer_name]} thickness (µm)', fontsize=14)
if spacing:
    plt.xlabel('Cone spacing (arcmin)', fontsize=14)
    plt.title(
        f'Cone spacing vs {names_r[layer_name]} thickness across all patients '
        f'and {start:.1f}° to {end:.1f}° eccs',
        fontsize=14
    )
else:
    plt.xlabel('Cone density (cones/mm^2)', fontsize=14)
    plt.title(
        f'Cone density vs {names_r[layer_name]} thickness across all patients '
        f'and {start:.1f}° to {end:.1f}° eccs',
        fontsize=14
    )

# Place the legend outside the axes when it has many entries.
if len(degrees_range) > 10:
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
else:
    plt.legend()
plt.show()


In [None]:
# start = -0.5
# end = 0.5
# step = 0.1
# layer_name = 'os'


def plot_scatter_fit(start: float, end: float, step: float, layer_name: str, spacing: bool = False):
    """
    Scatter layer thickness against z-scored cone density (or cone spacing),
    with one colour and one regression line per eccentricity sub-range, plus a
    black regression line fitted on all sub-ranges pooled together.

    The plot is drawn on the current pyplot figure and shown with `plt.show()`.

    Parameters:
      start, end, step: sub-ranges are [deg, deg + step] for every deg in
        np.arange(start, end, step), rounded to one decimal.
      layer_name: layer passed to `get_data_from_range`; also used in the labels.
      spacing: if True, the x variable is `density_to_spacing(cone density)`
        instead of the cone density itself.

    Sub-ranges with fewer than two finite points, or with zero variance in x,
    are skipped.
    """
    degrees_range = np.round(np.arange(start, end, step), 1)
    colors = plt.get_cmap('Accent', len(degrees_range))

    # Per-sub-range z-scored x values and thickness values, pooled afterwards
    all_cd_z = []
    all_lt_vals = []

    for i, deg in enumerate(degrees_range):
        # presumably returns (cone density, layer thickness) — names follow the other cells
        cd, lt = get_data_from_range(layer_name, deg, deg + step)
        if spacing:
            cd = density_to_spacing(cd)

        # Keep only pairs where both values are finite (drops NaN and +/-inf)
        mask = np.isfinite(cd) & np.isfinite(lt)
        cd = cd[mask]
        lt_um = lt[mask] * 1000  # matches the µm axis label

        if len(cd) < 2:
            continue

        # Z-score x with this sub-range's own mean and std
        std_cd = np.std(cd)
        if std_cd == 0:
            continue
        cd_z = (cd - np.mean(cd)) / std_cd

        all_cd_z.append(cd_z)
        all_lt_vals.append(lt_um)

        # Regression on the sub-range's z-scored data
        slope, intercept, r_value, p_value, std_err = linregress(cd_z, lt_um)
        lt_fitted = slope * cd_z + intercept

        plt.scatter(cd_z, lt_um, s=5, color=colors(i), alpha=0.6,
                    label=f'{deg:.1f}° to {deg+step:.1f}°, β={slope:.2f}, p={p_value:.3g}')

        # Sort so the fitted line is drawn left to right
        idx_sorted = np.argsort(cd_z)
        plt.plot(cd_z[idx_sorted], lt_fitted[idx_sorted], color=colors(i), alpha=0.6)

    # Regression on the pooled, already z-scored data
    if all_cd_z:
        all_cd_z = np.concatenate(all_cd_z)
        all_lt_vals = np.concatenate(all_lt_vals)

        slope_all, intercept_all, r_value_all, p_value_all, std_err_all = linregress(all_cd_z, all_lt_vals)
        lt_fitted_all = slope_all * all_cd_z + intercept_all

        idx_sorted_all = np.argsort(all_cd_z)
        plt.plot(all_cd_z[idx_sorted_all], lt_fitted_all[idx_sorted_all],
                 color='black', alpha=1,
                 label=f'All data, β={slope_all:.2f}, p={p_value_all:.3g}')

    # Plot cosmetics
    plt.ylabel(f'{layer_name} thickness (µm)', fontsize=14)

    if spacing:
        plt.title(f'Z-scored Cone Spacing vs {layer_name} Thickness', fontsize=14)
        plt.xlabel('Z-scored Cone Spacing', fontsize=14)
    else:
        plt.title(f'Z-scored Cone Density vs {layer_name} Thickness', fontsize=14)
        plt.xlabel('Z-scored Cone Density', fontsize=14)

    # Only draw a legend when something was plotted; with many sub-ranges it goes outside the axes
    handles, _ = plt.gca().get_legend_handles_labels()
    if handles:
        if len(degrees_range) > 10:
            plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
        else:
            plt.legend()

    plt.show()



layer_names_alt = [
    # "cvi",
    # "gcl_ipl",
    # "onl",
    # "inl_opl",
    # "rnfl",
    # "chrd",
    # "pr_rpe",
    "os"
]

# One figure per selected layer. Each call receives a single layer name
# (the previous multi-layer branch passed the whole list by mistake).
# The name is echoed only when several layers are plotted.
for layer_name in layer_names_alt:
    if len(layer_names_alt) > 1:
        print(layer_name)
    plot_scatter_fit(start=-2.5, end=2.5, step=0.1, layer_name=layer_name, spacing=True)


In [None]:

from scipy.stats import linregress

# NOTE(review): in the visible part of the notebook `compute_correlations_eccwise` is defined
# in a later cell — confirm it is also available before this cell on a fresh kernel.
results = compute_correlations_eccwise(Direction.X, eccs)
threshold = 0.05
layers_to_plot = ['rnfl', 'pr_rpe', 'os']

# Range of eccentricity to plot
range_start = -2.5
range_end = 0.5

for layer_name, (correlations, p_values) in results.items():
    # Only the requested layers are plotted; 'cvi' is never plotted, so skip it
    # before doing any work for it.
    if layer_name not in layers_to_plot or layer_name == 'cvi':
        continue

    for ecc, corr, p_val in zip(eccs, correlations, p_values):
        # Skip eccentricities outside the requested range
        if ecc < range_start or ecc > range_end:
            continue

        # Skip non-significant correlations (a NaN p-value also fails this test)
        if not p_val < threshold:
            continue

        # 1) Retrieve the data for this layer in a +/-0.05 window around the eccentricity
        x_data, y_data = get_data_from_range(layer_name, (ecc - 0.05), (ecc + 0.05))
        x_data = np.asarray(x_data, dtype=float)
        y_data = np.asarray(y_data, dtype=float)

        # 2) Keep only finite pairs, as the other scatter cells do; a NaN would
        #    otherwise turn the whole fit into NaN
        finite = np.isfinite(x_data) & np.isfinite(y_data)
        x_data, y_data = x_data[finite], y_data[finite]

        # linregress needs at least two points and some spread in x
        if len(x_data) < 2 or np.min(x_data) == np.max(x_data):
            continue

        # 3) Fit a line via linregress
        slope, intercept, r_value, p_value_fit, std_err = linregress(x_data, y_data)

        # 4) Compute points on the line for plotting
        x_line = np.linspace(x_data.min(), x_data.max(), 100)
        y_line = slope * x_line + intercept

        # 5) Scatter plot with the fitted line
        plt.figure(figsize=(5, 4))
        plt.scatter(x_data, y_data, label='Data points')
        plt.plot(x_line, y_line, color='red',
                 label=(f'Fit: slope={slope:.3f}, '
                        f'intercept={intercept:.3f}, '
                        f'fit p={p_value_fit:.3g}'))

        # 6) Labels and legend
        plt.title(f'{names_r[layer_name]}, Ecc={ecc}, Corr p<{threshold}, Spearman corr : {corr:.3f}')
        plt.ylabel('Layer thickness (mm)')
        plt.xlabel('Photoreceptor density (cones/mm²)')
        plt.legend()

        # 7) Show the figure
        plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import spearmanr, theilslopes
from scipy.stats.mstats import winsorize

# Per-subject maximum of cone density (max over axis 1 of cone_density_fd_X)
max_cd = np.max(cone_density_fd_X, axis=1)

# Per-subject maximum of outer segment (OS) thickness.
# The array is deliberately NOT called `os`, so it cannot shadow the standard-library module name.
os_thickness = np.vstack([s.os_X for s in subjects_data])
# NaNs become -inf so they can never be selected as the maximum
os_cleaned = np.where(np.isnan(os_thickness), -np.inf, os_thickness)
max_os = np.max(os_cleaned, axis=1)

# Winsorizing: the lowest and highest 5% of values are clipped to the nearest
# remaining value (they are not dropped), which limits the effect of outliers
max_cd_winsor = winsorize(max_cd, limits=[0.05, 0.05])
max_os_winsor = winsorize(max_os, limits=[0.05, 0.05])

# Spearman correlation (rank-based)
spearman_corr, spearman_p = spearmanr(max_cd_winsor, max_os_winsor)
print(f"Spearman correlation: {spearman_corr:.3f}, p-value: {spearman_p:.3f}")

# Theil-Sen regression; theilslopes takes y first, then x.
# Thickness is multiplied by 1000 to match the µm axis label.
slope, intercept, lower, upper = theilslopes(max_os_winsor * 1000, max_cd_winsor, 0.95)
print(f"Robust fit: y = {slope:.2f}x + {intercept:.2f}")

# Scatter plot with the fitted line drawn left to right
plt.figure(figsize=(8, 6))
plt.scatter(max_cd_winsor, max_os_winsor * 1000, color='blue', alpha=0.7, label='Data Points')
max_cd_sorted = np.sort(max_cd_winsor)
plt.plot(max_cd_sorted, slope * max_cd_sorted + intercept, color='red', linewidth=2, label=f'Robust Fit: y = {slope:.2f}x')

# Labels and title
plt.ylabel('Max Outer Segment Thickness (µm)')
plt.xlabel('Max Cone Density (cones/mm^2)')
plt.title('Scatter Plot: Max Cone Density vs Max Outer Segment Thickness')
plt.legend()
plt.grid(True)
plt.show()


### Correlating Axial length with layer thicknesses 

In [None]:
# sd.axial_length = axial_dict[sd.name]
# sd.spherical_equiv = spherical_dict[sd.name]

# Quick check of the per-subject axial length and spherical equivalent values
for s in subjects_data:
    print("axial_length", s.axial_length)
    print("spherical_equiv", s.spherical_equiv)

In [None]:
def process_axial_length_correlation(direction: Direction, subjects_data: list = subjects_data):
    """
    Correlate axial length with the thickness of every layer in `layer_names`,
    bin by bin along the eccentricity axis.

    Parameters:
      direction: its `.value` selects the per-subject attribute `<layer>_<direction.value>`.
      subjects_data: subjects to use. The default is the notebook-level list as it
        exists when this cell is executed (default arguments are bound at definition time).

    Returns:
      bin_centers: bin centres from `compute_axial_length_correlations_interval`
        (an empty array when `layer_names` is empty).
      results: dict mapping layer name -> (correlations, pvalues).
      axial_lengths: 1D float array with one axial length per subject; subjects
        whose `axial_length` is None get NaN.
    """
    # A missing axial length (None, the dataclass default) becomes NaN instead of
    # raising a TypeError when stored in a float array.
    axial_lengths = np.array(
        [np.nan if s.axial_length is None else s.axial_length for s in subjects_data],
        dtype=float,
    )

    results = {}
    # Defined up front so the return statement also works when there are no layers
    bin_centers = np.array([])

    for layer in layer_names:
        print(f"Processing {layer} layer")
        # One row per subject, one column per eccentricity
        layer_fds = np.zeros((len(subjects_data), len(eccs)))
        for i, s in enumerate(subjects_data):
            layer_fds[i] = np.array(getattr(s, f'{layer}_{direction.value}'))

        bin_centers, correlations, pvalues = compute_axial_length_correlations_interval(layer_fds, axial_lengths, eccs)
        results[layer] = (correlations, pvalues)

    return bin_centers, results, axial_lengths
    
    
    

def compute_axial_length_correlations_interval(thicknesses: np.ndarray, 
                                               axial_lengths: np.ndarray,
                                               eccs: np.ndarray,
                                               interval: float = 0.5,
                                               correlation_fun: Callable[[np.ndarray, np.ndarray], Tuple[float, float]] = lambda x, y: spearmanr(x, y, nan_policy='omit')
                                              ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Correlate axial length with bin-averaged layer thickness along the eccentricity axis.

    The eccentricities are grouped into consecutive bins of width `interval`
    starting at min(eccs). For every occupied bin, each subject's thickness is
    averaged over the bin (NaNs ignored), z-scored across subjects (skipped when
    the standard deviation is exactly 0) and passed to `correlation_fun`
    together with the axial lengths.

    Parameters:
      thicknesses: array of shape (n_subjects, n_eccentricities).
      axial_lengths: array of shape (n_subjects,).
      eccs: eccentricity of each column of `thicknesses`.
      interval: bin width (default 0.5).
      correlation_fun: callable returning (coefficient, p-value); the default is
        spearmanr with nan_policy='omit'.

    Returns:
      bin_centers: mean eccentricity of the columns in each bin.
      correlations: coefficient for each bin.
      p_values: p-value for each bin.
    """
    # Bin edges; np.digitize labels bins starting at 1.
    edges = np.arange(np.min(eccs), np.max(eccs) + interval, interval)
    membership = np.digitize(eccs, edges)

    centers, coefficients, p_vals = [], [], []

    # Bin labels come from `membership` itself, so every bin visited here has columns.
    for bin_label in np.unique(membership):
        columns = np.flatnonzero(membership == bin_label)

        # One value per subject: mean thickness over the bin's eccentricities
        per_subject = np.nanmean(thicknesses[:, columns], axis=1)

        # Z-score across subjects unless the spread is exactly zero
        spread = np.nanstd(per_subject)
        if spread != 0:
            per_subject = (per_subject - np.nanmean(per_subject)) / spread

        coefficient, p_value = correlation_fun(axial_lengths, per_subject)
        coefficients.append(coefficient)
        p_vals.append(p_value)

        # The bin centre is the mean of the eccentricities that fell into the bin
        centers.append(np.mean(eccs[columns]))

    return np.array(centers), np.array(coefficients), np.array(p_vals)


In [None]:
# Correlate axial length with every layer's thickness along Direction.X.
# The bin centres, per-layer (correlations, p-values) and axial-length array are kept for the cells below.
bin_centers, axcorr_results, axial_lengths = process_axial_length_correlation(Direction.X)
    

In [None]:
# p-value cutoff below which a bin's correlation is drawn
sig_threshold = 0.2

ecc = eccs  # alias kept from the original cell; not used below

plt.figure(figsize=(10, 6))

for layer, (corr, pval) in axcorr_results.items():
    sig_idx = pval < sig_threshold
    if not np.any(sig_idx):
        # Nothing passes the cutoff: an empty scatter still gives the layer a legend entry.
        plt.scatter([], [], s=50, label=layer)
        continue
    # Draw only the bins that pass the cutoff, labelled with the layer name.
    plt.scatter(bin_centers[sig_idx], corr[sig_idx], s=50, label=layer, edgecolor='black')

plt.xlabel('Eccentricity')
plt.ylabel('Correlation Coefficient')
plt.title('Axial Length Correlations (Significant Points)')
plt.legend()
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import linregress

def plot_axial_length_layer_scatter_grouped(axial_lengths: np.ndarray, 
                                              layer_fds: np.ndarray, 
                                              eccs: np.ndarray, 
                                              interval: float = 0.5,
                                              significance_threshold: float = 0.05):
    """
    Plot axial length vs. layer thickness for each data point, color-coded by the eccentricity bin
    (without averaging over the bin). For each bin, a linear regression is performed and its line is 
    plotted if significant.

    Parameters:
      axial_lengths: 1D array of shape (n_subjects,) containing each subject's axial length.
      layer_fds: 2D array of shape (n_subjects, n_eccentricities) for the layer thickness.
      eccs: 1D array of eccentricity values corresponding to the columns of layer_fds.
      interval: The width of each eccentricity bin (default 0.5).
      significance_threshold: p-value threshold for plotting the regression line.

    The figure is shown with `plt.show()`; nothing is returned.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    # Bin edges over the range of eccentricities; bin k (1-based, as returned by
    # np.digitize) covers [bins[k - 1], bins[k - 1] + interval).
    min_e, max_e = np.min(eccs), np.max(eccs)
    bins = np.arange(min_e, max_e + interval, interval)
    bin_indices = np.digitize(eccs, bins)
    unique_bins = np.unique(bin_indices)

    # One colour per bin, indexed by bin number so that a colour always means the
    # same eccentricity interval. The colorbar spans up to the END of the last
    # bin (bins[-1] + interval) so that each colour band lines up with its bin.
    cmap = plt.get_cmap('Accent', len(bins))
    norm = plt.Normalize(vmin=bins[0], vmax=bins[-1] + interval)
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    sm.set_array([])

    for b in unique_bins:
        # Columns of layer_fds whose eccentricity falls in this bin
        idx = np.where(bin_indices == b)[0]
        color = cmap(int(b) - 1)

        # Every (subject, eccentricity) pair in the bin is one data point.
        # np.repeat and the row-major flatten produce the same subject-major order.
        X = np.repeat(axial_lengths, len(idx))
        Y = layer_fds[:, idx].flatten()

        # Remove any non-finite values
        mask = np.isfinite(X) & np.isfinite(Y)
        X_clean = X[mask]
        Y_clean = Y[mask]
        if len(X_clean) < 2:
            continue

        # Scatter plot: all points in this bin get the same color.
        # The label shows the eccentricity interval corresponding to this bin.
        bin_start = bins[b - 1]
        bin_end = bin_start + interval
        ax.scatter(X_clean, Y_clean, s=20, color=color, alpha=0.5, 
                   label=f'{bin_start:.2f}°–{bin_end:.2f}°')

        # linregress cannot fit a line when all x values are identical
        if np.min(X_clean) == np.max(X_clean):
            continue

        # Linear regression on the points in this bin; the line is drawn only if significant.
        slope, intercept, r_value, p_value, std_err = linregress(X_clean, Y_clean)
        if p_value < significance_threshold:
            x_fit = np.linspace(np.min(X_clean), np.max(X_clean), 100)
            y_fit = slope * x_fit + intercept
            ax.plot(x_fit, y_fit, color=color, linewidth=2,
                    label=f'Fit {bin_start:.2f}–{bin_end:.2f}°: β={slope:.2f}, p={p_value:.3g}')

    ax.set_xlabel('Axial Length')
    ax.set_ylabel('Layer Thickness')
    ax.set_title('Axial Length vs. Layer Thickness\nColor-coded by Eccentricity Bin')

    # If there are not too many bins, show the legend; otherwise, add a colorbar.
    if len(unique_bins) <= 10:
        ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    else:
        cbar = fig.colorbar(sm, ax=ax)
        cbar.set_label('Eccentricity')

    fig.tight_layout()
    plt.show()


layer_names_alt = ["onl"]

# NOTE(review): `direction` is read from notebook state and is not assigned in this cell —
# confirm it is set before this cell on a fresh run.
for layer in layer_names_alt:
    print(f"Processing {layer} layer")

    # Thickness matrix for this layer: one row per subject, one column per eccentricity
    thickness_attr = f'{layer}_{direction.value}'
    layer_fds = np.zeros((len(subjects_data), len(eccs)))
    for i, s in enumerate(subjects_data):
        layer_fds[i] = np.array(getattr(s, thickness_attr))

    # Scatter of axial length against thickness, with a looser p-value cutoff than the default
    plot_axial_length_layer_scatter_grouped(
        axial_lengths,
        layer_fds,
        eccs,
        significance_threshold=0.1,
    )

In [None]:

def preprocess_functional_feature(data: np.ndarray, 
                                  axial_lengths: np.ndarray = None, 
                                  standardization: str = 'inter', 
                                  axialLength_standard: bool = False) -> np.ndarray:
    """
    Preprocess a functional feature (e.g. cone density or layer thickness, which is a function of eccentricity)
    by (Z-)standardizing it, optionally after removing a linear axial-length effect.
    `data` should have shape (n_subjects, n_eccentricities).

    - For an intra-individual analysis, use standardization='intra' to standardize within subjects (i.e. within each row).
    - For an inter-individual analysis, use standardization='inter' to standardize across subjects, eccentricity-wise (i.e. within each column).
    - Any other value of `standardization` skips the standardization step.

    If `axialLength_standard` is True and `axial_lengths` is given, each column is first replaced by
    the residuals of a straight-line fit of that column on axial length (NaN where either value is
    not finite, or for the whole column when fewer than two valid pairs exist).
    If `axialLength_standard` is True but `axial_lengths` is None, a warning is issued and the
    correction is skipped.

    NaNs are ignored when computing means and standard deviations. The result is always a float array.
    """
    import warnings

    # Work in floating point: with integer input the residual array would be
    # integer too, silently truncating residuals and rejecting NaN.
    data = np.asarray(data, dtype=float)

    # If requested, remove axial length effects
    if axialLength_standard:
        if axial_lengths is None:
            warnings.warn(
                "axialLength_standard=True but no axial_lengths were given; "
                "the axial-length correction is skipped."
            )
        else:
            axial_lengths = np.asarray(axial_lengths, dtype=float)
            axial_ok = np.isfinite(axial_lengths)
            residuals = np.full(data.shape, np.nan)
            # For each eccentricity, regress the feature on axial length and keep the residuals
            for i in range(data.shape[1]):
                valid_mask = np.isfinite(data[:, i]) & axial_ok
                if np.sum(valid_mask) < 2:
                    # Not enough data for a regression: the column stays NaN
                    continue
                coef = np.polyfit(axial_lengths[valid_mask], data[valid_mask, i], 1)  # simple linear regression
                pred = np.polyval(coef, axial_lengths[valid_mask])
                residuals[valid_mask, i] = data[valid_mask, i] - pred
            data = residuals

    # 'inter' standardizes each column (axis 0), 'intra' each row (axis 1)
    axis = {'inter': 0, 'intra': 1}.get(standardization)
    if axis is None:
        return data

    mean = np.nanmean(data, axis=axis, keepdims=True)
    std = np.nanstd(data, axis=axis, keepdims=True)
    return (data - mean) / std

def preprocess_functional_data(direction: Direction, 
                               standardization: str = 'inter', 
                               toLog: bool = True, 
                               axial_lengths: np.ndarray = None, 
                               axialLength_standard: bool = False) -> Dict[str, np.ndarray]:
    '''
    Preprocess functional data (cone density and layer thicknesses) for a given direction
    by passing each feature through `preprocess_functional_feature`.

    Parameters:
      direction: its `.value` selects the per-subject attributes `<layer>_<value>` and `density_fit_<value>`.
      standardization, axial_lengths, axialLength_standard: forwarded unchanged to
        `preprocess_functional_feature`.
      toLog: if True, the natural log of the fitted cone density is taken before preprocessing.

    Returns:
      Dictionary with the preprocessed fitted cone density under 'cones' and one entry per
      layer in `layer_names`.
    '''
    def _preprocess(per_subject_values) -> np.ndarray:
        # Stack the per-subject curves into one array and apply the shared settings
        return preprocess_functional_feature(
            data=np.array(per_subject_values),
            axial_lengths=axial_lengths,
            standardization=standardization,
            axialLength_standard=axialLength_standard
        )

    layer_fds = {
        layer: _preprocess([getattr(s, f'{layer}_{direction.value}') for s in subjects_data])
        for layer in layer_names
    }

    densities = [getattr(s, f'density_fit_{direction.value}') for s in subjects_data]
    if toLog:
        densities = [np.log(density) for density in densities]
    cone_density_fd = _preprocess(densities)

    # Return a dictionary with the cone density and the layers.
    return {'cones': cone_density_fd, **layer_fds}

def compute_correlations_eccwise(direction: Direction, 
                                 eccs: np.ndarray, 
                                 correlation_fun: Callable[[np.ndarray, np.ndarray], Tuple[float, float]] = lambda cd, lt: spearmanr(cd, lt, nan_policy='omit'),
                                 axial_lengths: np.ndarray = None, 
                                 axialLength_standard: bool = False) -> Dict[str, Tuple[np.ndarray, np.ndarray]]:
    """
    Eccentricity-wise correlation between cone density and every layer thickness.

    All features come from `preprocess_functional_data` with standardization='inter'
    (its other arguments keep their defaults, apart from the axial-length options
    forwarded from this call). For each layer and each index in range(len(eccs)),
    `correlation_fun` is applied to the matching columns of the 'cones' array and of
    the layer array. The default `correlation_fun` is spearmanr with nan_policy='omit'.

    Returns:
      dict mapping layer name -> (correlations, p_values), each an array of length len(eccs).
    """
    features = preprocess_functional_data(direction=direction, 
                                          standardization='inter', 
                                          axial_lengths=axial_lengths, 
                                          axialLength_standard=axialLength_standard)
    n_eccs = len(eccs)
    results = {}

    for layer, layer_fd in features.items():
        # The cone density is the reference variable, not a layer to correlate
        if layer == 'cones':
            continue

        pointwise_corr = np.zeros(n_eccs)
        pointwise_pvalues = np.zeros(n_eccs)
        for col in range(n_eccs):
            # Column `col` holds one value per subject at that eccentricity
            pointwise_corr[col], pointwise_pvalues[col] = correlation_fun(
                features['cones'][:, col], layer_fd[:, col]
            )

        results[layer] = (pointwise_corr, pointwise_pvalues)

    return results


In [None]:
direction = Direction.X

# Two figures: first without, then with the axial-length correction,
# each with its own p-value threshold.
for remove_axial_effect, pv_cutoff in ((False, 0.1), (True, 0.05)):
    results = compute_correlations_eccwise(
        direction, eccs,
        axial_lengths=axial_lengths,
        axialLength_standard=remove_axial_effect,
    )
    plot_correlations_eccwise(results, eccs, direction, abs_=False, pv_threshold=pv_cutoff)

In [None]:
direction = Direction.Y

# Two figures: first without, then with the axial-length correction,
# each with its own p-value threshold.
for remove_axial_effect, pv_cutoff in ((False, 0.1), (True, 0.05)):
    results = compute_correlations_eccwise(
        direction, eccs,
        axial_lengths=axial_lengths,
        axialLength_standard=remove_axial_effect,
    )
    plot_correlations_eccwise(results, eccs, direction, abs_=False, pv_threshold=pv_cutoff)