## Setup

### Initial Imports

In [None]:
import os
import pandas as pd
import numpy as np

from dataclasses import dataclass
from typing import List, Optional

from pytest import param
from zmq import has

### Plot Setup

In [None]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import matplotlib.pyplot as plt
import matplotx

# Style sheet left disabled on purpose: plt.style.use('science')
# Notebook-wide matplotlib defaults, applied in a single call.
plt.rcParams.update({
    'figure.figsize': (10, 6),   # default figure size
    'figure.dpi': 300,           # default figure dpi
    'font.size': 12,             # default font size
    'lines.linewidth': 2,        # default line width
    'axes.labelsize': 14,        # default axis-label size
    'axes.titlesize': 16,        # default axes-title size
    'xtick.labelsize': 12,       # default x-tick label size
    'ytick.labelsize': 12,       # default y-tick label size
    'legend.fontsize': 12,       # default legend font size
    'figure.titlesize': 18,      # default figure-title size
})

## Gathering subjects' data

### Data Structures

Data structures used to store each subject's data.

In [None]:
from dataclasses import dataclass
import pandas as pd
import numpy as np

@dataclass
class SubjectData:
    """Per-subject container for the measurements gathered in this notebook.

    Every field defaults to ``None`` so that an instance can be created empty
    and filled in attribute by attribute while the subject's files are read.
    """
    # Identification (filled from the subject / session numbers).
    name: Optional[str] = None       # e.g. 'Subject10'
    pid: Optional[str] = None        # e.g. 'AOHC_10'
    nb: Optional[int] = None         # subject number
    session: Optional[str] = None    # e.g. 'Session279'

    # Cone-density shape parameters per direction
    # (columns 'width_*' / 'max_slope_*' of densities.csv).
    width_nas: Optional[float] = None
    width_tem: Optional[float] = None
    width_inf: Optional[float] = None
    width_sup: Optional[float] = None
    max_slope_nas: Optional[float] = None
    max_slope_tem: Optional[float] = None
    max_slope_inf: Optional[float] = None
    max_slope_sup: Optional[float] = None

    # Foveal shape parameters read from fovea_3d_fitted_params.csv.
    oct_bump_X: Optional[float] = None      # row 'A20'
    oct_bump_Y: Optional[float] = None      # row 'A02'
    oct_width_X: Optional[float] = None     # converted to degrees by the loader
    oct_width_Y: Optional[float] = None     # converted to degrees by the loader
    oct_max_slope: Optional[float] = None
    oct_depth: Optional[float] = None       # in mm
    oct_flatness: Optional[float] = None

    # Subject metadata taken from the cohort Excel sheet.
    age: Optional[float] = None             # years (days between visit and birth / 365)
    axial_length: Optional[float] = None
    spherical_equiv: Optional[float] = None
    sex: Optional[int] = None               # 1 if 'F', 0 otherwise

    # Cone-density profiles (densities.csv); `eccs` is that file's index.
    eccs: Optional[np.ndarray] = None
    density_X: Optional[pd.Series] = None
    density_Y: Optional[pd.Series] = None
    density_fit_X: Optional[pd.Series] = None
    density_fit_Y: Optional[pd.Series] = None

    # Layer profiles along X and Y (columns of results.csv).
    cvi_X: Optional[pd.Series] = None
    cvi_Y: Optional[pd.Series] = None
    gcl_ipl_X: Optional[pd.Series] = None
    gcl_ipl_Y: Optional[pd.Series] = None
    onl_X: Optional[pd.Series] = None
    onl_Y: Optional[pd.Series] = None
    inl_opl_X: Optional[pd.Series] = None
    inl_opl_Y: Optional[pd.Series] = None
    rnfl_X: Optional[pd.Series] = None
    rnfl_Y: Optional[pd.Series] = None
    chrd_X: Optional[pd.Series] = None
    chrd_Y: Optional[pd.Series] = None
    pr_rpe_X: Optional[pd.Series] = None
    pr_rpe_Y: Optional[pd.Series] = None
    os_X: Optional[pd.Series] = None
    os_Y: Optional[pd.Series] = None

    # Cone counts computed by `get_nb_cones` (measured and fitted profiles).
    nb_cones: Optional[float] = None
    nb_cones_fit: Optional[float] = None

    # GCL+IPL pit descriptors computed by `get_gcl_width`.
    width_gcl_X: Optional[float] = None
    width_gcl_Y: Optional[float] = None
    min_thick_gcl: Optional[float] = None   # smaller of the X and Y minima



@dataclass
class FoveaParams:
    """Class for storing fovea 3D fitted parameters.

    The four identifying fields are required; every other field defaults to
    ``None`` and is meant to be filled from the rows of a
    ``fovea_3d_fitted_params.csv`` file (see ``extract_fovea_data``), whose
    row names match a field either directly (``A00`` ...) or once prefixed
    with ``foveal_`` (``depth`` -> ``foveal_depth``).
    """
    # Patient information
    subject: str            # e.g. 'Subject10'
    patient_id: str         # numeric part of the subject folder name, as a string
    subject_folder: str     # name of the subject directory on disk
    trial_name: str         # name of the session directory
    age: Optional[int] = None

    # Fitted parameters
    # A00..A11 keep the names used in the CSV file; their meaning is not
    # visible from this notebook.
    A00: Optional[float] = None
    A10: Optional[float] = None
    A01: Optional[float] = None
    A20: Optional[float] = None
    A02: Optional[float] = None
    A11: Optional[float] = None
    foveal_depth: Optional[float] = None
    foveal_center_X: Optional[float] = None
    foveal_width_X: Optional[float] = None
    foveal_center_Y: Optional[float] = None
    foveal_width_Y: Optional[float] = None
    foveal_max_slope: Optional[float] = None
    foveal_flatness: Optional[float] = None
    foveal_volume: Optional[float] = None



In [None]:

# Debug shortcut: work on a handful of subjects so that the whole pipeline
# does not have to be re-run for every subject each time the model is tested.
#
# Subject names are ordered as strings ("Subject10", "Subject100",
# "Subject101", ...), so the first five are spelled out explicitly here.
# The list is meant to be used later to pick those subjects out of the
# gathered subject data.
take_first_five = False
first_five_subjects = [
    "Subject10",
    "Subject100",
    "Subject101",
    "Subject104",
    "Subject105",
]

### Function definitions

#### Foveal Data Extraction

In [None]:
def extract_fovea_data(base_path: str) -> List[FoveaParams]:
    """
    Extract fovea parameters from CSV files with known structure.

    Every ``Subject*`` directory under ``base_path`` is scanned for
    ``Session*`` directories, and each
    ``<session>/layer_new/fovea_3d_fitted_params.csv`` found is parsed as
    ``name;value`` rows. Directories are visited in sorted (string) order so
    the result does not depend on the file system's listing order.

    Args:
        base_path: Path to the base directory containing subject folders
        (subjfolder/trialfolder/layer_new/fovea_3d_fitted_params.csv)

    Returns:
        List of FoveaParams objects, one for each found CSV file. The list is
        empty when ``base_path`` does not exist (a message is printed).
    """
    # Function-scope import keeps the block self-contained; the pattern is
    # compiled once instead of being re-imported/re-built per subject.
    import re
    subject_pattern = re.compile(r'Subject(\d+)')

    # Get only the subject directories (directories starting with "Subject")
    try:
        subject_dirs = sorted(
            d for d in os.listdir(base_path)
            if d.startswith("Subject") and os.path.isdir(os.path.join(base_path, d))
        )
    except FileNotFoundError:
        print(f"Base path not found: {base_path}")
        return []

    fovea_data = []
    for subject_dir in subject_dirs:
        subject_match = subject_pattern.search(subject_dir)
        if not subject_match:
            continue
        patient_id = subject_match.group(1)
        subject_path = os.path.join(base_path, subject_dir)

        try:
            # Get session directories (directories starting with "Session")
            session_dirs = sorted(
                d for d in os.listdir(subject_path)
                if d.startswith("Session") and os.path.isdir(os.path.join(subject_path, d))
            )
        except FileNotFoundError:
            # The subject directory vanished between the two listings.
            continue

        for session_dir in session_dirs:
            csv_path = os.path.join(subject_path, session_dir, "layer_new", "fovea_3d_fitted_params.csv")
            if not os.path.isfile(csv_path):
                continue

            df = pd.read_csv(csv_path, sep=';', header=None, names=['param', 'value'])

            # Create basic FoveaParams object with patient info
            fovea_obj = FoveaParams(
                patient_id=patient_id,
                subject=f"Subject{patient_id}",
                subject_folder=subject_dir,
                trial_name=session_dir
            )
            print(f"Processing file: {csv_path} for patient {patient_id}")

            for param_name, param_value in zip(df['param'], df['value']):
                # The file's own header line is read as data; its value cell
                # is the literal column title "params".
                if param_value == "params":
                    continue
                # A blank name cell is parsed as NaN; hasattr() needs a string.
                if not isinstance(param_name, str):
                    continue

                prefixed_name = f"foveal_{param_name}"
                has_prefixed = hasattr(fovea_obj, prefixed_name)
                if not (hasattr(fovea_obj, param_name) or has_prefixed):
                    continue

                try:
                    value = float(param_value)
                except (TypeError, ValueError):
                    print(f"Error setting {param_name} for patient {patient_id}: {param_value}")
                    continue

                # NOTE(review): the value is stored under the raw CSV name even
                # when only the `foveal_`-prefixed field is declared, so the
                # object (and any table built from `vars()`) carries both
                # `depth` and `foveal_depth`. Kept as-is for backward
                # compatibility -- confirm whether the raw name is needed.
                setattr(fovea_obj, param_name, value)
                if has_prefixed:
                    setattr(fovea_obj, prefixed_name, value)

            fovea_data.append(fovea_obj)

    return fovea_data

def save_to_dataframe(fovea_data: List[FoveaParams], output_file: str = "fovea_parameters.csv") -> pd.DataFrame:
    """
    Tabulate the fitted fovea parameters and write the table to a CSV file.

    Args:
        fovea_data: List of FoveaParams objects; each object's attribute
            dictionary becomes one row
        output_file: Path of the CSV file to write (written without the index)

    Returns:
        The DataFrame that was written to ``output_file``
    """
    # One record per object, keyed by attribute name.
    records = list(map(vars, fovea_data))
    table = pd.DataFrame(records)
    table.to_csv(output_file, index=False)
    return table



## Loading data

#### Populating Additional fields based on the previously gathered data

In [None]:
def _refine_peak_location(ecc: np.ndarray, profile: np.ndarray) -> float:
    """Return the eccentricity of the profile's maximum, refined by a parabola fit."""
    peak = int(np.nanargmax(profile))
    # Clamp the 5-sample window so a peak near either end cannot produce a
    # wrapped-around or empty slice.
    lo = max(peak - 2, 0)
    hi = min(peak + 3, len(profile))
    xs = np.asarray(ecc[lo:hi], dtype=float)
    ys = np.asarray(profile[lo:hi], dtype=float)
    usable = np.isfinite(xs) & np.isfinite(ys)
    if usable.sum() < 3:
        # Not enough samples for a quadratic: keep the grid location.
        return float(ecc[peak])
    a, b, _ = np.polyfit(xs[usable], ys[usable], 2)
    if a == 0 or not (np.isfinite(a) and np.isfinite(b)):
        # Degenerate fit has no vertex.
        return float(ecc[peak])
    return -b / (2 * a)


def get_nb_cones(ecc: np.ndarray, dens_X: pd.Series, dens_Y: pd.Series, radius: float, smoothen: bool = True,
                 *, mm_per_degree: Optional[float] = None) -> float:
    """
    Compute the total number of cones within a disk centred on the density peak.

    The density profiles along the X and Y axes are sampled at 500 radii on
    both sides of their respective peaks (linear interpolation), the four
    resulting meridians are averaged radius by radius, and the mean radial
    profile is integrated in polar coordinates over the disk.

    Args:
        ecc: Eccentricities (in degrees, increasing) at which the profiles are sampled
        dens_X: Cone density profile along the X axis
        dens_Y: Cone density profile along the Y axis
        radius: Radius of the disk, in degrees
        smoothen: If True, the profiles are first smoothed with
            ``gaussian_filter_nan(..., sigma=4)``; otherwise they are used as-is
        mm_per_degree: Conversion factor from degrees to millimetres; defaults
            to the project constant ``MM_PER_DEGREE``

    Returns:
        The integrated number of cones inside the disk
    """
    smthd_x = gaussian_filter_nan(dens_X, sigma=4) if smoothen else dens_X.to_numpy()
    smthd_y = gaussian_filter_nan(dens_Y, sigma=4) if smoothen else dens_Y.to_numpy()

    x_peak = _refine_peak_location(ecc, smthd_x)
    y_peak = _refine_peak_location(ecc, smthd_y)

    R = np.linspace(0.0001, radius, 500)  # radius in degrees
    # Stack (do not concatenate) the four meridians: shape (4, len(R)), so the
    # mean below is taken per radius and the radial profile is preserved.
    meridians = np.vstack([
        np.interp(x_peak + R, ecc, smthd_x),
        np.interp(x_peak - R, ecc, smthd_x),
        np.interp(y_peak + R, ecc, smthd_y),
        np.interp(y_peak - R, ecc, smthd_y),
    ])
    radial_density = np.nanmean(meridians, axis=0)

    scale = MM_PER_DEGREE if mm_per_degree is None else mm_per_degree
    norm_coef = scale**2 * 2 * np.pi
    # `np.trapz` was renamed `np.trapezoid` in NumPy 2.0; support both.
    integrate = np.trapezoid if hasattr(np, "trapezoid") else np.trapz
    # integrate cone density over disk to get total nb of cones
    return norm_coef * integrate(radial_density * R, R)

# Radius of the disk over which cones are counted for every subject.
RADIUS = 3.33 # degree
# Store two counts per subject: one from the measured density profiles
# (smoothed inside `get_nb_cones`, the default) and one from the fitted
# profiles, which are passed through unsmoothed.
for sd in subjects_data:
    sd.nb_cones = get_nb_cones(sd.eccs, sd.density_X, sd.density_Y, radius = RADIUS)
    sd.nb_cones_fit = get_nb_cones(sd.eccs, sd.density_fit_X, sd.density_fit_Y, radius = RADIUS, smoothen=False)

In [None]:
from scipy.signal import find_peaks

def adjust_flat(gcl_data: np.ndarray, peak_left: int, peak_right: int) -> np.ndarray:
    """Remove the linear tilt between two samples of a profile.

    The straight line through ``gcl_data[peak_left]`` and
    ``gcl_data[peak_right]`` is subtracted (pivoting on ``peak_left``), so
    both samples end up at the value originally found at ``peak_left``.
    """
    rise = gcl_data[peak_right] - gcl_data[peak_left]
    run = peak_right - peak_left
    tilt = rise / run
    # Sample offsets measured from the left peak, where the correction is zero.
    offsets = np.arange(len(gcl_data)) - peak_left
    return gcl_data - tilt * offsets

def get_gcl_width(gcl: pd.Series) -> Tuple[float, float]:
    '''
    Given the GCL+IPL thickness profile, compute the width of the pit as well as the minimum thickness of the layer. Here, the width of the pit is defined as the distance between the two points where the thickness is 20% of the depth of the pit. The depth of the pit is defined as the difference between the thickness surrounding the pit and the thickness at the pit's bottom.

    Only eccentricities within [-6, 6] of the profile's index are used. The
    rim of the pit is located as one peak in the left third and one in the
    right third of the smoothed profile; the smoothing strength is increased
    (3 to 9) until both are found. The minimum thickness is the vertex of a
    parabola fitted to the 10 thinnest samples.

    Returns:
        (width of the pit, minimum thickness)

    Raises:
        ValueError: if no peak can be found on either side of the pit.
    '''
    # Remember the label now: `gcl` is rebound to a plain array just below
    # and no longer has a `.name` afterwards.
    profile_name = gcl.name
    eccs = gcl[np.abs(gcl.index) <= 6].index.to_numpy()
    gcl = gcl.interpolate(method='polynomial', order=1)[eccs].to_numpy()

    peak_left, peak_right = [], []
    smooth_param = 3
    while not (peak_left and peak_right) and smooth_param < 10:
        smoothed_gcl = gaussian_filter_nan(gcl, smooth_param)
        peaks = find_peaks(smoothed_gcl)[0]
        n_samples = len(smoothed_gcl)
        peak_left = [peak for peak in peaks if peak < n_samples / 3]
        peak_right = [peak for peak in peaks if peak > 2 * n_samples / 3]
        smooth_param += 1
    if not (peak_left and peak_right):
        raise ValueError(f'No peaks found for {profile_name}')

    # Several candidates on one side are merged into their mean position.
    peak_left = round(np.mean(peak_left))
    peak_right = round(np.mean(peak_right))
    # Level the two rim peaks before measuring the pit.
    adjusted_gcl = adjust_flat(gcl, peak_left, peak_right)
    smoothed_aj_gcl = gaussian_filter_nan(adjusted_gcl, 2)

    # Threshold at one fifth of the pit depth above its bottom.
    y_min = np.nanmin(smoothed_aj_gcl[peak_left:peak_right])
    y_target = y_min + (smoothed_aj_gcl[peak_left] - y_min) / 5
    # Indices where the levelled profile crosses the threshold.
    intercepts = np.where(np.diff(np.sign(smoothed_aj_gcl - y_target)))[0]
    leftmost = eccs[intercepts[0]]
    rightmost = eccs[intercepts[-1]+1]
    width_pit_gcl = rightmost - leftmost

    # Parabola through the 10 thinnest samples of the (unlevelled) profile.
    thinnest = np.argpartition(gcl, 10)[:10]
    p = np.polyfit(eccs[thinnest], gcl[thinnest], 2)
    if p[0] == 0:
        # Degenerate (non-quadratic) fit: there is no vertex to evaluate.
        # Keep the diagnostic plot of the fit and fall back to its lowest
        # value over the fitted samples instead of dividing by zero.
        fit_eccs = np.sort(eccs[thinnest])
        fit_values = np.polyval(p, fit_eccs)
        plt.plot(fit_eccs, fit_values, '--')
        min_thickness_gcl = np.min(fit_values)
    else:
        min_thickness_gcl = np.polyval(p, -p[1] / (2 * p[0]))
    return width_pit_gcl, min_thickness_gcl

# For every subject, measure the GCL+IPL pit along both axes; the widths are
# stored per axis, the minimum thickness as the smaller of the two fits.
for sd in subjects_data:
    width_gcl_x, min_thick_x = get_gcl_width(sd.gcl_ipl_X)
    width_gcl_y, min_thick_y = get_gcl_width(sd.gcl_ipl_Y)
    sd.width_gcl_X = width_gcl_x
    sd.width_gcl_Y = width_gcl_y
    sd.min_thick_gcl = min(min_thick_x, min_thick_y)
    # Disabled per-subject debug output (uncomment to inspect the profiles):
    # print(f'{sd.name:>10}: {width_gcl_x:.2f}°, {min_thick_x:.4f}, {width_gcl_y:.2f}°, {min_thick_y:.4f}')
    # plt.xlim(-6, 6)
    # plt.legend()
    # plt.title(sd.name)
    # plt.show()

In [None]:
from pathlib import Path
from typing import List, Tuple, Dict

from src.cell.analysis.constants import MM_PER_DEGREE
from src.cell.layer.helpers import gaussian_filter_nan
from src.configs.parser import Parser

Parser.initialize()

# Look-up table of (subject number, session number) pairs, one pair per
# non-blank line of the file. The file is closed as soon as it is read.
with open('src/processed.txt') as processed_file:
    subjects_sessions = [[int(n) for n in s.strip().split()] for s in processed_file if s.strip()]


# Subject metadata from the cohort Excel sheet. All four look-ups start empty
# so that, if the sheet cannot be read, loading still proceeds and the
# corresponding SubjectData fields simply stay unset.
age_dict, axial_dict, spherical_dict, sex_dict = {}, {}, {}, {}
try:
    sheet = pd.ExcelFile(r'V:\Studies\AOSLO\data\cohorts\AOSLO healthy\DATA_HC+DM.xlsx').parse('Healthy', header=0, nrows=45, index_col=0)
    sheet.index = sheet.index.map(lambda x: f'Subject{x}')
    # age in years at the date of the visit
    age_dict = ((sheet['Date of visit'] - sheet['DDN']).dt.days / 365).to_dict()
    # pick the right-eye column when laterality is 'OD', the left-eye column otherwise
    axial_dict = sheet['AL D (mm)'].where(sheet['Laterality'] == 'OD', sheet['AL G (mm)']).to_dict()
    spherical_dict = sheet['Equi Sph D'].where(sheet['Laterality'] == 'OD', sheet['Equi Sph G']).to_dict()
    sex_dict = sheet['Sexe'].map(lambda x: 1 if x == 'F' else 0).to_dict()
except Exception as exc:
    # Notebook-level boundary: report the problem and continue without metadata.
    print(f'Could not load subject metadata from the Excel sheet ({exc!r}); '
          'age, axial length, spherical equivalent and sex are left unset.')
base_path = Path(r'P:\AOSLO\_automation\_PROCESSED\Photoreceptors\Healthy\_Results')


# subject for which OCTs are tilted (white dot is not well aligned with PR+RPE peak)
# see explanation in `PRxRLT_expmanual.ipynb`
oct_to_exclude = {
    13, 18, 20, 25, 26, 30, 35, 42, 46, 66, 100, 105,
}


subjects_data: List[SubjectData] = []
for subject_n, session_n in subjects_sessions:
    if subject_n in oct_to_exclude:
        continue

    sd = SubjectData()
    sd.name = f'Subject{subject_n}'
    sd.pid = f'AOHC_{subject_n}'
    sd.nb = subject_n
    sd.session = f'Session{session_n}'

    # directory holding this subject/session's processed results
    path = base_path / sd.name / sd.session
    print(f'Loading {sd.name} {sd.session}...')

    # record subject's metadata from the excel sheet
    # (None when the sheet was not loaded or has no row for this subject)
    sd.age = age_dict.get(sd.name)
    sd.axial_length = axial_dict.get(sd.name)
    sd.spherical_equiv = spherical_dict.get(sd.name)
    sd.sex = sex_dict.get(sd.name)

    # record foveal shape parameters (populated by `src/save_layer_features.ipynb`)
    df_oct = pd.read_csv(path / Parser.get_layer_thickness_dir() / 'fovea_3d_fitted_params.csv', sep=';', index_col=0)
    sd.oct_bump_X = df_oct.loc['A20', 'params']
    sd.oct_bump_Y = df_oct.loc['A02', 'params']
    sd.oct_width_X = df_oct.loc['width_X', 'params'] * np.sqrt(2 * 2.8) / MM_PER_DEGREE # in °
    sd.oct_width_Y = df_oct.loc['width_Y', 'params'] * np.sqrt(2 * 2.8) / MM_PER_DEGREE # in °
    sd.oct_max_slope = df_oct.loc['max_slope', 'params']
    sd.oct_depth = df_oct.loc['depth', 'params'] # in mm
    sd.oct_flatness = df_oct.loc['flatness', 'params']
    # sd.oct_volume = df_oct.loc['volume', 'params']

    # record cone density and fitted parameters (populated by `src/cell/analysis/density_analysis_pipeline_manager.py`)
    df_density = pd.read_csv(path / Parser.get_density_analysis_dir() / 'densities.csv', sep=';', index_col=0)
    df_raw_density_x = pd.read_csv(path / Parser.get_density_analysis_dir() / 'densities_raw_x.csv', sep=';', index_col=0)
    df_raw_density_y = pd.read_csv(path / Parser.get_density_analysis_dir() / 'densities_raw_y.csv', sep=';', index_col=0)

    sd.width_nas = df_density['width_nasal'].iloc[0]
    sd.width_tem = df_density['width_temporal'].iloc[0]
    sd.width_inf = df_density['width_inferior'].iloc[0]
    sd.width_sup = df_density['width_superior'].iloc[0]
    sd.max_slope_nas = df_density['max_slope_nasal'].iloc[0]
    sd.max_slope_tem = df_density['max_slope_temporal'].iloc[0]
    sd.max_slope_inf = df_density['max_slope_inferior'].iloc[0]
    sd.max_slope_sup = df_density['max_slope_superior'].iloc[0]
    sd.density_X = df_density['dens_smthd_X']
    sd.density_Y = df_density['dens_smthd_Y']
    sd.density_fit_X = df_density['dens_fit_X']
    sd.density_fit_Y = df_density['dens_fit_Y']

    # eccentricities are the index of the density table
    sd.eccs = df_density.index.to_numpy()

    # record layer thicknesses (populated by `src/save_layer_features.ipynb`)
    # NOTE(review): this file is read from the density-analysis directory, not
    # the layer-thickness directory used above -- confirm that is intended.
    df_thick = pd.read_csv(path / Parser.get_density_analysis_dir() / 'results.csv', sep=',', index_col=0, skiprows=1).query('-10 <= index <= 10')
    sd.cvi_X = df_thick['CVI_X']
    sd.cvi_Y = df_thick['CVI_Y']
    sd.gcl_ipl_X = df_thick['GCL+IPL_X']
    sd.gcl_ipl_Y = df_thick['GCL+IPL_Y']
    sd.onl_X = df_thick['ONL_X']
    sd.onl_Y = df_thick['ONL_Y']
    sd.inl_opl_X = df_thick['INL+OPL_X']
    sd.inl_opl_Y = df_thick['INL+OPL_Y']
    sd.rnfl_X = df_thick['RNFL_X']
    sd.rnfl_Y = df_thick['RNFL_Y']
    sd.chrd_X = df_thick['Choroid_X']
    sd.chrd_Y = df_thick['Choroid_Y']
    sd.pr_rpe_X = df_thick['PhotoR+RPE_X']
    sd.pr_rpe_Y = df_thick['PhotoR+RPE_Y']
    sd.os_X = df_thick['OS_X']
    sd.os_Y = df_thick['OS_Y']

    subjects_data.append(sd)

Loading Subject10 Session279...
Loading Subject11 Session241...
Loading Subject12 Session239...
Loading Subject15 Session254...
Loading Subject16 Session246...
Loading Subject19 Session248...
Loading Subject24 Session298...
Loading Subject27 Session270...
Loading Subject28 Session251...
Loading Subject29 Session258...
Loading Subject33 Session272...
Loading Subject34 Session274...
Loading Subject36 Session284...
Loading Subject37 Session285...
Loading Subject38 Session288...
Loading Subject39 Session290...
Loading Subject40 Session292...
Loading Subject41 Session294...
Loading Subject47 Session311...
Loading Subject48 Session314...
Loading Subject49 Session316...
Loading Subject51 Session323...
Loading Subject53 Session334...
Loading Subject55 Session337...
Loading Subject91 Session428...
Loading Subject92 Session431...
Loading Subject93 Session434...
Loading Subject94 Session436...
Loading Subject98 Session450...
Loading Subject101 Session470...
Loading Subject104 Session492...
Loadin