# Related imports

In [1]:
import os
import eyepy
import h5py as h5
import numpy as np
import pandas as pd
import skimage as ski

from eyepy.io import HeE2eReader
from typing import Optional, Tuple
from eyepy.io.he.e2e_format import TypesEnum
from eyepy.core.utils import from_vol_intensity

In [2]:
#patient's mrn extraction function
def retrieve_patient_meta(e2e_file_path: str,
                          mrns_file_path: Optional[str] = None
                         ) -> dict:

    """Look up the medical record number and study date recorded for an E2E file.

    The E2E file name (last '/'-separated component of the path) is matched
    against the ``file_name`` column of the MRN database CSV.

    Args:
        e2e_file_path (str): The E2E file path.
        mrns_file_path (Optional[str], optional): Path to the CSV database of patients'
            medical record numbers. It is read with ``pd.read_csv`` and must provide the
            columns ``file_name``, ``PatientDurableKey`` and ``study_date``. Defaults to None.

    Returns:
        dict: A dictionary with the keys ``'patient_mrn'``, ``'file_name'`` and
            ``'acquisition_date'``. The MRN and the date are both "N/A" when no
            database is given, when the file has no row in it, or when an
            expected column is missing.
    """
    file_name = e2e_file_path.split('/')[-1]

    # Defaults used whenever the lookup is skipped or fails.
    patient_mrn = "N/A"
    acquisition_date = "N/A"

    if mrns_file_path is not None:
        mrns = pd.read_csv(mrns_file_path)
        try:
            matches = mrns.loc[mrns.file_name == file_name]
            found_mrn = list(matches.PatientDurableKey)[0]
            found_date = list(matches.study_date)[0]
        except (AttributeError, KeyError, IndexError):
            # Missing column (AttributeError/KeyError) or no matching row
            # (IndexError): keep the "N/A" defaults for BOTH fields.
            pass
        else:
            patient_mrn, acquisition_date = found_mrn, found_date

    patient_meta = {'patient_mrn':patient_mrn,
                    'file_name':file_name,
                    'acquisition_date':acquisition_date}
    return patient_meta

In [3]:
# patient's metadata extraction function
def extract_file_meta(series_index: int,
                      e2e_file_path: str,
                      mrns_file_path: Optional[str],
                      metadata: pd.DataFrame,
                      e2e_reader: eyepy.io.HeE2eReader,
                     ) -> Tuple[pd.DataFrame, object]:

    """Fill one row of the metadata table for a single series of an E2E file.

    The row labelled ``series_index`` receives the following columns:

       Data_division: The parent directory name of the E2E file
       File_name: The file name without its extension
       Patient_MRN: Patient medical record number (as a string)
       Sex: Patient sex as stored in the E2E file
       Acquisition_date: Study date taken from the MRN database
       Study_id: Id of the first study in the E2E file
       Series_id: The index of the series being extracted
       Structure: The eye structure being scanned
       Laterality: The eye laterality
       Scan_pattern: The pattern with which the OCT scan was captured
       Enface_modality: The type of the en-face modality scanned
       Oct_modality: The type of OCT modality scanned

    Sex, Laterality and Oct_modality fall back to 'N/A' when they cannot be
    read; Patient_MRN and Acquisition_date are 'N/A' when the MRN database has
    no entry. Structure, Scan_pattern and Enface_modality are read without a
    fallback, so a series lacking them raises.

    Args:
        series_index (int): The index of the series in the E2E file being extracted.
        e2e_file_path (str): The E2E file path ('/'-separated, with at least one parent directory).
        mrns_file_path (Optional[str]): Patients' medical record numbers database path, or None.
        metadata (pd.DataFrame): The dataframe the row is written into (modified in place).
        e2e_reader (eyepy.io.HeE2eReader): Eyepy E2E reader instance that reads the E2E file.

    Returns:
        Tuple[pd.DataFrame, object]: The updated metadata dataframe and the patient MRN
            exactly as returned by ``retrieve_patient_meta`` (or "N/A").
    """
    i = series_index

    patient_meta = retrieve_patient_meta(e2e_file_path=e2e_file_path,
                                         mrns_file_path=mrns_file_path)

    metadata.at[i,'Data_division'] = e2e_file_path.split('/')[-2]
    # splitext drops whatever extension is present instead of blindly cutting 4 characters.
    metadata.at[i,'File_name'] = os.path.splitext(patient_meta['file_name'])[0]
    metadata.at[i,'Patient_MRN'] = str(patient_meta['patient_mrn'])

    # NOTE(review): folder type 9 presumably holds the patient record - confirm
    # against the eyepy E2E type table.
    try:
        metadata.at[i,'Sex'] = e2e_reader.patients[0].folders[9][0].data['sex']
    except Exception:
        metadata.at[i,'Sex'] = 'N/A'

    # The acquisition date comes from the MRN database, not from the B-scan
    # metadata stored in the E2E file; it is already "N/A" when unavailable.
    metadata.at[i,'Acquisition_date'] = patient_meta['acquisition_date']

    metadata.at[i,'Study_id']  = str(e2e_reader.studies[0].id)
    metadata.at[i,'Series_id'] = i
    # NOTE(review): folder types 9005-9008 are assumed to be the text entries
    # for structure / scan pattern / en-face modality / OCT modality - confirm.
    metadata.at[i,'Structure'] = e2e_reader.series[i].folders[9005][0].data.text[0]

    try:
        # Drops the first 15 characters of the laterality text - presumably a
        # fixed prefix such as "LateralityEnum."; verify against eyepy.
        metadata.at[i,'Laterality'] = str(e2e_reader.series[i].laterality()[15:])
    except Exception:
        metadata.at[i,'Laterality'] = 'N/A'

    metadata.at[i,'Scan_pattern'] = e2e_reader.series[i].folders[9006][0].data.text[0]
    metadata.at[i,'Enface_modality'] = e2e_reader.series[i].folders[9007][0].data.text[0]

    try:
        metadata.at[i,'Oct_modality'] = e2e_reader.series[i].folders[9008][0].data.text[0]
    except KeyError:
        metadata.at[i,'Oct_modality'] = 'N/A'

    return metadata, patient_meta['patient_mrn']

In [4]:
# patient's enface modality extraction function
def extract_en_face_scans(series_index: int,
                          scans: dict,
                          metadata: pd.DataFrame,
                          e2e_reader: eyepy.io.HeE2eReader,
                          ) -> dict:
    """Extract the en-face scan of one series of an E2E file.

    The first image found in the series' image folders is stored under
    ``scans['en_face_scan']`` and the number of images found is written to the
    ``en_face_scans_num`` column of ``metadata``. When the series has no image
    folder (or the folder list is empty) the scan is 'N/A' and the count is 0.

    Args:
        series_index (int): The index of the series in the E2E file being extracted.
        scans (dict): The dictionary that receives the extracted scan (modified in place).
        metadata (pd.DataFrame): The metadata dataframe; row ``series_index`` is updated in place.
        e2e_reader (eyepy.io.HeE2eReader): Eyepy E2E reader instance that reads the E2E file.

    Returns:
        dict: The same ``scans`` dictionary, now holding the 'en_face_scan' entry.
    """
    i = series_index
    try:
        folders = e2e_reader.series[i].folders[TypesEnum.image]
    except KeyError:
        folders = []

    en_face_images = [folder.data.data for folder in folders]

    if not en_face_images:
        # No image folder at all, or an empty one: nothing to index into.
        scans['en_face_scan'] = 'N/A'
        metadata.at[i,'en_face_scans_num'] = 0
    else:
        metadata.at[i,'en_face_scans_num'] = len(en_face_images)
        # Only the first en-face image is kept; the count above records how many there were.
        scans['en_face_scan'] = en_face_images[0]
    return scans


In [5]:
# patient's OCT scans extraction function 
def extract_oct_scans(series_index:int,
                      scans:dict,
                      metadata: pd.DataFrame,
                      e2e_reader: eyepy.io.HeE2eReader,
                      ) -> dict:

    """Extract the OCT B-scans of one series of an E2E file.

    Every slice of the series contributes its first image. The images are
    stacked into a single array stored in ``scans`` under the series' scan
    pattern (read from ``metadata``), and the number of stacked B-scans is
    written to the ``B_scans_num`` column. The stack is rebuilt after each
    slice, so a slice that fails leaves the previously built stack in place.

    Args:
        series_index (int): The index of the series in the E2E file being extracted.
        scans (dict): The dictionary that receives the extracted scans (modified in place).
        metadata (pd.DataFrame): The metadata dataframe. Row ``series_index`` must already
            contain ``Scan_pattern``; ``B_scans_num`` is written to it.
        e2e_reader (eyepy.io.HeE2eReader): Eyepy E2E reader instance that reads the E2E file.

    Returns:
        dict: The same ``scans`` dictionary, with the stacked B-scans (or 'N/A')
            stored under the scan pattern name.
    """
    i = series_index
    pattern = metadata.iloc[i].Scan_pattern
    bscans = {}
    for slice_id, bscan_slice in e2e_reader.series[i].slices.items():
        try:
            folders = bscan_slice.folders[TypesEnum.image]
        except Exception:
            # Slice without image data: mark the pattern as unavailable.
            # B_scans_num is left untouched for this slice.
            scans[pattern] = 'N/A'
            continue

        # Raw B-scan with intensity correction. Every folder is converted and
        # only the first result kept (the second image of a slice is ignored).
        try:
            bscans[slice_id] = [from_vol_intensity(folder.data.data) for folder in folders][0]
        except Exception:
            # Intensity correction failed: fall back to a plain copy of the raw data.
            try:
                bscans[slice_id] = [np.copy(folder.data.data) for folder in folders][0]
            except Exception:
                # Nothing usable in this slice; keep what was collected so far.
                continue

        try:
            scans[pattern] = _stack_bscans(list(bscans.values()))
        except Exception:
            continue

        metadata.at[i,'B_scans_num'] = len(scans[pattern])
    return scans


def _stack_bscans(slices: list) -> np.ndarray:
    """Stack B-scans into one array, resizing all to the first one's shape if they do not stack."""
    try:
        # Probe: images of differing size do not form a multi-dimensional
        # array, so reading a second dimension (or the conversion) raises.
        np.array(slices, dtype=object).shape[1]
        return np.array(slices)
    except Exception:
        # Handle slice size inconsistency by resizing to the first slice.
        height, width = slices[0].shape[0], slices[0].shape[1]
        resized = [ski.util.img_as_ubyte(ski.transform.resize(img,(height,width))) for img in slices]
        return np.array(resized)

In [6]:
# E2E data extraction function
def extract_e2e(e2e_file_path: str,
                mrns_file_path: Optional[str] = None
               ) -> Tuple[dict,pd.DataFrame]:

    """Extract every series of an E2E file together with its metadata.

    For each series in the file this runs, in order, extract_file_meta(),
    extract_en_face_scans() and extract_oct_scans(), collecting the scans of
    series ``i`` under the key ``'series-i'``.

    Args:
        e2e_file_path (str): The E2E file path.
        mrns_file_path (Optional[str], optional): Patients' medical record numbers database path. Defaults to None.

    Returns:
        Tuple[dict,pd.DataFrame]: A dictionary mapping 'series-<index>' to that series' scans
                                  dictionary, and a pandas dataframe with one metadata row per series.
    """
    extracted_series = {}
    metadata = pd.DataFrame()

    with HeE2eReader(e2e_file_path) as reader:
        for series_idx in range(len(reader.series)):
            # One metadata row per series; the returned MRN is not needed here.
            metadata, _ = extract_file_meta(
                series_index=series_idx,
                e2e_file_path=e2e_file_path,
                mrns_file_path=mrns_file_path,
                metadata=metadata,
                e2e_reader=reader,
            )

            # Both extractors fill and hand back the same per-series dictionary.
            series_scans = extract_en_face_scans(
                series_index=series_idx,
                scans={},
                metadata=metadata,
                e2e_reader=reader,
            )
            series_scans = extract_oct_scans(
                series_index=series_idx,
                scans=series_scans,
                metadata=metadata,
                e2e_reader=reader,
            )

            extracted_series[f'series-{series_idx}'] = series_scans

    return extracted_series, metadata

# Samples extraction

In [None]:
# your code goes here 
# define the metadata file path
# define the e2e data path
# extract samples for each combination