# Libraries

## Standard

In [1]:
import numpy as np
import os

import warnings
warnings.filterwarnings('ignore')

## Statisitcs

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Utilities

In [3]:
def get_scan_paths():
    #for MAC
    slices_path = r'/Volumes/fsmresfiles/Ophthalmology/Mirza_Images/AMD/dAMD_GA/all_slices_3'
    slices = os.listdir(slices_path)

    # For windows
    # slices_path = r'\\fsmresfiles.fsm.northwestern.edu\fsmresfiles\Ophthalmology\Mirza_Images\AMD\dAMD_GA\all_slices_3'
    # slices = os.listdir(all_slices_path)

    scan_paths = [item for item in slices if item.endswith('.jpg')]
    return scan_paths

In [13]:
def create_model_df(scan_paths):

    landmark_path = '../data/fovea_landmarks.csv'
    data_landmarks = pd.read_csv(landmark_path)

    scan_metadata_path = '../data/fovea_detection_data.csv'
    data_metadata = pd.read_csv(scan_metadata_path)

    filtered_data = data_metadata[data_metadata['scan_name'].isin(scan_paths)]
    
    filtered_data['label'] = filtered_data['status'].astype(int)

    sorted_filtered_data = filtered_data.sort_values(by=['patient_id','folder_name','scan_number'])

    return data_landmarks, sorted_filtered_data


In [24]:
def assign_sequence(landmarks_mapping,scan_number, patient_id, folder_name):
    # Retrieve the landmarks for the given patient and folder
    landmarks = landmarks_mapping.get((patient_id, folder_name), None)
    if landmarks is None:
        return 'unknown', -1  # Handle cases where no mapping is found

    if scan_number < landmarks['start']:
        return 'no fovea', 0
    elif scan_number == landmarks['start']:
        return 'start', 1
    elif scan_number > landmarks['start'] and scan_number < landmarks['deepest']:
        return 'start progression', 2
    elif scan_number == landmarks['deepest']:
        return 'mid', 3
    elif scan_number > landmarks['deepest'] and scan_number < landmarks['end']:
        return 'end progression', 4
    elif scan_number == landmarks['end']:
        return 'end', 5
    else:
        return 'no fovea', 6

In [42]:
def landmark_labels(fovea_landmarks,fovea_detection_data):

    # Initialize a dictionary to hold the landmarks for each patient and folder
    landmarks_mapping = {}
    for index, row in fovea_landmarks.iterrows():
        key = (row['patient_id'], row['folder_name'])
        landmarks_mapping[key] = {
            'start': row['start'],
            'deepest': row['deepest'],
            'end': row['end']
        }

    # Apply the function to each row
    results = fovea_detection_data.apply(lambda row: assign_sequence(landmarks_mapping,row['scan_number'], row['patient_id'], row['folder_name']), axis=1)
    fovea_detection_data['sequence'], fovea_detection_data['sequence_label'] = zip(*results)
    
    
    return fovea_detection_data

# Main

In [14]:
scan_paths = get_scan_paths()
df_landmarks, df_metadata = create_model_df(scan_paths)
df_landmarks.info()
df_metadata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41 entries, 0 to 40
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   patient_id     41 non-null     int64 
 1   folder_name    41 non-null     object
 2   start          41 non-null     int64 
 3   deepest        41 non-null     int64 
 4   end            41 non-null     int64 
 5   start_slice    41 non-null     object
 6   deepest_slice  41 non-null     object
 7   end_slice      41 non-null     object
dtypes: int64(4), object(4)
memory usage: 2.7+ KB
<class 'pandas.core.frame.DataFrame'>
Index: 21500 entries, 10202 to 12453
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   scan_name    21500 non-null  object
 1   scan_number  21500 non-null  int64 
 2   folder_name  21500 non-null  object
 3   patient_id   21500 non-null  int64 
 4   eye_side     21500 non-null  object
 5   status       21500 

In [47]:
df_landmarks.head()

Unnamed: 0,patient_id,folder_name,start,deepest,end,start_slice,deepest_slice,end_slice
0,64,064_OD_7_GA_6x6_SSOCT_2,160,244,341,064_OD_7_GA_6x6_SSOCT_2_160.jpg,064_OD_7_GA_6x6_SSOCT_2_244.jpg,064_OD_7_GA_6x6_SSOCT_2_341.jpg
1,64,064_OD_7_GA_6x6_SSOCT_3,124,208,311,064_OD_7_GA_6x6_SSOCT_3_124.jpg,064_OD_7_GA_6x6_SSOCT_3_208.jpg,064_OD_7_GA_6x6_SSOCT_3_311.jpg
2,64,064_OD_8_GA_6x6_SSOCT_1,170,263,352,064_OD_8_GA_6x6_SSOCT_1_170.jpg,064_OD_8_GA_6x6_SSOCT_1_263.jpg,064_OD_8_GA_6x6_SSOCT_1_352.jpg
3,64,064_OD_9_GA_6x6_SSOCT_1,163,253,352,064_OD_9_GA_6x6_SSOCT_1_163.jpg,064_OD_9_GA_6x6_SSOCT_1_253.jpg,064_OD_9_GA_6x6_SSOCT_1_352.jpg
4,64,064_OD_10_GA_6x6_SSOCT_1,155,258,348,064_OD_10_GA_6x6_SSOCT_1_155.jpg,064_OD_10_GA_6x6_SSOCT_1_258.jpg,064_OD_10_GA_6x6_SSOCT_1_348.jpg


In [48]:
df_metadata.head()

Unnamed: 0,scan_name,scan_number,folder_name,patient_id,eye_side,status,label,sequence,sequence_label
10202,047_OS_1_GA_6x6_SSOCT_1_1.jpg,1,047_OS_1_GA_6x6_SSOCT_1,47,left,False,0,no fovea,0
10439,047_OS_1_GA_6x6_SSOCT_1_2.jpg,2,047_OS_1_GA_6x6_SSOCT_1,47,left,False,0,no fovea,0
10021,047_OS_1_GA_6x6_SSOCT_1_3.jpg,3,047_OS_1_GA_6x6_SSOCT_1,47,left,False,0,no fovea,0
10301,047_OS_1_GA_6x6_SSOCT_1_4.jpg,4,047_OS_1_GA_6x6_SSOCT_1,47,left,False,0,no fovea,0
10164,047_OS_1_GA_6x6_SSOCT_1_5.jpg,5,047_OS_1_GA_6x6_SSOCT_1,47,left,False,0,no fovea,0


In [43]:
df_model = landmark_labels(df_landmarks, df_metadata)

In [44]:
df_model.info()

<class 'pandas.core.frame.DataFrame'>
Index: 21500 entries, 10202 to 12453
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   scan_name       21500 non-null  object
 1   scan_number     21500 non-null  int64 
 2   folder_name     21500 non-null  object
 3   patient_id      21500 non-null  int64 
 4   eye_side        21500 non-null  object
 5   status          21500 non-null  bool  
 6   label           21500 non-null  int64 
 7   sequence        21500 non-null  object
 8   sequence_label  21500 non-null  int64 
dtypes: bool(1), int64(4), object(4)
memory usage: 1.5+ MB


In [45]:
df_model.head()

Unnamed: 0,scan_name,scan_number,folder_name,patient_id,eye_side,status,label,sequence,sequence_label
10202,047_OS_1_GA_6x6_SSOCT_1_1.jpg,1,047_OS_1_GA_6x6_SSOCT_1,47,left,False,0,no fovea,0
10439,047_OS_1_GA_6x6_SSOCT_1_2.jpg,2,047_OS_1_GA_6x6_SSOCT_1,47,left,False,0,no fovea,0
10021,047_OS_1_GA_6x6_SSOCT_1_3.jpg,3,047_OS_1_GA_6x6_SSOCT_1,47,left,False,0,no fovea,0
10301,047_OS_1_GA_6x6_SSOCT_1_4.jpg,4,047_OS_1_GA_6x6_SSOCT_1,47,left,False,0,no fovea,0
10164,047_OS_1_GA_6x6_SSOCT_1_5.jpg,5,047_OS_1_GA_6x6_SSOCT_1,47,left,False,0,no fovea,0


In [46]:
df_model.to_csv('../data/fovea_progression_dataset.csv', index=False)