In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from matplotlib.ticker import PercentFormatter
from tqdm import tqdm

from utils import load_encrypted_xlsx

In [None]:
# Input locations used by the loading cells below.
# NOTE(review): all three are absolute, machine-specific paths; the notebook cannot run on another
# machine without editing them.

# Normalised pupillometry table (CSV), read with pd.read_csv.
data_path = '/Users/jk1/temp/cereblink/pupillometry/data_saving/exclude_nan_outcome_False/DCI_ischemia_normalised_pupillometry_df.csv'
# Registry workbook (.xlsx), opened through load_encrypted_xlsx.
registry_data_path = '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/dci_sah/data/sos_sah_data/post_hoc_modified_aSAH_DATA_2009_2023_24122023.xlsx'
# Registry <-> PDMS correspondence table (CSV), merged into the registry frame.
registry_pdms_correspondence_path = '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/dci_sah/data/pdms_data/registry_pdms_correspondence.csv'

In [None]:
# Load the pupillometry table. Column dtypes are whatever read_csv infers; 'timePupil' is
# converted to datetimes in a later cell.
df = pd.read_csv(data_path)

In [None]:
# Load the registry and attach the registry <-> PDMS correspondence table to it.
registry_df = load_encrypted_xlsx(registry_data_path)

# Prepare the correspondence table so its join keys line up with the registry:
# birth dates become datetimes and 'JoinedName' is exposed under the name 'Name'.
registry_pdms_correspondence_df = (
    pd.read_csv(registry_pdms_correspondence_path)
    .assign(Date_birth=lambda frame: pd.to_datetime(frame['Date_birth'], format='%Y-%m-%d'))
    .rename(columns={'JoinedName': 'Name'})
)

# Left join: every registry row is kept, whether or not a correspondence entry exists for it.
registry_merge_keys = ['SOS-CENTER-YEAR-NO.', 'Name', 'Date_birth']
registry_df = registry_df.merge(registry_pdms_correspondence_df, how='left', on=registry_merge_keys)

In [None]:
# Drop registry rows with a missing pNr (pNr is the key used later to join onto the pupillometry data).
has_pnr = registry_df['pNr'].notna()
registry_df = registry_df[has_pnr]

In [None]:
# Preview the first rows of the pupillometry table as the cell output.
df.head()

In [None]:
# Parse the measurement timestamps into datetimes; the stored strings carry fractional seconds.
pupil_timestamp_format = '%Y-%m-%d %H:%M:%S.%f'
df['timePupil'] = pd.to_datetime(df['timePupil'], format=pupil_timestamp_format)

In [None]:
# For every patient (pNr), annotate each measurement with the time at which the highest
# strictly earlier value was recorded, separately for NPi and CV of the right and left eye.

# value column -> name of the annotation column that receives the time of its prior maximum
PRIOR_MAX_TIME_COLUMNS = {
    'NPI_r_value': 'NPi_r_prior_max_time',
    'NPI_l_value': 'NPi_l_prior_max_time',
    'CV_r_value': 'CV_r_prior_max_time',
    'CV_l_value': 'CV_l_prior_max_time',
}


def _prior_max_times(times, values):
    """Return, per entry, the time of the maximum among entries with a strictly earlier time.

    Parameters
    ----------
    times : iterable of timestamps, sorted ascending with missing timestamps last.
    values : iterable of numbers aligned with ``times``; missing values are ignored.

    Returns
    -------
    list
        One item per entry: the timestamp of the largest value seen at any strictly earlier
        timestamp (the earliest one when the maximum occurs several times), or ``pd.NaT`` when
        no earlier non-missing value exists or the entry's own timestamp is missing.
    """
    prior_best_time = pd.NaT  # best over timestamps strictly before the current one
    seen_best_time = pd.NaT   # best over everything visited so far (current timestamp included)
    seen_best_value = None
    current_stamp = None
    result = []
    for stamp, value in zip(times, values):
        if pd.isna(stamp):
            # A missing timestamp has no "earlier" entries and never counts as earlier itself.
            result.append(pd.NaT)
            continue
        if stamp != current_stamp:
            # Moving on to a later timestamp: everything visited so far is now strictly earlier.
            # Entries sharing one timestamp therefore all receive the same answer.
            prior_best_time = seen_best_time
            current_stamp = stamp
        result.append(prior_best_time)
        # Strict '>' keeps the earliest occurrence when the maximum is reached more than once.
        if pd.notna(value) and (seen_best_value is None or value > seen_best_value):
            seen_best_value = value
            seen_best_time = stamp
    return result


def annotate_prior_max_times(patient_df, column_map=PRIOR_MAX_TIME_COLUMNS, time_col='timePupil'):
    """Return ``patient_df`` sorted by ``time_col`` with one prior-maximum time column per signal.

    Parameters
    ----------
    patient_df : pandas.DataFrame
        Measurements of a single patient.
    column_map : dict
        Maps each value column to the name of the annotation column to create.
    time_col : str
        Name of the measurement-time column.

    Returns
    -------
    pandas.DataFrame
        A new, time-sorted frame. Every annotation column is always created (filled with NaT
        where no earlier value exists), so downstream cells can rely on the columns being present.
    """
    annotated = patient_df.sort_values(by=time_col)
    for value_col, out_col in column_map.items():
        annotated[out_col] = pd.Series(
            _prior_max_times(annotated[time_col], annotated[value_col]),
            index=annotated.index,
            dtype=annotated[time_col].dtype,
        )
    return annotated


# sort=False keeps patients in order of first appearance in df.
pnr_dfs = [
    annotate_prior_max_times(patient_df)
    for _, patient_df in tqdm(df.groupby('pNr', sort=False))
]
        


In [None]:
# Stack the per-patient frames back into a single table (row-wise concatenation).
joined_df = pd.concat(pnr_dfs)

In [None]:
# Attach the registry columns to every measurement via the shared pNr key; measurements
# without a registry match are kept (left join).
joined_df = joined_df.merge(registry_df, how='left', on='pNr')

In [None]:
# Express each prior-maximum timestamp as the number of days elapsed since Date_Ictus.
seconds_per_day = 60 * 60 * 24
for signal in ('NPi_r', 'NPi_l', 'CV_r', 'CV_l'):
    elapsed = pd.to_datetime(joined_df[f'{signal}_prior_max_time']) - pd.to_datetime(joined_df['Date_Ictus'])
    joined_df[f'relative_{signal}_prior_max_time'] = elapsed.dt.total_seconds() / seconds_per_day

In [None]:
# Gather the four relative-time columns and stack them into long format (one row per
# source column and value), so that right- and left-eye values can be pooled per signal afterwards.
relative_time_columns = [
    'relative_NPi_r_prior_max_time',
    'relative_NPi_l_prior_max_time',
    'relative_CV_r_prior_max_time',
    'relative_CV_l_prior_max_time',
]
relative_times_df = joined_df[relative_time_columns].melt(var_name='variable', value_name='value')


In [None]:
# Remove the eye suffix from the variable names so right and left share one name per signal.
pooled_variable = relative_times_df['variable']
for eye_specific, pooled in (('NPi_r', 'NPi'), ('NPi_l', 'NPi'), ('CV_r', 'CV'), ('CV_l', 'CV')):
    pooled_variable = pooled_variable.str.replace(eye_specific, pooled)
relative_times_df['variable'] = pooled_variable


In [None]:
# undo melt: spread the pooled variable names back into columns. No `index=` is passed, so the
# frame's existing row index is kept as the row key.
# NOTE(review): this cell overwrites its own input; after it has run, the 'variable' and 'value'
# columns are gone, so re-running it alone fails until the long-format frame is rebuilt.
relative_times_df = relative_times_df.pivot(columns='variable', values='value')

In [None]:
# Keep strictly positive times only; the `> 0` comparison drops negative, zero and missing values alike.
cv_column = 'relative_CV_prior_max_time'
npi_column = 'relative_NPi_prior_max_time'
cv_relative_times_df = relative_times_df.loc[relative_times_df[cv_column] > 0, cv_column]
npi_relative_times_df = relative_times_df.loc[relative_times_df[npi_column] > 0, npi_column]

In [None]:
# Summary statistics of the strictly positive CV prior-maximum times (days since Date_Ictus).
cv_relative_times_df.describe()

In [None]:
# Summary statistics of the strictly positive NPi prior-maximum times (days since Date_Ictus).
npi_relative_times_df.describe()

In [None]:
# Five-colour palette built from explicit hex codes; the histogram cell uses entries 2 (NPi) and 0 (CV).
all_colors_palette = sns.color_palette(['#f61067', '#049b9a', '#012D98', '#a76dfe', '#FFA987'], n_colors=5)
# Bare expression so the palette is shown as the cell output.
all_colors_palette

In [None]:
# Superimposed histograms of the relative prior-maximum times (in days) for NPi and CV.

n_bins = 50
# single ax shared by both histograms
fig, ax = plt.subplots(figsize=(10, 6))

# stat='percent' normalises each histogram on its own, so bar heights are percentages of that
# signal's values rather than raw counts.
sns.histplot(npi_relative_times_df, bins=n_bins, stat='percent', color=all_colors_palette[2], label='NPi', ax=ax, alpha=0.4)
sns.histplot(cv_relative_times_df, bins=n_bins, stat='percent', color=all_colors_palette[0], label='CV', ax=ax, alpha=0.4)

# The axis labels are switched on and off together with the legend.
add_legend = True
if add_legend:
    ax.legend()
    ax.set_xlabel('Time since event (days)')
    ax.set_ylabel('Fraction of all reference values (%)')

# set lower bound of x axis to 0
ax.set_xlim(left=0)

# Add a % sign to the y ticks through a formatter. Overwriting the labels with fixed strings
# (set_yticklabels) leaves them stale whenever the ticks are recomputed, and int(tick)
# mislabels fractional ticks (2.5 would be shown as "2%"). Values are already on a 0-100 scale.
ax.yaxis.set_major_formatter(PercentFormatter(xmax=100))

In [None]:
# Write the histogram figure to disk as an SVG, cropping surrounding whitespace.
output_dir = '/Users/jk1/Downloads'
output_file = 'relative_max_times_histogram.svg'
output_path = '/'.join((output_dir, output_file))
fig.savefig(output_path, bbox_inches='tight', format='svg')