In [None]:
from io import StringIO

import pandas as pd
from statsmodels.miscmodels.ordinal_model import OrderedModel

from utils import load_encrypted_xlsx

In [None]:
# Input file locations used by the loading cells of this notebook.
# NOTE(review): these are hard-coded absolute paths tied to one user's machine; the notebook
# will not run elsewhere unless they are edited — consider deriving them from a configurable data directory.

# CSV that is read into `df` with pd.read_csv
pupillometry_processed_data = '/Users/jk1/temp/cereblink/pupillometry/data_saving/exclude_nan_outcome_False/DCI_ischemia_normalised_pupillometry_df.csv'
# Outcomes registry workbook, opened with load_encrypted_xlsx
outcomes_registry_data_path = '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/dci_sah/data/sos_sah_data/original_data/outcomes_aSAH_DATA_2009_2024_17022024.xlsx'
# Registry workbook, opened with load_encrypted_xlsx
registry_data_path = '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/dci_sah/data/sos_sah_data/post_hoc_modified_aSAH_DATA_2009_2023_24122023.xlsx'
# CSV linking registry entries to PDMS records (read with pd.read_csv and merged onto the outcomes registry)
registry_pdms_correspondence_path = '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/dci_sah/data/pdms_data/registry_pdms_correspondence.csv'

In [None]:
# Load the processed pupillometry CSV; later cells add per-row inter-eye minima to `df` and group it by 'pNr'
df = pd.read_csv(pupillometry_processed_data)

In [None]:
# Columns that identify the same registry entry across all three tables
registry_join_keys = ['SOS-CENTER-YEAR-NO.', 'Name', 'Date_birth']

# Both registry workbooks are opened through the project helper load_encrypted_xlsx
outcomes_registry_df = load_encrypted_xlsx(outcomes_registry_data_path)
registry_df = load_encrypted_xlsx(registry_data_path)

# Correspondence table: parse the birth date and expose 'JoinedName' as 'Name'
# so that its columns line up with the join keys above
registry_pdms_correspondence_df = (
    pd.read_csv(registry_pdms_correspondence_path)
    .assign(Date_birth=lambda frame: pd.to_datetime(frame['Date_birth'], format='%Y-%m-%d'))
    .rename(columns={'JoinedName': 'Name'})
)

# Left joins: every outcomes row is kept, first enriched with the correspondence columns,
# then with the registry columns (the combined result is stored under `registry_df`)
outcomes_registry_df = pd.merge(
    outcomes_registry_df,
    registry_pdms_correspondence_df,
    how='left',
    on=registry_join_keys,
)
registry_df = pd.merge(
    outcomes_registry_df,
    registry_df,
    how='left',
    on=registry_join_keys,
)

In [None]:
# Keep only registry rows that carry a patient number ('pNr'); rows without one cannot be
# joined to the pupillometry summary later on.
# .copy() makes the filtered frame independent of the unfiltered one, so the column assignment
# below writes to a real frame instead of a slice (avoids SettingWithCopyWarning).
registry_df = registry_df[registry_df['pNr'].notna()].copy()

# Coerce the outcome column to numeric; entries that cannot be parsed become NaN.
# Bracket assignment is used because attribute-style assignment silently creates a plain
# Python attribute (not a column) if the column name is ever missing or misspelled.
registry_df['mRS_FU_1y'] = pd.to_numeric(registry_df['mRS_FU_1y'], errors='coerce')

In [None]:
# For each metric, the new column holds the row-wise minimum of the right-eye and left-eye columns
inter_eye_min_sources = {
    'inter_eye_min_NPI': ['NPI_r_value', 'NPI_l_value'],
    'inter_eye_min_CV': ['CV_r_value', 'CV_l_value'],
    'inter_eye_min_norm_NPI': ['NPI_r_value_normalised', 'NPI_l_value_normalised'],
    'inter_eye_min_norm_CV': ['CV_r_value_normalised', 'CV_l_value_normalised'],
}
for target_column, eye_columns in inter_eye_min_sources.items():
    df[target_column] = df[eye_columns].min(axis=1)

In [None]:
# Aggregation function: relative frequency of values below 3 among the non-missing values
def relative_frequency_less_than_3(x):
    """Return the fraction of non-missing values in ``x`` that are strictly below 3.

    Missing values (NaN) are excluded from both the count and the denominator: a NaN
    can never satisfy ``< 3``, so keeping it in the denominator would bias the frequency
    downwards. This also matches the median aggregations used alongside this function,
    which skip NaN as well.

    Parameters
    ----------
    x : pandas.Series or array-like
        Numeric values, e.g. the values of one group passed in by ``groupby(...).agg``.

    Returns
    -------
    float
        Share of valid values below 3, or NaN when ``x`` holds no valid value.
    """
    valid = pd.Series(x).dropna()
    if valid.empty:
        # No usable measurement: the frequency is undefined rather than 0
        return float('nan')
    return (valid < 3).sum() / len(valid)

In [None]:
# Per-patient ('pNr') median of each inter-eye minimum metric
median_columns = [
    'inter_eye_min_NPI',
    'inter_eye_min_CV',
    'inter_eye_min_norm_NPI',
    'inter_eye_min_norm_CV',
]
summary_df = (
    df.groupby('pNr')
    .agg({metric: 'median' for metric in median_columns})
    .reset_index()
)

# Per-patient value of relative_frequency_less_than_3 applied to the inter-eye minimum NPI,
# stored under its own column name so it does not clash with the median column
npi_rel_f_less_than_3_df = (
    df.groupby('pNr')
    .agg({'inter_eye_min_NPI': relative_frequency_less_than_3})
    .reset_index()
    .rename(columns={'inter_eye_min_NPI': 'inter_eye_min_NPI_rel_f_less_than_3'})
)

# Combine both per-patient tables into one summary frame
summary_df = summary_df.merge(npi_rel_f_less_than_3_df, on='pNr', how='left')

In [None]:
# Attach the registry columns to each patient's pupillometry summary; the left join keeps
# every row of summary_df even when no registry row shares its 'pNr'
joined_df = pd.merge(summary_df, registry_df, how='left', on='pNr')

In [None]:
# Scatter plots of each per-patient pupillometry summary metric (e.g. inter_eye_min_NPI, inter_eye_min_CV) against mRS_FU_1y
import seaborn as sns
import matplotlib.pyplot as plt

# One scatter plot per summary metric against mRS_FU_1y; the 'pNr' identifier column is not a metric
metric_columns = [name for name in summary_df.columns if name != 'pNr']
for metric in metric_columns:
    sns.scatterplot(data=joined_df, x=metric, y='mRS_FU_1y')
    plt.show()

In [None]:
# Outcome column to be modelled
dependent_variable = 'mRS_FU_1y'

# Pupillometry predictors entered into the model
independent_variables = [
    'inter_eye_min_NPI',
    'inter_eye_min_CV',
]
# Alternative predictor sets (uncomment one line to use it instead):
# independent_variables = ['inter_eye_min_norm_NPI', 'inter_eye_min_norm_CV']
# independent_variables = ['inter_eye_min_NPI_rel_f_less_than_3']

In [None]:
# Adjustment covariates added next to the pupillometry predictors.
# Suggested candidates: WFNS/HH, age, Fisher / intraventricular / intraparenchymal hemorrhage
covariates = [
    'Age',
    'WFNS',
    'Fisher_Score',
]

In [None]:
# Columns that enter the model: outcome, pupillometry predictors and covariates
model_columns = [dependent_variable] + independent_variables + covariates

# Work on an explicit copy of the selected columns: assigning into a bare column selection
# of joined_df raises SettingWithCopyWarning and leaves it unclear which frame is modified
temp_df = joined_df[model_columns].copy()

# Covariates may have been read as text; values that cannot be parsed become NaN
for covariate in covariates:
    temp_df[covariate] = pd.to_numeric(temp_df[covariate], errors='coerce')

# Complete-case analysis: drop every row with a missing value in any model column
temp_df = temp_df.dropna(subset=model_columns)

In [None]:
# Ordinal regression with a logit link: outcome column against predictors plus covariates
ordinal_outcome = temp_df[dependent_variable]
predictor_matrix = temp_df[independent_variables + covariates]
mod_log = OrderedModel(ordinal_outcome, predictor_matrix, distr='logit')

In [None]:
# Fit the ordinal model, allowing up to 10000 iterations
res_log = mod_log.fit(maxiter=10000)
# Last expression of the cell, so the summary is rendered as the cell output
res_log.summary()

In [None]:
# Convert the fitted model's summary into a DataFrame by round-tripping through HTML
results_as_html = res_log.summary().as_html()
# pd.read_html expects a path, URL or file-like object; passing the HTML text directly is
# deprecated, so the string is wrapped in StringIO. read_html returns one frame per table in
# the HTML; index [1] selects the second one (presumably the coefficient table — confirm).
results_df = pd.read_html(StringIO(results_as_html), index_col=0, header=0)[1]