In [None]:
from pathlib import Path

import pandas as pd

from utils import load_encrypted_xlsx

In [None]:
# Machine-specific absolute input/output locations; adjust them before running elsewhere.
# Pupillometry measurements (CSV), loaded as `pupillometry_df`.
data_path = '/Users/jk1/Downloads/data_saving/exclude_nan_outcome_False/DCI_ischemia_normalised_pupillometry_df.csv'
# GCS export (semicolon-separated CSV), loaded as `gcs_df`.
gcs_path = '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/dci_sah/data/pdms_data/Transfer Urs.pietsch@kssg.ch 22.01.24, 15_34/20240117_SAH_SOS_GCS.csv'
# Registry workbook, opened with `load_encrypted_xlsx` as `registry_df`.
registry_data_path = '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/dci_sah/data/sos_sah_data/post_hoc_modified_aSAH_DATA_2009_2023_24122023.xlsx'
# Outcomes workbook, opened with `load_encrypted_xlsx` as `outcomes_df`.
outcomes_data_path = '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/dci_sah/data/sos_sah_data/original_data/outcomes_aSAH_DATA_2009_2024_17022024.xlsx'
# Correspondence table (CSV) that is left-merged onto the registry and outcomes tables.
registry_pdms_correspondence_path = '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/dci_sah/data/pdms_data/registry_pdms_correspondence.csv'
# Directory that receives `population_stats.csv`.
output_dir = '/Users/jk1/Downloads/'

In [None]:
# Registry column used to split the cohort: rows equal to 1 vs. rows equal to 0.
target = 'DCI_ischemia'

In [None]:
# Workbooks opened through the project's `load_encrypted_xlsx` helper.
registry_df = load_encrypted_xlsx(registry_data_path)
outcomes_df = load_encrypted_xlsx(outcomes_data_path)

# Plain CSV inputs; the GCS export is semicolon-delimited.
pupillometry_df = pd.read_csv(data_path)
registry_pdms_correspondence_df = pd.read_csv(registry_pdms_correspondence_path)
gcs_df = pd.read_csv(gcs_path, decimal='.', sep=';')


In [None]:
# Preview the first rows of the pupillometry table.
pupillometry_df.head()

In [None]:
# Normalised pupillometry measure columns.
measures = [
    'NPI_r_value_normalised',
    'NPI_l_value_normalised',
    'CV_r_value_normalised',
    'CV_l_value_normalised',
]
# Keep a row as long as at least one of the measures is present,
# i.e. discard rows where every measure is NaN.
pupillometry_df = pupillometry_df.loc[pupillometry_df[measures].notna().any(axis=1)]

In [None]:
# Unique admission identifiers (pNr) that have pupillometry data.
# Missing pNr values are dropped first: `unique()` keeps NaN as if it were an
# admission, which inflates the count below and makes the later
# `isin(included_admissions)` filters match rows whose pNr is missing.
included_admissions = pupillometry_df.pNr.dropna().unique()
# This counts pupillometry rows (not admissions) that carry no pNr.
n_pnr_nan = pupillometry_df.pNr.isna().sum()
print(f'Number of included admissions: {len(included_admissions)}')
print(f'Number of pupillometry rows with missing pNr: {n_pnr_nan}')

In [None]:
# Align the correspondence table with the key columns used for the joins below.
registry_pdms_correspondence_df.rename(columns={'JoinedName': 'Name'}, inplace=True)
registry_pdms_correspondence_df['Date_birth'] = pd.to_datetime(
    registry_pdms_correspondence_df['Date_birth'],
    format='%Y-%m-%d',
)

# Left joins: every registry / outcome row is kept, matched or not.
patient_keys = ['SOS-CENTER-YEAR-NO.', 'Name', 'Date_birth']
registry_df = pd.merge(registry_df, registry_pdms_correspondence_df, how='left', on=patient_keys)
outcomes_df = pd.merge(outcomes_df, registry_pdms_correspondence_df, how='left', on=patient_keys)

In [None]:
# Restrict both tables to admissions with pupillometry data, then keep a single
# row per pNr (drop_duplicates retains the first occurrence).
registry_df = (
    registry_df
    .loc[registry_df['pNr'].isin(included_admissions)]
    .drop_duplicates(subset='pNr')
)
outcomes_df = (
    outcomes_df
    .loc[outcomes_df['pNr'].isin(included_admissions)]
    .drop_duplicates(subset='pNr')
)

In [None]:
# Number of distinct admissions (pNr) present in the registry table.
registry_df.pNr.nunique()

In [None]:
# Preview the first rows of the registry table.
registry_df.head()

variables to extract
- age
- sex
- admission GCS
- admission WFNS
- admission Fisher
- mortality
- LOS
- 3 month mRS / GOS (note: the code below currently reports the 1 year mRS, `mRS_FU_1y`)

in groups
- all
- DCI
- no DCI

In [None]:
# Count registry rows where the Death column is missing.
registry_df.Death.isnull().sum()

In [None]:
# Preprocess registry data: encode sex numerically, case-insensitively
# ('M' -> 0, 'F' / 'W' -> 1). Values outside the mapping become NaN.
sex_codes = {'M': 0, 'F': 1, 'W': 1}
registry_df['Sex'] = registry_df['Sex'].str.upper().map(sex_codes)

In [None]:
# Total GCS is the sum of its three components.
gcs_df['GCS'] = gcs_df.eyes + gcs_df.verbal + gcs_df.movement

# Earliest GCS record per admission. Sorting once and keeping the first row per pNr
# replaces `groupby('pNr').apply(lambda x: ... .iloc[0])`, which relied on the grouping
# column being passed to the applied function (deprecated since pandas 2.2). Once the
# grouping column is excluded, `reset_index(drop=True)` throws away the only copy of
# 'pNr' and the merge below fails.
# NOTE(review): timeGCS is sorted exactly as loaded from the CSV - confirm that its
# format sorts chronologically.
first_gcs_df = (
    gcs_df
    .dropna(subset=['pNr'])  # groupby also ignored rows without a pNr
    .sort_values(['pNr', 'timeGCS'], ascending=True)
    .drop_duplicates(subset='pNr', keep='first')
    .reset_index(drop=True)
    .rename(columns={'GCS': 'GCS_pdms', 'intubated': 'intubated_pdms'})
)

# The PDMS values only fill gaps in the registry's own admission fields.
registry_df = registry_df.merge(first_gcs_df[['pNr', 'GCS_pdms', 'intubated_pdms']], on='pNr', how='left')
registry_df['GCS_admission'] = registry_df['GCS_admission'].fillna(registry_df['GCS_pdms'])
registry_df['Intubated_on_admission_YN'] = registry_df['Intubated_on_admission_YN'].fillna(registry_df['intubated_pdms'])

In [None]:
# Coerce the Fisher score to numeric in place. The result used to be written to a
# misspelled 'Fischer_Score' column that nothing reads, which left 'Fisher_Score'
# itself unconverted.
registry_df['Fisher_Score'] = pd.to_numeric(registry_df['Fisher_Score'])

In [None]:
# Preview the registry table again to check the preprocessing so far.
registry_df.head()

In [None]:
# Length of stay in whole days: discharge date minus admission date.
discharge_dates = pd.to_datetime(registry_df['Date_Discharge'])
admission_dates = pd.to_datetime(registry_df['Date_admission'])
registry_df['los'] = (discharge_dates - admission_dates).dt.days

In [None]:
def _median_iqr_stats(values, prefix, fmt):
    """Summarise a column as median and inter-quartile range.

    Parameters
    ----------
    values : pandas.Series
        Column to summarise. It is coerced with ``pd.to_numeric`` so that numbers
        stored as strings are treated identically for the median and both quartiles.
    prefix : str
        Prefix of the generated keys: ``<prefix>_median``, ``<prefix>_q1``,
        ``<prefix>_q3`` and ``<prefix>_str``.
    fmt : str
        Format spec (e.g. ``'.0f'``) applied to each number in the
        ``"median (Q1-Q3)"`` string.

    Returns
    -------
    dict
        The three statistics followed by the formatted string, in that order.
    """
    numeric = pd.to_numeric(values)
    median = numeric.median()
    q1 = numeric.quantile(0.25)
    q3 = numeric.quantile(0.75)
    return {
        f'{prefix}_median': median,
        f'{prefix}_q1': q1,
        f'{prefix}_q3': q3,
        f'{prefix}_str': f'{median:{fmt}} ({q1:{fmt}}-{q3:{fmt}})',
    }


def _count_and_share(flags, n_total):
    """Return ``(count, count / n_total)`` for a 0/1 column; the share is NaN when ``n_total`` is 0."""
    count = flags.sum()
    share = count / n_total if n_total else float('nan')
    return count, share


def get_population_stats(registry_df, outcomes_df, pupillometry_df):
    """Build a one-row table of descriptive statistics for a cohort.

    Parameters
    ----------
    registry_df : pandas.DataFrame
        One row per admission. Reads the columns ``pNr``, ``Age``, ``Sex``,
        ``GCS_admission``, ``WFNS``, ``Fisher_Score``, ``los`` and ``Death``.
        ``Sex`` and ``Death`` are summed, so they are expected to be 0/1 coded.
    outcomes_df : pandas.DataFrame
        Reads the column ``mRS_FU_1y``.
    pupillometry_df : pandas.DataFrame
        Reads the column ``pNr``; its number of distinct values is ``n_patients``.

    Returns
    -------
    pandas.DataFrame
        A single row holding, for every variable, the raw statistics
        (``*_median`` / ``*_q1`` / ``*_q3`` or ``n_*`` / ``p_*``) and a
        pre-formatted ``*_str`` column ("median (Q1-Q3)" or "n (%)").

    Notes
    -----
    Proportions use the number of distinct ``pNr`` in ``registry_df`` as the
    denominator, so rows with a missing ``Sex`` / ``Death`` count as "no".
    """
    n_registry = registry_df.pNr.nunique()

    stats = {'n_patients': pupillometry_df.pNr.nunique()}

    stats.update(_median_iqr_stats(registry_df.Age, 'age', '.1f'))

    n_female, p_female = _count_and_share(registry_df.Sex, n_registry)
    stats['n_female'] = n_female
    stats['p_female'] = p_female
    # `.0f` keeps the count free of a trailing ".0" when Sex contains NaN
    # (float column), matching how the mortality count is formatted.
    stats['female_str'] = f'{n_female:.0f} ({p_female * 100:.1f}%)'

    stats.update(_median_iqr_stats(registry_df.GCS_admission, 'gcs_admission', '.0f'))
    stats.update(_median_iqr_stats(registry_df.WFNS, 'wfns', '.0f'))
    stats.update(_median_iqr_stats(registry_df['Fisher_Score'], 'fisher', '.0f'))
    stats.update(_median_iqr_stats(registry_df.los, 'los', '.0f'))

    n_mortality, p_mortality = _count_and_share(registry_df.Death, n_registry)
    stats['n_mortality'] = n_mortality
    stats['p_mortality'] = p_mortality
    stats['mortality_str'] = f'{n_mortality:.0f} ({p_mortality * 100:.1f}%)'

    # The median is now coerced to numeric exactly like its quartiles; it used to be
    # taken on the raw column and broke (or disagreed) when mRS was stored as text.
    stats.update(_median_iqr_stats(outcomes_df['mRS_FU_1y'], '1y_mrs', '.0f'))

    return pd.DataFrame([stats])

In [None]:
# Descriptive statistics for the whole included cohort.
overall_population_df = get_population_stats(
    registry_df=registry_df,
    outcomes_df=outcomes_df,
    pupillometry_df=pupillometry_df,
)
overall_population_df

In [None]:
# Admissions where the target column equals 1, and their statistics.
dci_pnr = registry_df.loc[registry_df[target] == 1, 'pNr'].astype(int).unique()
dci_population_df = get_population_stats(
    registry_df[registry_df.pNr.isin(dci_pnr)],
    outcomes_df[outcomes_df.pNr.isin(dci_pnr)],
    pupillometry_df[pupillometry_df.pNr.isin(dci_pnr)],
)
dci_population_df

In [None]:
# Admissions where the target column equals 0, and their statistics.
no_dci_pnr = registry_df.loc[registry_df[target] == 0, 'pNr'].astype(int).unique()
no_dci_population_df = get_population_stats(
    registry_df[registry_df.pNr.isin(no_dci_pnr)],
    outcomes_df[outcomes_df.pNr.isin(no_dci_pnr)],
    pupillometry_df[pupillometry_df.pNr.isin(no_dci_pnr)],
)
no_dci_population_df

In [None]:
# Stack the three one-row tables under a group key, drop the inner row index
# and transpose so that each group becomes a column and each statistic a row.
full_population_df = (
    pd.concat(
        [overall_population_df, dci_population_df, no_dci_population_df],
        keys=['overall', 'dci', 'no_dci'],
    )
    .droplevel(1)
    .T
)

In [None]:
# Display the complete table (one column per group, one row per statistic).
full_population_df

In [None]:
# Keep only the patient count and the pre-formatted summary strings.
summary_rows = [
    'n_patients',
    'age_str',
    'female_str',
    'gcs_admission_str',
    'wfns_str',
    'fisher_str',
    'los_str',
    'mortality_str',
    '1y_mrs_str',
]
str_pop_df = full_population_df.loc[summary_rows]

In [None]:
# Human-readable labels for the summary table. Rows are relabelled by key rather
# than by position, so reordering the selected rows cannot silently attach a label
# to the wrong statistic. `rename` returns a new frame instead of mutating in place.
row_labels = {
    'n_patients': 'Number of patients',
    'age_str': 'Age',
    'female_str': 'Sex (Female)',
    'gcs_admission_str': 'Admission GCS',
    'wfns_str': 'Admission WFNS',
    'fisher_str': 'Admission Fisher',
    'los_str': 'Length of stay',
    'mortality_str': 'Hospital mortality',
    '1y_mrs_str': '1 year mRS',
}
column_labels = {'overall': 'Overall population', 'dci': 'DCI', 'no_dci': 'No DCI'}
str_pop_df = str_pop_df.rename(index=row_labels, columns=column_labels)
str_pop_df

In [None]:
# Join the directory and file name with pathlib so the target path is correct
# whether or not `output_dir` ends with a path separator.
str_pop_df.to_csv(Path(output_dir) / 'population_stats.csv')