In [None]:
import getpass
import io
import os

import matplotlib
import msoffcrypto
import pandas as pd
import seaborn as sns

In [None]:
# Location of the registry workbook. The default is a machine-specific
# absolute path; set the ASAH_REGISTRY_PATH environment variable to run the
# notebook against a copy stored elsewhere without editing this cell.
registry_data_path = os.environ.get(
    'ASAH_REGISTRY_PATH',
    '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/pupillometry_sah/data/aSAH_DATA_2009_2023_24122023.xlsx',
)

In [None]:
# Ask for the workbook password interactively so it never appears in the notebook.
password = getpass.getpass(prompt='Password: ')

In [None]:
# Decrypt the workbook into an in-memory buffer; no decrypted copy is
# written to disk by this cell.
decrypted_workbook = io.BytesIO()
with open(registry_data_path, 'rb') as encrypted_handle:
    office_file = msoffcrypto.OfficeFile(encrypted_handle)
    # The key must be loaded before decrypt() is called.
    office_file.load_key(password=password)
    office_file.decrypt(decrypted_workbook)

In [None]:
# Load the 'DATA' sheet of the decrypted workbook into a DataFrame.
registry_df = pd.read_excel(
    decrypted_workbook,
    sheet_name='DATA',
)

In [None]:
# Preview the first rows of the loaded registry table.
# NOTE(review): the output may show patient-level data — clear it before sharing the notebook.
registry_df.head()

In [None]:
def safe_conversion_to_datetime(date):
    """Convert a single value to a pandas datetime, falling back to ``pd.NaT``.

    Args:
        date: Value to convert; passed unchanged to ``pd.to_datetime``.

    Returns:
        Whatever ``pd.to_datetime(date)`` returns, or ``pd.NaT`` if the
        conversion raises ``ValueError``, ``TypeError`` or ``OverflowError``.
    """
    try:
        return pd.to_datetime(date)
    except (ValueError, TypeError, OverflowError):
        # Only conversion failures are swallowed; a bare ``except`` would also
        # trap KeyboardInterrupt/SystemExit and make the cell uninterruptible.
        return pd.NaT

In [None]:
# Compute time from ictus to CVS start and to infarction (DCI date not available).
# Date_Ictus is converted once and reused for both intervals instead of being
# re-converted element by element for each subtraction.
ictus_datetime = registry_df['Date_Ictus'].apply(safe_conversion_to_datetime)

registry_df['time_to_cvs'] = registry_df['Date_CVS_Start'].apply(safe_conversion_to_datetime) - ictus_datetime

registry_df['time_to_infarction'] = registry_df['Date_Infarction'].apply(safe_conversion_to_datetime) - ictus_datetime

In [None]:
# Blank out intervals where the event date precedes the ictus date
# (negative durations) by replacing them with NaT.
for interval_column in ('time_to_cvs', 'time_to_infarction'):
    is_negative = registry_df[interval_column] < pd.Timedelta(0)
    registry_df.loc[is_negative, interval_column] = pd.NaT

In [None]:
# Count distinct values of the patient identifier column.
n_patients = registry_df['SOS-CENTER-YEAR-NO.'].nunique()
print('Number of patients in registry: {}'.format(n_patients))

In [None]:
# Tabulate DCI_YN, keeping missing values as their own category.
registry_df['DCI_YN'].value_counts(dropna=False)

In [None]:
# Tabulate CVS_YN, keeping missing values as their own category (dropna=False).
registry_df['CVS_YN'].value_counts(dropna=False)

In [None]:
# Tabulate Infarction_YN, keeping missing values as their own category (dropna=False).
registry_df['Infarction_YN'].value_counts(dropna=False)

Restrict to patients admitted after 2019-01-01 (when we started collecting pupillometry data)

In [None]:
# Keep only rows whose admission date is strictly after 2019-01-01.
admitted_after_cutoff = registry_df['Date_admission'] > '2019-01-01'
registry_after_2019_df = registry_df.loc[admitted_after_cutoff]


In [None]:
# Count distinct values of the patient identifier column in the restricted subset.
n_patients_after_2019 = registry_after_2019_df['SOS-CENTER-YEAR-NO.'].nunique()
print('Number of patients in registry after 2019: {}'.format(n_patients_after_2019))

In [None]:
# Tabulate DCI_YN in the restricted subset, keeping missing values as their own category.
registry_after_2019_df['DCI_YN'].value_counts(dropna=False)

In [None]:
# Tabulate CVS_YN in the restricted subset, keeping missing values as their own category (dropna=False).
registry_after_2019_df['CVS_YN'].value_counts(dropna=False)

In [None]:
# Tabulate Infarction_YN in the restricted subset, keeping missing values as their own category (dropna=False).
registry_after_2019_df['Infarction_YN'].value_counts(dropna=False)

## Evaluate time to CVS and Infarction 

In [None]:
# Derive whole-day columns from the timedelta columns (the .dt.days component).
day_columns = {
    'time_to_cvs_days': 'time_to_cvs',
    'time_to_infarction_days': 'time_to_infarction',
}
for days_column, interval_column in day_columns.items():
    registry_df[days_column] = registry_df[interval_column].dt.days

In [None]:
# Summary statistics for the time_to_cvs column.
registry_df['time_to_cvs'].describe()

In [None]:
# Summary statistics for the time_to_infarction column.
registry_df['time_to_infarction'].describe()

In [None]:
# Histogram of time to CVS: days on the x axis, number of patients on the
# y axis, stacked by DCI_YN. Only days 0 to 30 are shown.
# time_to_cvs_days holds whole days, so discrete=True gives each day its own
# unit-wide bar. With bins=30 over (0, 30) the last bin is closed on both
# sides and would lump day 29 together with day 30.
ax = sns.histplot(
    data=registry_df,
    x='time_to_cvs_days',
    hue='DCI_YN',
    multiple='stack',
    discrete=True,
    binrange=(0, 30),
)

# Trailing semicolon keeps the cell output to the figure only.
ax.set(xlabel='Days', ylabel='Number of patients', title='Time to CVS');

In [None]:
# Histogram of time to infarction: days on the x axis, number of patients on
# the y axis, stacked by DCI_YN. Only days 0 to 30 are shown.
# time_to_infarction_days holds whole days, so discrete=True gives each day
# its own unit-wide bar. With bins=30 over (0, 30) the last bin is closed on
# both sides and would lump day 29 together with day 30.
ax = sns.histplot(
    data=registry_df,
    x='time_to_infarction_days',
    hue='DCI_YN',
    multiple='stack',
    discrete=True,
    binrange=(0, 30),
)

# Trailing semicolon keeps the cell output to the figure only.
ax.set(xlabel='Days', ylabel='Number of patients', title='Time to Infarction');