# Time to DCI

- time to DCI (data after 2019 verified by JK)
- time to DCI related infarction (data after 2019 verified by JK)
- time to CVS (as reported in the SOS registry)
- time to CT (as extracted from PDMS)

Possibly tie this in with the new definition of DCI as ischemia rather than infarction?

In [None]:
import pandas as pd
from utils import load_encrypted_xlsx

import seaborn as sns
from matplotlib import pyplot as plt

Load data

In [None]:
# Root folder holding all input data; edit this single line to run the notebook from another location.
data_dir = '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/dci_sah/data'

# Registry workbook (loaded below with load_encrypted_xlsx)
post_hoc_corrected_registry_path = data_dir + '/sos_sah_data/post_hoc_modified_aSAH_DATA_2009_2023_24122023.xlsx'
# CT timings export (semicolon-separated csv)
ct_timings_path = data_dir + '/pdms_data/Transfer Urs.pietsch@kssg.ch 22.01.24, 15_34/20240207_SAH_SOS_CT.csv'
# Correspondence table used to link CT rows (column 'pNr') to registry entries
registry_pdms_correspondence_path = data_dir + '/pdms_data/registry_pdms_correspondence.csv'

In [None]:
# Read the three inputs: registry workbook, CT timings and the registry/PDMS correspondence table.
registry_df = load_encrypted_xlsx(post_hoc_corrected_registry_path)
ct_timings_df = pd.read_csv(ct_timings_path, sep=';', decimal='.')

# Date_birth is parsed right away because it is used as a merge key further down.
registry_pdms_correspondence_df = pd.read_csv(registry_pdms_correspondence_path).assign(
    Date_birth=lambda frame: pd.to_datetime(frame['Date_birth'], format='%Y-%m-%d')
)

Preprocessing

In [None]:
def safe_conversion_to_datetime(date):
    """Convert *date* with ``pd.to_datetime``, falling back to ``pd.NaT`` on failure.

    Intended for element-wise use (e.g. ``Series.apply``) on columns that may
    contain values pandas cannot parse.

    Args:
        date: Value to convert.

    Returns:
        The result of ``pd.to_datetime(date)``, or ``pd.NaT`` if the
        conversion raised an exception.
    """
    try:
        return pd.to_datetime(date)
    # Exception instead of a bare except, so that KeyboardInterrupt and
    # SystemExit still propagate while any parsing error maps to NaT.
    except Exception:
        return pd.NaT

In [None]:
# Where Date_CVS_Start is missing, fall back to the first available modality date.
# The order matters: DSA is tried first, then CTA, then TCD.
for fallback_col in ('Date_CVS_DSA', 'Date_CVS_CTA', 'Date_CVS_TCD'):
    start_missing = registry_df['Date_CVS_Start'].isnull() & registry_df[fallback_col].notnull()
    registry_df.loc[start_missing, 'Date_CVS_Start'] = registry_df[fallback_col]

# Patients with a parseable Date_CVS_Start but CVS_YN = 0 are inconsistent: set CVS_YN = 1 for them.
has_cvs_start = registry_df['Date_CVS_Start'].apply(safe_conversion_to_datetime).notnull()
registry_df.loc[(registry_df['CVS_YN'] == 0) & has_cvs_start, 'CVS_YN'] = 1

In [None]:
# Missing ictus dates are replaced by the admission date
ictus_missing = registry_df['Date_Ictus'].isnull()
registry_df.loc[ictus_missing, 'Date_Ictus'] = registry_df['Date_admission']

In [None]:
# Attach the correspondence table to every CT row via the 'pNr' column
ct_timings_df = ct_timings_df.merge(registry_pdms_correspondence_df, how='left', on='pNr')
# 'JoinedName' is renamed to 'Name' so that it can serve as a key in the registry merge
ct_timings_df = ct_timings_df.rename(columns={'JoinedName': 'Name'})

# Bring admission date, ictus date and CVS flag from the registry onto each CT row
patient_keys = ['SOS-CENTER-YEAR-NO.', 'Name', 'Date_birth']
registry_columns = patient_keys + ['Date_admission', 'Date_Ictus', 'CVS_YN']
ct_timings_df = ct_timings_df.merge(registry_df[registry_columns], how='left', on=patient_keys)

In [None]:
# Flag each CT row whose date equals the date of the patient's first DCI-ischemia image.
# A temporary 'ct_date' column is added to both frames, used as an extra merge key, and dropped afterwards.
# assumes Date_DCI_ischemia_first_image carries no time-of-day component — TODO confirm
registry_df['ct_date'] = registry_df['Date_DCI_ischemia_first_image'].apply(safe_conversion_to_datetime)
# .dt.date strips the time of day from timeAktion; the second conversion turns the date objects back into pandas datetimes
ct_timings_df['ct_date'] = ct_timings_df['timeAktion'].apply(safe_conversion_to_datetime).dt.date.apply(safe_conversion_to_datetime)
# left merge: only CT rows matching a registry row on the patient keys AND ct_date receive that row's DCI_ischemia value
# NOTE(review): pandas merge matches null keys with each other, so a CT row with an unparseable timeAktion could pair
# with a registry row that has no first-image date — confirm this cannot produce spurious flags
ct_timings_df = ct_timings_df.merge(registry_df[['SOS-CENTER-YEAR-NO.','Name', 'Date_birth', 'ct_date', 'DCI_ischemia',]], on=['SOS-CENTER-YEAR-NO.','Name', 'Date_birth', 'ct_date'], how='left')
# CT rows without a match have no value after the merge; they are set to 0
ct_timings_df['DCI_ischemia'] = ct_timings_df['DCI_ischemia'].fillna(0).astype(int)
# remove the temporary merge key from both frames
registry_df.drop(columns=['ct_date'], inplace=True)
ct_timings_df.drop(columns=['ct_date'], inplace=True)

Limit analysis to after 2019 (start of PDMS data collection)

In [None]:
# Keep rows with an admission date on or after 2019-01-01.
# .copy() makes the subsets independent frames: new columns are assigned to them later on,
# which would otherwise raise SettingWithCopyWarning on a filtered view of the parent frame.
registry_after_2019_df = registry_df[registry_df['Date_admission'] >= '2019-01-01'].copy()
ct_timings_after_2019_df = ct_timings_df[ct_timings_df['Date_admission'] >= '2019-01-01'].copy()

In [None]:
# Cohort size: distinct values of 'Name' in the registry subset, and number of rows in the CT table
n_patients = registry_after_2019_df['Name'].nunique()
n_cts = len(ct_timings_after_2019_df)

for report_line, report_value in (
    ('Number of patients in registry after 2019: {}', n_patients),
    ('Number of CTs after 2019: {}', n_cts),
):
    print(report_line.format(report_value))

In [None]:
# Sum each event flag column over the post-2019 registry subset
n_dci_ischemia, n_dci_infarct, n_cvs = (
    registry_after_2019_df[flag_column].sum()
    for flag_column in ('DCI_ischemia', 'DCI_infarct', 'CVS_YN')
)

for report_line, report_value in (
    ('Number of patients with DCI ischemia: {}', n_dci_ischemia),
    ('Number of patients with DCI infarct: {}', n_dci_infarct),
    ('Number of patients with CVS: {}', n_cvs),
):
    print(report_line.format(report_value))

#### Compute timings

In [None]:
# (target column, date column, time column) for the first DCI ischemia / infarction image
dci_first_image_columns = (
    ('full_date_dci_ischemia', 'Date_DCI_ischemia_first_image', 'Time_DCI_ischemia_first_image'),
    ('full_date_dci_infarction', 'Date_DCI_infarct_first_image', 'Time_DCI_infarct_first_image'),
)

for full_col, date_col, time_col in dci_first_image_columns:
    # glue date and time together as text: "<date> <time>"
    date_and_time = registry_after_2019_df[date_col].astype(str) + ' ' + registry_after_2019_df[time_col].astype(str)
    # 'NaT nan' is what the concatenation yields when both parts are missing -> mark as missing
    date_and_time = date_and_time.replace('NaT nan', pd.NaT)
    registry_after_2019_df[full_col] = date_and_time.apply(safe_conversion_to_datetime)

# sanity check: the combined column must have exactly as many missing values as the date column alone
for full_col, date_col, _ in dci_first_image_columns:
    assert registry_after_2019_df[full_col].isnull().sum() == registry_after_2019_df[date_col].isnull().sum()

In [None]:
# Time from ictus to CVS start, to first DCI ischemia image and to first DCI infarction image
ictus_datetime = registry_after_2019_df['Date_Ictus'].apply(safe_conversion_to_datetime)
cvs_start_datetime = registry_after_2019_df['Date_CVS_Start'].apply(safe_conversion_to_datetime)

registry_after_2019_df['time_to_cvs'] = cvs_start_datetime - ictus_datetime
registry_after_2019_df['time_to_dci_ischemia'] = registry_after_2019_df['full_date_dci_ischemia'] - ictus_datetime
registry_after_2019_df['time_to_dci_infarction'] = registry_after_2019_df['full_date_dci_infarction'] - ictus_datetime

In [None]:
# Time from ictus to each CT (timeAktion minus Date_Ictus)
ct_datetime = ct_timings_after_2019_df['timeAktion'].apply(safe_conversion_to_datetime)
ct_ictus_datetime = ct_timings_after_2019_df['Date_Ictus'].apply(safe_conversion_to_datetime)
ct_timings_after_2019_df['time_to_ct'] = ct_datetime - ct_ictus_datetime

In [None]:
# Locate negative durations (event recorded before ictus) in every timing column
zero_duration = pd.Timedelta(0)
negative_cvs = registry_after_2019_df['time_to_cvs'] < zero_duration
negative_dci_ischemia = registry_after_2019_df['time_to_dci_ischemia'] < zero_duration
negative_dci_infarction = registry_after_2019_df['time_to_dci_infarction'] < zero_duration
negative_ct = ct_timings_after_2019_df['time_to_ct'] < zero_duration

# report how many there are ...
print('Number of negative time_to_cvs: {}'.format(negative_cvs.sum()))
print('Number of negative time_to_dci_ischemia: {}'.format(negative_dci_ischemia.sum()))
print('Number of negative time_to_dci_infarction: {}'.format(negative_dci_infarction.sum()))
print('Number of negative time_to_ct: {}'.format(negative_ct.sum()))

# ... and blank them out so they do not enter the analysis
registry_after_2019_df.loc[negative_cvs, 'time_to_cvs'] = pd.NaT
registry_after_2019_df.loc[negative_dci_ischemia, 'time_to_dci_ischemia'] = pd.NaT
registry_after_2019_df.loc[negative_dci_infarction, 'time_to_dci_infarction'] = pd.NaT
ct_timings_after_2019_df.loc[negative_ct, 'time_to_ct'] = pd.NaT

In [None]:
# Optional restriction of the CT table to CTs of patients with CVS and/or CTs flagged as DCI ischemia.
restrict_ct_timings_to_dci = False
restrict_ct_timings_to_cvs = True
# .copy() after each filter: a new column is assigned to this frame afterwards, which would
# otherwise raise SettingWithCopyWarning on the filtered slice.
if restrict_ct_timings_to_cvs:
    ct_timings_after_2019_df = ct_timings_after_2019_df[ct_timings_after_2019_df['CVS_YN'] == 1].copy()
if restrict_ct_timings_to_dci:
    ct_timings_after_2019_df = ct_timings_after_2019_df[ct_timings_after_2019_df['DCI_ischemia'] == 1].copy()

## Evaluate time to CVS and DCI 

In [None]:
# Express every duration as a float number of days
seconds_per_day = 60 * 60 * 24
for duration_col, days_col in (
    ('time_to_cvs', 'time_to_cvs_days'),
    ('time_to_dci_ischemia', 'time_to_dci_ischemia_days'),
    ('time_to_dci_infarction', 'time_to_dci_infarction_days'),
):
    registry_after_2019_df[days_col] = registry_after_2019_df[duration_col].dt.total_seconds() / seconds_per_day
ct_timings_after_2019_df['time_to_ct_days'] = ct_timings_after_2019_df['time_to_ct'].dt.total_seconds() / seconds_per_day

In [None]:
# Summary statistics of the ictus-to-CVS-start durations (negative values were set to NaT above)
registry_after_2019_df['time_to_cvs'].describe()

In [None]:
# Summary statistics of the ictus-to-first-DCI-ischemia-image durations
registry_after_2019_df['time_to_dci_ischemia'].describe()

In [None]:
# Summary statistics of the ictus-to-first-DCI-infarction-image durations
registry_after_2019_df['time_to_dci_infarction'].describe()

In [None]:
# Summary statistics of the ictus-to-CT durations (after the optional CVS/DCI restriction above)
ct_timings_after_2019_df['time_to_ct'].describe()

In [None]:
# Number of histogram bins shared by the registry displots below
n_bins = 15

In [None]:
# Stacked histogram with KDE: days from ictus to CVS start, coloured by DCI_YN
g = sns.displot(
    data=registry_after_2019_df,
    x='time_to_cvs_days',
    hue='DCI_YN',
    multiple='stack',
    bins=n_bins,
    kde=True,
    alpha=0.6,
    palette='viridis',
)
ax = g.ax
ax.set(xlabel='Days', ylabel='Number of patients')
ax.set_title('Time to first detected vasospasm')

In [None]:
# Stacked histogram with KDE: days from ictus to first DCI ischemia image, coloured by DCI_infarct
g = sns.displot(
    data=registry_after_2019_df,
    x='time_to_dci_ischemia_days',
    hue='DCI_infarct',
    multiple='stack',
    bins=n_bins,
    kde=True,
    alpha=0.6,
    palette='magma',
)
ax = g.ax

ax.set(xlabel='Days', ylabel='Number of patients')
ax.set_title('Time to first detected DCI (ischemia)')

In [None]:
# Stacked histogram with KDE: days from ictus to first DCI infarction image, coloured by CVS_YN
g = sns.displot(
    data=registry_after_2019_df,
    x='time_to_dci_infarction_days',
    hue='CVS_YN',
    multiple='stack',
    bins=n_bins,
    kde=True,
    alpha=0.6,
    palette='ocean',
)
ax = g.ax

ax.set(xlabel='Days', ylabel='Number of patients')
ax.set_title('Time to first detected DCI (infarction)')

In [None]:
# Histogram with KDE of days from ictus to CT; CTs flagged DCI_ischemia are drawn in black, the rest in light grey
palette = sns.color_palette(['lightgrey', 'black'])
g = sns.displot(
    data=ct_timings_after_2019_df,
    x='time_to_ct_days',
    hue='DCI_ischemia',
    bins=50,
    kde=True,
    alpha=0.3,
    palette=palette,
)
ax = g.ax

ax.set(xlabel='Days', ylabel='Number of CTs')
ax.set_title('Time to CT')

In [None]:
# Overlay the density of CT timings with the densities of the three registry event timings
common_norm = False

ax = sns.kdeplot(data=ct_timings_after_2019_df, x='time_to_ct_days', color='black', alpha=0.1, fill=True, common_norm=common_norm, label='CT')

# (column, colour, legend label) of each registry curve, drawn in this order
registry_curves = (
    ('time_to_cvs_days', 'turquoise', 'CVS'),
    ('time_to_dci_ischemia_days', 'magenta', 'DCI (ischemia)'),
    ('time_to_dci_infarction_days', 'blue', 'DCI (infarction)'),
)
for days_column, curve_color, curve_label in registry_curves:
    sns.kdeplot(data=registry_after_2019_df, x=days_column, color=curve_color, alpha=0.2, fill=True, common_norm=common_norm, label=curve_label)

ax.set(xlabel='Days', ylabel='Density')
ax.set_title('Time to first detected CVS and DCI')

# legend built from the label= given to each curve
ax.legend()


In [None]:
# Histograms of CT timings (left y axis) and of CVS / DCI event timings (right y axis) on a shared x axis.
n_bins = 20
alpha = 0.35
overlay_kde = True
# Named bin_range rather than `range`, so the builtin range() stays usable in the rest of the session
bin_range = (0, 15)

ax1 = sns.histplot(data=ct_timings_after_2019_df, x='time_to_ct_days', color='lightgrey', alpha=alpha, bins=n_bins, label='CT', kde=overlay_kde, binrange=bin_range)

# second y axis: event counts are plotted against their own scale, separate from the CT counts
ax2 = ax1.twinx()
sns.histplot(data=registry_after_2019_df, x='time_to_cvs_days', color='turquoise', alpha=alpha, bins=n_bins, label='CVS', kde=overlay_kde, ax=ax2, binrange=bin_range)
sns.histplot(data=registry_after_2019_df, x='time_to_dci_ischemia_days', color='magenta', alpha=alpha, bins=n_bins, label='DCI (ischemia)', kde=overlay_kde, ax=ax2, binrange=bin_range)
sns.histplot(data=registry_after_2019_df, x='time_to_dci_infarction_days', color='blue', alpha=alpha, bins=n_bins, label='DCI (infarction)', kde=overlay_kde, ax=ax2, binrange=bin_range)

ax1.set_xlabel('Days')
ax1.set_ylabel('Number of CTs')
ax2.set_ylabel('Number of events')
ax2.set_title('Time to first detected CVS and DCI')

# one legend on ax2 combining the CT entry from ax1 with the event entries from ax2
handles, labels = ax1.get_legend_handles_labels()
handles2, labels2 = ax2.get_legend_handles_labels()
ax2.legend(handles + handles2, labels + labels2)

# ax1 remove horizontal grid
ax1.yaxis.grid(False)

ax1.set_xlim(0, bin_range[1])

In [None]:
# Side-by-side box plots of the three event timings (in days)
palette = sns.color_palette(['turquoise', 'magenta', 'blue'])
boxplot_columns = ['time_to_cvs_days', 'time_to_dci_ischemia_days', 'time_to_dci_infarction_days']

ax = sns.boxplot(data=registry_after_2019_df[boxplot_columns], palette=palette, boxprops={'alpha': .6})
# tick labels follow the column order above
ax.set_xticklabels(['CVS', 'DCI (ischemia)', 'DCI (infarction)'])
ax.set_ylabel('Days')
ax.set_title('Time to first detected CVS and DCI')


## Test for bimodality

Dip test: measures multimodality in a sample as the maximum difference, over all sample points, between the empirical distribution function and the unimodal distribution function that minimizes that maximum difference.

In [None]:
import diptest

In [None]:
# Dip test on the non-missing times to DCI ischemia (days)
ischemia_days = registry_after_2019_df['time_to_dci_ischemia_days'].dropna().values
dip, pval = diptest.diptest(ischemia_days)
(dip, pval)

In [None]:
# Dip test on the non-missing times to DCI infarction (days)
infarction_days = registry_after_2019_df['time_to_dci_infarction_days'].dropna().values
dip, pval = diptest.diptest(infarction_days)
(dip, pval)

In [None]:
# Dip test on the non-missing times to CVS (days)
cvs_days = registry_after_2019_df['time_to_cvs_days'].dropna().values
dip, pval = diptest.diptest(cvs_days)
(dip, pval)

In [None]:
# Dip test on the non-missing times to CT (days)
ct_days = ct_timings_after_2019_df['time_to_ct_days'].dropna().values
dip, pval = diptest.diptest(ct_days)
(dip, pval)