In [None]:
import seaborn as sns
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt

In [None]:
# Location of the post-hoc modified stroke registry export (Excel workbook).
registry_path = (
    '/Users/jk1/OneDrive - unige.ch/stroke_research/geneva_stroke_unit_dataset'
    '/data/stroke_registry/post_hoc_modified'
    '/stroke_registry_post_hoc_modified.xlsx'
)
# Output directory; not referenced by the cells visible in this notebook section.
output_path = '/Users/jk1/temp/opsum_output'

In [None]:
# CSV listing the selected patients (with details) used to restrict the registry.
patient_selection_path = (
    '/Users/jk1/temp/opsum_extraction_output'
    '/high_frequency_data_patient_selection_with_details.csv'
)

In [None]:
# Read every column as string so identifiers keep their exact textual form.
patient_selection_df = pd.read_csv(
    filepath_or_buffer=patient_selection_path,
    dtype=str,
)

In [None]:
# Load the full stroke registry from the Excel export.
stroke_df = pd.read_excel(io=registry_path)


In [None]:
from preprocessing.utils import create_registry_case_identification_column

# Give both tables the same 'case_admission_id' key column.
# NOTE(review): how the identifier is built is defined in preprocessing.utils,
# which is not visible here.
for registry_like_df in (patient_selection_df, stroke_df):
    registry_like_df['case_admission_id'] = create_registry_case_identification_column(registry_like_df)


In [None]:
from preprocessing.patient_selection.restrict_to_patient_selection import restrict_to_patient_selection

# Restrict the registry to the patient selection stored at patient_selection_path.
# The restriction to the event period is explicitly switched off.
restricted_stroke_registry_df = restrict_to_patient_selection(
    stroke_df,
    patient_selection_path,
    restrict_to_event_period=False,
    verbose=True,
)

In [None]:
# case_id is the string concatenation of the patient id and the last 4 EDS digits.
patient_id_as_str = patient_selection_df['patient_id'].astype(str)
eds_suffix_as_str = patient_selection_df['EDS_last_4_digits'].astype(str)
patient_selection_df['case_id'] = patient_id_as_str + eds_suffix_as_str

In [None]:
# Patients who died in hospital get a 3-month mRS of 6 (overwrites any recorded value).
died_in_hospital = restricted_stroke_registry_df['Death in hospital'] == 'yes'
restricted_stroke_registry_df.loc[died_in_hospital, '3M mRS'] = 6

# Deaths recorded at 3 months without an mRS value are set to 6 as well.
died_by_3m_without_mrs = (
    restricted_stroke_registry_df['3M mRS'].isna()
    & (restricted_stroke_registry_df['3M Death'] == 'yes')
)
restricted_stroke_registry_df.loc[died_by_3m_without_mrs, '3M mRS'] = 6

In [None]:
# Change in disability: 3-month mRS minus the pre-stroke Rankin score.
restricted_stroke_registry_df['3M delta mRS'] = restricted_stroke_registry_df['3M mRS'].sub(
    restricted_stroke_registry_df['Prestroke disability (Rankin)']
)

In [None]:
# A death in hospital is also a death at 3 months.
in_hospital_death = restricted_stroke_registry_df['Death in hospital'] == 'yes'
restricted_stroke_registry_df.loc[in_hospital_death, '3M Death'] = 'yes'

# A 3-month mRS of 6 is recorded as a death.
mrs_is_6 = restricted_stroke_registry_df['3M mRS'] == 6
restricted_stroke_registry_df.loc[mrs_is_6, '3M Death'] = 'yes'

# A recorded mRS other than 6 with no death entry is filled in as 'no'.
alive_but_unrecorded = (
    restricted_stroke_registry_df['3M mRS'].notna()
    & (restricted_stroke_registry_df['3M mRS'] != 6)
    & restricted_stroke_registry_df['3M Death'].isna()
)
restricted_stroke_registry_df.loc[alive_but_unrecorded, '3M Death'] = 'no'

In [None]:
# Recode the 3-month death flag: 'yes' -> 1, 'no' -> 0 (other values are left untouched).
for death_label, death_code in (('yes', 1), ('no', 0)):
    has_label = restricted_stroke_registry_df['3M Death'] == death_label
    restricted_stroke_registry_df.loc[has_label, '3M Death'] = death_code

In [None]:
# Number of admissions with no 'Death in hospital' entry.
restricted_stroke_registry_df['Death in hospital'].isna().sum()

In [None]:
# Show the restricted registry after the mRS / 3M Death adjustments made in the cells above.
restricted_stroke_registry_df

In [None]:
# Registry columns treated as outcomes in this notebook.
outcome_columns = [
    # complications and events during the hospital stay
    'Symptomatic ICH',
    'Symptomatic ICH date',
    'Recurrent stroke',
    'Recurrent stroke date',
    'Orolingual angioedema',
    'Death in hospital',
    'Death at hospital date',
    'Death at hospital time',
    'Death at hospital cause',
    'Epileptic seizure in hospital',
    'Epileptic seizure in hospital date',
    # procedures
    'Decompr. craniectomy',
    'Decompr. craniectomy date',
    'CEA',
    'CEA date',
    'CAS',
    'CAS date',
    'Other endovascular revascularization',
    'Other surgical revascularization',
    'Other surgical revascularization date',
    'Other surgical revascularization spec',
    'PFO closure',
    'PFO closure date',
    # discharge
    'Discharge destination',
    'Discharge date',
    'Discharge time',
    'Duration of hospital stay (days)',
    # 3-month follow-up
    '3M date',
    '3M mode',
    '3M mRS',
    '3M NIHSS',
    '3M Stroke',
    '3M Stroke date',
    '3M ICH',
    '3M ICH date',
    '3M Death',
    '3M Death date',
    '3M Death cause',
    '3M Epileptic seizure',
    '3M Epileptic seizure date',
    '3M delta mRS',
]

In [None]:
# Registry columns describing acute treatment.
treatment_columns = [
    # intravenous thrombolysis (IVT)
    'IVT with rtPA',
    'IVT start date',
    'IVT start time',
    'Total rtPA dose',
    'Onset to treatment (min.)',
    'Door to treatment (min.)',
    # intra-arterial treatment (IAT)
    'IAT',
    'Date of groin puncture',
    'Time of groin puncture',
    'Onset to groin puncture (min.)',
    'Door to groin puncture (min.)',
    'IAT end date',
    'IAT end time',
    'Anesthesia',
    'IAT rtPA',
    'IAT rtPA dose',
    'IAT urokinase',
    'IAT urokinase dose',
    'IAT mech. treatment',
    'IAT stent retriever',
    'IAT aspiration',
    'IAT distal retriever',
    'IAT balloon',
    'IAT intracranial stent',
    'IAT extracranial stent',
    'IAT other mechanical',
]

In [None]:
# Registry columns describing imaging, affected territory and etiology.
imaging_columns = [
    # acute imaging
    '1st brain imaging type',
    '1st brain imaging date',
    '1st brain imaging time',
    'Door to image (min.)',
    '1st brain imaging result',
    'Acute perf. imaging type',
    'Acute perf. imaging result',
    '1st vascular imaging type',
    '1st vascular imaging result',
    # follow-up imaging
    'FU brain imaging',
    'FU brain imaging result',
    # territory
    'MCA',
    'ACA',
    'PCA',
    'Vertebro-basilar',
    'Ocular',
    # etiology
    'Etiology TOAST',
    'Etiology TOAST other',
    'Etiology',
    'Etiology other',
]

In [None]:
# Outcome table: one row per registry entry, keyed by case_admission_id.
# Built from restricted_stroke_registry_df: `selected_full_data_df` is never defined
# in this notebook (NameError on a fresh kernel), and the restricted registry is the
# frame on which '3M delta mRS' and the harmonised '3M Death' were computed.
# .copy() makes this an independent frame, so the .loc assignments in later cells
# do not write into a slice of the registry (no SettingWithCopyWarning).
outcome_df = restricted_stroke_registry_df[["case_admission_id"] + outcome_columns].copy()

In [None]:
# Summary statistics of the 3-month mRS.
outcome_df.loc[:, '3M mRS'].describe()

In [None]:
# Distribution of the 3-month mRS.
outcome_df['3M mRS'].plot.hist(title='3M mRS', bins=50)
plt.show()

In [None]:
# Count of 3-month mRS 0-1 versus the rest.
# Missing mRS values compare as False and are therefore counted in the False group.
outcome_df['3M mRS'].le(1).value_counts()

In [None]:
# Negative deltas (3-month mRS below the pre-stroke score) are set to 0.
negative_delta = outcome_df['3M delta mRS'].lt(0)
outcome_df.loc[negative_delta, '3M delta mRS'] = 0
outcome_df.loc[:, '3M delta mRS'].describe()

In [None]:
# Distribution of the 3-month change in mRS.
outcome_df['3M delta mRS'].plot.hist(title='3M delta mRS', bins=50)
plt.show()

In [None]:
# Count of admissions whose mRS rose by at most 1 point versus the rest.
# Missing deltas compare as False and are therefore counted in the False group.
outcome_df['3M delta mRS'].le(1).value_counts()

In [None]:
# Distribution of the 3-month NIHSS.
outcome_df['3M NIHSS'].plot.hist(title='3M NIHSS', bins=50)
plt.show()

In [None]:
# Summary statistics of the 3-month NIHSS.
outcome_df.loc[:, '3M NIHSS'].describe()

In [None]:
# Frequency of each 3-month death value.
outcome_df.loc[:, '3M Death'].value_counts()

In [None]:
# Frequency of each '3M Stroke' value.
outcome_df.loc[:, '3M Stroke'].value_counts()

In [None]:
# Frequency of each '3M Epileptic seizure' value.
outcome_df.loc[:, '3M Epileptic seizure'].value_counts()

In [None]:
# Frequency of each '3M ICH' value.
outcome_df.loc[:, '3M ICH'].value_counts()

In [None]:
# Frequency of each 'Symptomatic ICH' value.
outcome_df.loc[:, 'Symptomatic ICH'].value_counts()

In [None]:
# Frequency of each 'Recurrent stroke' value.
outcome_df.loc[:, 'Recurrent stroke'].value_counts()

In [None]:
# Summary statistics of the length of hospital stay in days.
outcome_df.loc[:, 'Duration of hospital stay (days)'].describe()

In [None]:
# Stays recorded as longer than 365 days are blanked out before plotting.
stay_column = 'Duration of hospital stay (days)'
longer_than_a_year = outcome_df[stay_column].gt(365)
outcome_df.loc[longer_than_a_year, stay_column] = np.nan
outcome_df[stay_column].plot(kind='hist', bins=50)
plt.show()

# Extracting early neurologic deterioration
Rise in NIHSS of at least 2 or 4 points in the first 72h

In [None]:
from preprocessing.scales_preprocessing.scales_preprocessing import preprocess_scales

# Directory holding the per-value extraction; scale files are those whose name starts with 'scale'.
data_path = '/Users/jk1/stroke_datasets/stroke_unit_dataset/per_value/Extraction_20211110'
scales_file_start = 'scale'

# os.listdir returns entries in arbitrary order; sorting the names keeps the row order of
# the concatenated frame (and the head() preview below) identical from run to run.
scales_file_names = sorted(f for f in os.listdir(data_path) if f.startswith(scales_file_start))
scales_files = [pd.read_csv(os.path.join(data_path, f), delimiter=';', encoding='utf-8')
                for f in scales_file_names]
scales_df = pd.concat(scales_files, ignore_index=True)

# NOTE(review): the cleaning done by preprocess_scales is defined in the project package,
# which is not visible here.
scales_df = preprocess_scales(scales_df)
scales_df.head()

In [None]:
# Parse the event timestamps once; both date columns used below are datetimes afterwards.
scales_df['event_date'] = pd.to_datetime(scales_df['event_date'], format='%d.%m.%Y %H:%M')

# Earliest scale sample of each admission, attached to every row as 'event_date_first'.
first_sample_dates_df = scales_df.groupby('case_admission_id')['event_date'].min()
scales_df_with_rel_dates_df = scales_df.join(
    first_sample_dates_df,
    on='case_admission_id',
    rsuffix='_first',
).copy()

# Hours elapsed since the first scale sample of the admission.
elapsed_since_first_sample = (
    scales_df_with_rel_dates_df['event_date'] - scales_df_with_rel_dates_df['event_date_first']
)
scales_df_with_rel_dates_df['relative_sample_date'] = elapsed_since_first_sample.dt.total_seconds() / (60 * 60)

In [None]:
# Keep only the NIHSS measurements.
is_nihss_row = scales_df_with_rel_dates_df['scale'] == 'NIHSS'
NIHSS_scale_df = scales_df_with_rel_dates_df.loc[is_nihss_row].copy()

In [None]:
# Admission NIHSS per case, taken from the restricted registry.
# `selected_full_data_df` is never defined in this notebook (NameError on a fresh kernel);
# restricted_stroke_registry_df is the registry frame built above.
# NOTE(review): confirm this is the intended source frame.
early_neurologic_deterioriation_df = restricted_stroke_registry_df[["case_admission_id", 'NIH on admission']].copy()

In [None]:
# Attach every NIHSS sample to its admission; admissions without samples are kept (left join).
# Re-running this cell merges again, so re-run the preceding cell first.
early_neurologic_deterioriation_df = pd.merge(
    early_neurologic_deterioriation_df,
    NIHSS_scale_df,
    how='left',
    on='case_admission_id',
)

In [None]:
# Change of each NIHSS sample relative to the admission NIHSS (positive = worse).
early_neurologic_deterioriation_df['delta_NIHSS'] = early_neurologic_deterioriation_df['score'].sub(
    early_neurologic_deterioriation_df['NIH on admission']
)
early_neurologic_deterioriation_df.head()

In [None]:
# Flag samples with an NIHSS rise of at least 2 points less than 72 h after the first scale sample.
# Rows with a missing delta or missing relative time evaluate to False.
rose_by_at_least_2 = early_neurologic_deterioriation_df['delta_NIHSS'].ge(2)
sampled_before_72h = early_neurologic_deterioriation_df['relative_sample_date'].lt(72)
early_neurologic_deterioriation_df['early_neurologic_deterioration_delta2'] = rose_by_at_least_2 & sampled_before_72h

In [None]:
# Flag samples with an NIHSS rise of at least 4 points less than 72 h after the first scale sample.
# Rows with a missing delta or missing relative time evaluate to False.
rose_by_at_least_4 = early_neurologic_deterioriation_df['delta_NIHSS'].ge(4)
sampled_before_72h = early_neurologic_deterioriation_df['relative_sample_date'].lt(72)
early_neurologic_deterioriation_df['early_neurologic_deterioration_delta4'] = rose_by_at_least_4 & sampled_before_72h

In [None]:
# Count admissions with / without at least one sample flagged as early neurologic
# deterioration (>= 2 points). The column-wise groupby .any() replaces a Python-level
# apply over whole sub-frames: it is vectorised and does not rely on apply operating
# on the grouping column, which pandas 2.2 deprecates.
early_neurologic_deterioriation_df.groupby('case_admission_id')['early_neurologic_deterioration_delta2'].any().value_counts()

In [None]:
# Count admissions with / without at least one sample flagged as early neurologic
# deterioration (>= 4 points). The column-wise groupby .any() replaces a Python-level
# apply over whole sub-frames: it is vectorised and does not rely on apply operating
# on the grouping column, which pandas 2.2 deprecates.
early_neurologic_deterioriation_df.groupby('case_admission_id')['early_neurologic_deterioration_delta4'].any().value_counts()

In [None]:
# NIHSS change over time for all samples, coloured by the >= 2 point deterioration flag;
# the x axis is limited to the first 72 hours.
ax = sns.scatterplot(
    data=early_neurologic_deterioriation_df,
    x='relative_sample_date',
    y='delta_NIHSS',
    hue='early_neurologic_deterioration_delta2',
    alpha=0.1,
    legend=False,
)
ax.set(xlim=(0, 72))
plt.show()

In [None]:
# Pick one admission at random (unseeded, so a different example on every run)
# and plot its NIHSS course.
candidate_ids = early_neurologic_deterioriation_df['case_admission_id'].unique()
pa_id = np.random.choice(candidate_ids)
is_selected_case = early_neurologic_deterioriation_df['case_admission_id'] == pa_id
temp = early_neurologic_deterioriation_df.loc[is_selected_case].copy()

ax = sns.scatterplot(
    data=temp,
    x='relative_sample_date',
    y='score',
    hue='early_neurologic_deterioration_delta2',
    legend=True,
)
ax.set(
    xlabel='Hours from admission',
    ylabel='NIHSS',
    title='Example of NIHSS progression for patient admission id: ' + str(pa_id),
)
ax.tick_params(axis="x", rotation=45)
# Place the legend outside the axes, to the right of the plot.
plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)

# temp already holds exactly the rows of the selected admission.
print('Admission NIHSS', temp['NIH on admission'].values[0])
plt.show()

Remarks:
- Early neurologic deterioration has to be clearly defined
   - Persistent increase? Transient increase?
