In [None]:
import os
from getpass import getpass

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import tqdm

from utils.utils import load_encrypted_xlsx

In [None]:
# Root folder of the study data. It defaults to the original location and can be
# redirected on another machine by setting the DCI_SAH_DATA_DIR environment
# variable, so the notebook does not have to be edited to run elsewhere.
data_dir = os.environ.get(
    'DCI_SAH_DATA_DIR',
    '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/dci_sah/data',
)
# Sub-folder that holds the two PDMS exports (blood pressure and noradrenaline).
pdms_transfer_dir = f'{data_dir}/pdms_data/Transfer Urs.pietsch@kssg.ch 22.01.24, 15_34'

# Registry workbook (read with load_encrypted_xlsx).
registry_data_path = f'{data_dir}/sos_sah_data/post_hoc_modified_aSAH_DATA_2009_2023_24122023.xlsx'
# Blood-pressure export (';'-separated CSV).
hta_data_path = f'{pdms_transfer_dir}/20240116_SAH_SOS_Blutdruecke.csv'
# Table linking registry entries to PDMS patient numbers (pNr).
registry_pdms_correspondence_path = f'{data_dir}/pdms_data/registry_pdms_correspondence.csv'
# Noradrenaline administration export (';'-separated CSV).
noradrenaline_data_path = f'{pdms_transfer_dir}/20240116_SAH_SOS_EinzelGabeNoradrSpritzenpumpe.csv'

In [None]:
# When True, data from before 2019-01-01 is filtered OUT: the cells guarded by
# this flag keep only rows whose Date_admission (blood pressure) or Start
# (noradrenaline) is on or after that date.
filter_before_2019 = True

In [None]:
# Registry workbook, read through the project helper load_encrypted_xlsx.
registry_df = load_encrypted_xlsx(registry_data_path)
# Blood-pressure export: ';'-separated CSV with '.' as the decimal mark.
hta_df = pd.read_csv(hta_data_path, sep=';', decimal='.')
# Correspondence table, later merged on 'pNr' and on the registry key columns.
registry_pdms_correspondence_df = pd.read_csv(registry_pdms_correspondence_path)

In [None]:
# Distinct patient numbers (missing values ignored) in the raw blood-pressure export.
number_of_patients = len(hta_df['pNr'].dropna().unique())
print(f'Number of patients in HTA data: {number_of_patients}')

In [None]:
# Preview the first rows of the blood-pressure table.
hta_df.head()

In [None]:
# Smallest measurement timestamp. timeBd is only converted to datetime in a
# later cell, so here the minimum is taken over the values as read from the CSV.
hta_df['timeBd'].min()

Join data

In [None]:
# Prepare the correspondence table for merging with the registry: rename the
# name column to 'Name' and parse the birth date (presumably so that both merge
# keys match the registry's columns -- verify against the registry schema).
registry_pdms_correspondence_df = registry_pdms_correspondence_df.rename(columns={'JoinedName': 'Name'})
registry_pdms_correspondence_df['Date_birth'] = pd.to_datetime(
    registry_pdms_correspondence_df['Date_birth'], format='%Y-%m-%d'
)

# Attach the correspondence columns to every blood-pressure row (via pNr) ...
hta_df = pd.merge(hta_df, registry_pdms_correspondence_df, how='left', on='pNr')
# ... and the PDMS patient number to every registry entry (via the registry keys).
registry_merge_keys = ['SOS-CENTER-YEAR-NO.', 'Name', 'Date_birth']
registry_df = pd.merge(registry_df, registry_pdms_correspondence_df, how='left', on=registry_merge_keys)

# A left merge must not lose patients; print the count to confirm.
number_of_patients = hta_df['pNr'].nunique()
print(f'Number of patients in HTA data: {number_of_patients}')

In [None]:
# Bring the DCI label and the admission date from the registry onto each
# blood-pressure row, matching on the PDMS patient number.
dci_df = registry_df.loc[:, ['pNr', 'DCI_ischemia', 'Date_admission']]
hta_df = pd.merge(hta_df, dci_df, how='left', on='pNr')

number_of_patients = hta_df['pNr'].nunique()
print(f'Number of patients in HTA data: {number_of_patients}')

In [None]:
if filter_before_2019:
    # Keep only admissions on or after 2019-01-01. Rows without an admission
    # date compare False and are therefore dropped as well.
    # .copy() detaches the result from the unfiltered frame, so the columns
    # added to hta_df in later cells do not raise SettingWithCopyWarning.
    hta_df = hta_df[hta_df['Date_admission'] >= pd.to_datetime('2019-01-01')].copy()
    number_of_patients = hta_df['pNr'].nunique()
    print(f'Number of patients in HTA data: {number_of_patients}')

In [None]:
# Sanity check: every systolic, diastolic and mean pressure value must parse as
# a number. pd.to_numeric is applied to the whole column at once (instead of
# element by element) and the assertion reports which column failed.
for pressure_column in ('systole', 'diastole', 'mitteldruck'):
    n_invalid = pd.to_numeric(hta_df[pressure_column], errors='coerce').isnull().sum()
    assert n_invalid == 0, f'{n_invalid} missing or non-numeric values in column {pressure_column}'

In [None]:
# Parse the measurement timestamps (format with fractional seconds).
hta_df['timeBd'] = pd.to_datetime(hta_df['timeBd'], format='%Y-%m-%d %H:%M:%S.%f')
# Hours elapsed between admission and each measurement. Column-wise datetime
# arithmetic replaces the row-by-row apply: same values (missing dates give
# NaN), far faster on a long table, and it also works on an empty frame.
# NOTE(review): assumes Date_admission is a datetime column -- confirm.
hta_df['relative_time'] = (hta_df['timeBd'] - hta_df['Date_admission']).dt.total_seconds() / 3600

In [None]:
# sns.scatterplot(data=hta_df, x='relative_time', y='mitteldruck', hue='DCI_ischemia')

In [None]:
# Whole days since admission. Flooring (instead of truncating toward zero with
# int()) keeps every bin exactly 24 h wide: a reading taken 1 h before the
# admission timestamp falls into day -1 rather than being merged into day 0.
day_since_admission = np.floor(hta_df['relative_time'] / 24)
# Keep integer day labels when every row has a relative time; otherwise leave
# the column as float so that missing values stay NaN.
hta_df['day'] = (
    day_since_admission.astype(int) if day_since_admission.notna().all() else day_since_admission
)

In [None]:
# Mean arterial pressure per day since admission, split by DCI status
# (outliers are not drawn).
fig, ax = plt.subplots(figsize=(20, 10))

sns.boxplot(
    data=hta_df,
    x='day',
    y='mitteldruck',
    hue='DCI_ischemia',
    showfliers=False,
    palette='pastel',
    ax=ax,
)

# axis labels
ax.set_ylabel('MAP (mmHg)')
ax.set_xlabel('Days since admission')

Censor data with concomitant noradrenaline

In [None]:
# Noradrenaline administration export: ';'-separated CSV with '.' as the decimal mark.
nor_df = pd.read_csv(noradrenaline_data_path, sep=';', decimal='.')

In [None]:
# Preview the first rows of the noradrenaline table.
nor_df.head()

In [None]:
# Convert the start and end of each administration to datetimes
# (same timestamp format as the blood-pressure export).
for time_column in ('Start', 'Ende'):
    nor_df[time_column] = pd.to_datetime(nor_df[time_column], format='%Y-%m-%d %H:%M:%S.%f')

In [None]:
if filter_before_2019:
    # Keep only administrations that started on or after 2019-01-01.
    started_from_2019 = nor_df['Start'] >= pd.Timestamp('2019-01-01')
    nor_df = nor_df[started_from_2019]

In [None]:
# Spot check: noradrenaline records of one example patient (pNr 64354).
nor_df[(nor_df.pNr == 64354)]

In [None]:
# Spot check: blood-pressure rows of the same example patient (pNr 64354).
hta_df[(hta_df.pNr == 64354)]

In [None]:
# Spot check before censoring: blood-pressure rows of patient 64354 inside one
# fixed time window (both bounds inclusive).
window_start = pd.Timestamp('2019-03-18 08:30:48.470')
window_end = pd.Timestamp('2019-03-18 09:01:59.813')
hta_df[(hta_df['pNr'] == 64354) & hta_df['timeBd'].between(window_start, window_end)]

In [None]:
def _measured_during_noradrenaline(bp_df, infusion_df):
    """Flag blood-pressure rows recorded while noradrenaline was administered.

    Parameters
    ----------
    bp_df : pandas.DataFrame
        Blood-pressure table with columns 'pNr' and 'timeBd'.
    infusion_df : pandas.DataFrame
        Noradrenaline table with columns 'pNr', 'Start' and 'Ende'.

    Returns
    -------
    numpy.ndarray of bool
        One entry per row of ``bp_df`` (positional order); True where the row's
        timeBd lies within [Start, Ende], both bounds inclusive, of at least one
        administration recorded for the same pNr.
    """
    flagged = np.zeros(len(bp_df), dtype=bool)
    bp_times = bp_df['timeBd'].to_numpy()
    # Positional row numbers of each patient's measurements, computed once so
    # the full table is not rescanned for every administration.
    rows_by_patient = bp_df.groupby('pNr').indices

    for patient, infusions in tqdm(infusion_df.groupby('pNr')):
        rows = rows_by_patient.get(patient)
        if rows is None:
            # Patient has noradrenaline records but no blood-pressure rows.
            continue
        patient_times = bp_times[rows]
        inside = np.zeros(len(rows), dtype=bool)
        # Missing Start/Ende values compare False, so they censor nothing.
        for start, end in zip(infusions['Start'].to_numpy(), infusions['Ende'].to_numpy()):
            inside |= (patient_times >= start) & (patient_times <= end)
        flagged[rows] = inside
    return flagged


# Filtered copy of hta_df without the measurements taken between Start and Ende
# of a noradrenaline administration of the same patient.
filtered_hta_df = hta_df[~_measured_during_noradrenaline(hta_df, nor_df)].copy()
    


In [None]:
# Spot check after censoring: rows of patient 64354 inside one fixed time
# window (both bounds inclusive).
window_start = pd.Timestamp('2019-03-18 08:30:48.470')
window_end = pd.Timestamp('2019-03-18 09:01:59.813')
filtered_hta_df[(filtered_hta_df['pNr'] == 64354) & filtered_hta_df['timeBd'].between(window_start, window_end)]

In [None]:
# Mean arterial pressure per day since admission, split by DCI status, using
# only measurements taken outside noradrenaline administration (outliers are
# not drawn).
fig, ax = plt.subplots(figsize=(20, 10))

sns.boxplot(
    data=filtered_hta_df,
    x='day',
    y='mitteldruck',
    hue='DCI_ischemia',
    showfliers=False,
    palette='pastel',
    ax=ax,
)

# axis labels
ax.set_ylabel('MAP (mmHg)')
ax.set_xlabel('Days since admission')

In [None]:
# Distribution of systolic, diastolic and mean pressure values; the fourth
# panel of the 2x2 grid is left empty.
fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))
sns.histplot(data=hta_df, x='systole', ax=ax[0, 0])
sns.histplot(data=hta_df, x='diastole', ax=ax[0, 1])
sns.histplot(data=hta_df, x='mitteldruck', ax=ax[1, 0])

-> Values should be filtered to a min/max range to remove erroneous measurements.