In [None]:
import pandas as pd
import numpy as np
import getpass
import io
import msoffcrypto
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Root folder of the study data. Every input path below is derived from it, so
# pointing the notebook at another machine/location only requires editing this line.
DATA_DIR = '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/dci_sah/data'
# Folder holding the PDMS export files (ABG values and patient-ID link table).
PDMS_TRANSFER_DIR = DATA_DIR + '/pdms_data/Transfer Urs.pietsch@kssg.ch 22.01.24, 15_34'

# Password-protected registry workbook (decrypted in memory further down).
sah_sos_data_path = DATA_DIR + '/sos_sah_data/aSAH_DATA_2009_2023_24122023.xlsx'
# Semicolon-separated PDMS exports.
abg_data_path = PDMS_TRANSFER_DIR + '/20240116_SAH_SOS_BGA.csv'
patient_id_link_data_path = PDMS_TRANSFER_DIR + '/20240116_SAH_SOS_Patienten.csv'

In [None]:
# Ask for the workbook password interactively so that it is never stored in the notebook.
# It is passed to msoffcrypto's load_key() when the registry workbook is decrypted.
password = getpass.getpass()

In [None]:
# Decrypt the password-protected registry workbook into an in-memory buffer;
# the decrypted content is only ever written to this BytesIO object.
decrypted_workbook = io.BytesIO()
with open(sah_sos_data_path, mode='rb') as encrypted_handle:
    office_file = msoffcrypto.OfficeFile(encrypted_handle)
    office_file.load_key(password=password)
    office_file.decrypt(decrypted_workbook)

In [None]:
# Load the 'DATA' sheet of the decrypted workbook and preview its first rows.
registry_df = pd.read_excel(io=decrypted_workbook, sheet_name='DATA')
registry_df.head(n=5)

In [None]:
# Both PDMS exports are read with the same layout: ';' as separator, '.' as decimal mark.
pdms_csv_options = {'sep': ';', 'decimal': '.'}
abg_df = pd.read_csv(abg_data_path, **pdms_csv_options)
patient_id_link_df = pd.read_csv(patient_id_link_data_path, **pdms_csv_options)

In [None]:
# Preview the first rows of the ABG/BGA table read from abg_data_path.
abg_df.head()

In [None]:
# Preview the first rows of the patient-ID link table read from patient_id_link_data_path.
# NOTE(review): this table appears to hold identifying fields (GebDatum is parsed below and
# Name/Vorname show up after the join) - clear this cell's output before sharing the notebook.
patient_id_link_df.head()

# Joining registry and PDMS data

In [None]:
# Parse the registry's day.month.year date columns into datetimes so they can be used as join keys.
for registry_date_column in ('Date_birth', 'Date_admission'):
    registry_df[registry_date_column] = pd.to_datetime(
        registry_df[registry_date_column],
        format='%d.%m.%Y',
    )

In [None]:
# Give the registry's name column a distinct label so that it can be told apart
# from the 'Name' column that appears next to it after the join.
registry_df = registry_df.rename(columns={'Name': 'JoinedName'})

In [None]:
# Parse the link table's ISO-formatted (year-month-day) date columns into datetimes
# so they are comparable with the registry's date columns.
for link_date_column in ('GebDatum', 'Eintritt'):
    patient_id_link_df[link_date_column] = pd.to_datetime(
        patient_id_link_df[link_date_column],
        format='%Y-%m-%d',
    )

In [None]:
# Attach the patient-ID link table to the registry. A row matches only when both
# the date of birth and the admission date agree; registry rows without a match
# are kept (left join) with empty link columns.
registry_join_keys = ['Date_birth', 'Date_admission']
link_join_keys = ['GebDatum', 'Eintritt']
joined_registry_df = pd.merge(
    registry_df,
    patient_id_link_df,
    how='left',
    left_on=registry_join_keys,
    right_on=link_join_keys,
)

In [None]:
# Side-by-side preview of the names and join keys coming from both sources.
join_preview_columns = ['JoinedName', 'Name', 'Vorname', 'Date_birth', 'Date_admission', 'GebDatum', 'Eintritt']
joined_registry_df.loc[:, join_preview_columns].head()

In [None]:
def is_name_consistent(row):
    """Return whether the joined 'Name' is compatible with the registry's 'JoinedName'.

    - 'Name' missing: nothing to compare against, so the row passes (True).
    - 'Name' present but 'JoinedName' missing: cannot be confirmed, so the row is
      flagged (False) instead of raising a TypeError on `str in float('nan')`.
    - otherwise: True when 'Name' is a substring of 'JoinedName'.
    """
    if pd.isnull(row['Name']):
        return True
    if pd.isnull(row['JoinedName']):
        return False
    return row['Name'] in row['JoinedName']


# Sanity check of the date-based join: flag every row whose names disagree.
joined_registry_df['name_verification'] = joined_registry_df.apply(is_name_consistent, axis=1)

Proceed with manual verification of the remaining patients whose names do not match.

In [None]:
# Show the rows that failed the name check, together with both names and the join keys,
# so they can be reviewed by hand.
mismatch_review_columns = ['JoinedName', 'Name', 'Vorname', 'name_verification', 'Date_birth', 'Date_admission', 'GebDatum', 'Eintritt']
joined_registry_df.loc[~joined_registry_df['name_verification'], mismatch_review_columns]

## Analyse Sodium values

In [None]:
# Keep only what the sodium analysis needs: the patient number, the DCI flag and the admission date.
dci_columns = ['pNr', 'DCI_YN', 'Date_admission']
dci_df = joined_registry_df[dci_columns]

In [None]:
# Add the DCI flag and admission date to every ABG row (matched on patient number).
# Columns left over from a previous run of this cell are dropped first: without that,
# re-running the cell would yield DCI_YN_x/DCI_YN_y and Date_admission_x/Date_admission_y
# and break the cells below.
abg_df = (
    abg_df
    .drop(columns=['DCI_YN', 'Date_admission'], errors='ignore')
    .merge(dci_df, how='left', on='pNr')
)

Build the time-relative-to-admission column (`relative_time`, in hours).

Original BGA timestamp format: `2015-01-12 17:55:00.000`

In [None]:
# Parse the BGA timestamps into datetimes; the format includes fractional seconds.
bga_timestamp_format = '%Y-%m-%d %H:%M:%S.%f'
abg_df['timeBGA'] = pd.to_datetime(abg_df['timeBGA'], format=bga_timestamp_format)

In [None]:
# Hours between admission and each measurement (negative when the measurement precedes
# the admission timestamp; NaN when either timestamp is missing).
# Computed column-wise rather than with a row-wise apply: same values, much faster,
# and it also works when the frame is empty.
abg_df['relative_time'] = (abg_df['timeBGA'] - abg_df['Date_admission']).dt.total_seconds() / 3600

Plot sodium values (`na` column) against time since admission for the two DCI groups (`DCI_YN`).

In [None]:
# One point per measurement: 'na' against hours since admission ('relative_time'), coloured by DCI_YN.
sns.scatterplot(data=abg_df, x='relative_time', y='na', hue='DCI_YN')

Plot the daily distribution of sodium values (`na` column) since admission as box plots, split by DCI group (`DCI_YN`).

In [None]:
# Whole days since admission. Floor division is used instead of int(x / 24): int()
# truncates toward zero, which would put measurements from the 24 h *before* admission
# into day 0 together with the first 24 h after it. With floor division they land in
# day -1. Rows without a relative time keep NaN.
abg_df['day'] = abg_df['relative_time'].apply(lambda x: int(x // 24) if pd.notnull(x) else np.nan)

# Explicit figure/axes so the plot, labels and saved file all refer to the same figure.
fig, ax = plt.subplots(figsize=(20, 10))

# One box per day and DCI group; outliers are hidden to keep the boxes readable.
sns.boxplot(data=abg_df, x='day', y='na', hue='DCI_YN', showfliers=False, palette='pastel', ax=ax)

ax.set_ylabel('Sodium (mmol/L)')
ax.set_xlabel('Days since admission')

# Save the figure to disk.
fig.savefig('/Users/jk1/Downloads/sodium_over_time.png', dpi=300)