In [None]:
import os
import numpy as np
import pandas as pd
from preprocessing.vitals_preprocessing.vitals_preprocessing import preprocess_vitals
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load every raw vitals export found in the extraction folder and stack them into one DataFrame.
# NOTE(review): absolute, machine-specific path - point this at the local copy of the extraction.
data_path = '/Users/jk1/stroke_datasets/stroke_unit_dataset/per_value/Extraction_20211110'
vitals_file_start = 'patientvalue'

# os.listdir returns entries in arbitrary order; sorting keeps the row order (and the index
# assigned by ignore_index=True) identical across runs and machines.
vitals_file_names = sorted(f for f in os.listdir(data_path) if f.startswith(vitals_file_start))
if not vitals_file_names:
    # Fail with an explicit message instead of pandas' generic "No objects to concatenate".
    raise FileNotFoundError(
        f"No file starting with '{vitals_file_start}' found in {data_path}")

# Every column is read as text (dtype=str); no type inference happens at load time.
vitals_files = [pd.read_csv(os.path.join(data_path, f), delimiter=';', encoding='utf-8', dtype=str)
                for f in vitals_file_names]
vitals_df = pd.concat(vitals_files, ignore_index=True)

In [None]:
# Run the project's vitals preprocessing on the concatenated raw frame (verbose=True is passed to the helper).
# NOTE(review): `vitals_df` is rebound to the helper's result, so re-running this cell without re-running the
# load cell feeds already-preprocessed data back into the helper - confirm the helper tolerates that.
vitals_df = preprocess_vitals(vitals_df, verbose=True)

In [None]:
# Preview the first rows of the frame returned by the preprocessing step.
vitals_df.head()

In [None]:
# Descriptive statistics of `vital_value`, computed separately for each `vital_name`.
vitals_df.groupby('vital_name')['vital_value'].describe()


In [None]:
# Turn the 'datetime' column (day.month.year hour:minute) into real datetimes.
parsed_sample_times = pd.to_datetime(vitals_df['datetime'], format='%d.%m.%Y %H:%M')
vitals_df['datetime'] = parsed_sample_times

# Earliest sample time per admission; the result is a Series indexed by case_admission_id.
first_sample_dates_df = vitals_df.groupby('case_admission_id')['datetime'].agg('min')
first_sample_dates_df.head(2)

In [None]:
# Attach each admission's first sample time to every one of its rows. The joined column collides
# with the existing 'datetime' column, so it arrives as 'datetime_first' (rsuffix).
# join() already returns a new DataFrame, so no extra .copy() of the full dataset is needed.
vitals_df_with_rel_dates_df = vitals_df.join(first_sample_dates_df, on='case_admission_id', rsuffix='_first')

In [None]:
# Hours elapsed between each sample and the first sample of the same admission.
# 'datetime' was already converted to datetime64 earlier in the notebook and 'datetime_first' is its
# per-admission minimum, so the two columns are subtracted directly rather than re-parsed with a
# string format that no longer applies to them.
vitals_df_with_rel_dates_df['relative_sample_date'] = (
    vitals_df_with_rel_dates_df['datetime'] - vitals_df_with_rel_dates_df['datetime_first']
).dt.total_seconds() / (60 * 60)

In [None]:
# Scatter plot of one vital sign over time for a single, randomly drawn admission.
vital_name = 'respiratory_rate'

# Seeded generator so that Restart & Run All reproduces the same admission (and figure);
# pass None instead of 42 to draw a different admission on every run.
patient_rng = np.random.default_rng(42)

# Filter on the vital first and draw the admission from what is left, so the selected admission
# always has at least one sample to plot (drawing from all admissions could give an empty figure).
vital_samples_df = vitals_df_with_rel_dates_df[vitals_df_with_rel_dates_df['vital_name'] == vital_name]
pa_id = patient_rng.choice(vital_samples_df['case_admission_id'].unique())
temp = vital_samples_df[vital_samples_df['case_admission_id'] == pa_id].copy()

# Colour follows the plotted value itself, so the legend is switched off.
ax = sns.scatterplot(x='relative_sample_date', y='vital_value', data=temp, hue='vital_value', legend=False)
ax.set_xlabel('Hours from admission')
ax.set_ylabel(vital_name)
ax.tick_params(axis="x", rotation=45)

plt.show()

In [None]:
# Overview figure: one scatter panel per vital name, four panels per row, each panel with its
# own y-axis scale. Points are drawn small and almost transparent.
g = sns.relplot(
    data=vitals_df_with_rel_dates_df,
    x='relative_sample_date',
    y='vital_value',
    col='vital_name',
    col_wrap=4,
    hue='vital_name',
    legend=False,
    alpha=0.05,
    s=5,
    facet_kws={'sharey': False},
)
# Limit the x-axis to 0-350 hours and label the axes of every panel.
g.set(xlim=(0, 350), xlabel='Hours from admission', ylabel='Vital value')
plt.show()

In [None]:
# One distribution plot (histogram with a KDE overlay) per vital name.
# A single groupby pass replaces one full boolean scan of the frame per vital name. sort=False keeps
# the order of first appearance (the order .unique() gives), rows with a missing vital name are
# skipped instead of producing an empty plot, and observed=True only matters if the column is
# categorical (it then skips unused categories).
for vital_name, vital_df in vitals_df_with_rel_dates_df.groupby('vital_name', sort=False, observed=True):
    g = sns.displot(x="vital_value", data=vital_df, kde=True, legend=False)
    g.ax.set_title(vital_name)
    g.ax.set_xlabel(vital_name)
    plt.show()