In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import os
import numpy as np
import seaborn as sns
from preprocessing.ventilation_preprocessing.ventilation_preprocessing import preprocess_ventilation


In [None]:
# NOTE: absolute, machine-specific location of the extraction; adjust to your local copy.
data_path = '/Users/jk1/stroke_datasets/stroke_unit_dataset/per_value/Extraction_20211110'
ventilation_file_start = 'ventilation'
# Read every file whose name starts with the ventilation prefix. All columns are
# loaded as strings (dtype=str); type conversion is left to later steps.
# os.listdir returns entries in arbitrary order, so sort the names to make the
# load order (and therefore the row order of the combined frame) reproducible.
ventilation_files = [pd.read_csv(os.path.join(data_path, f), delimiter=';', encoding='utf-8', dtype=str)
                     for f in sorted(os.listdir(data_path))
                     if f.startswith(ventilation_file_start)]

In [None]:
# Stack the per-file frames row-wise into one table with a fresh 0..n-1 index.
ventilation_df = pd.concat(objs=ventilation_files, axis=0, ignore_index=True)

In [None]:
# The project-level preprocessing helper returns two tables; the cells below
# use them as the FiO2 and SpO2 frames respectively.
(fio2_df, spo2_df) = preprocess_ventilation(ventilation_df)

In [None]:
# Parse the timestamp strings (day.month.year hour:minute) into real datetimes.
fio2_df['datetime'] = pd.to_datetime(
    fio2_df['datetime'],
    format='%d.%m.%Y %H:%M',
)
# Earliest FiO2 sample per admission; the result is indexed by case_admission_id.
first_sample_dates_df = (
    fio2_df
    .groupby(by='case_admission_id')['datetime']
    .min()
)
first_sample_dates_df.head(n=2)

In [None]:
# Attach each admission's first sample time as a 'datetime_first' column
# (join already returns a new frame, so no extra copy is needed).
fio2_df_with_rel_dates_df = fio2_df.join(first_sample_dates_df, on='case_admission_id', rsuffix='_first')
# Both columns are already datetimes (parsed in the previous cell), so they can be
# subtracted directly; the elapsed time is stored in hours despite the column name.
fio2_df_with_rel_dates_df['relative_sample_date'] = (
    fio2_df_with_rel_dates_df['datetime'] - fio2_df_with_rel_dates_df['datetime_first']
).dt.total_seconds() / (60 * 60)

In [None]:
# Scatter of every FiO2 sample against the hours elapsed since the first
# sample of its admission; points are coloured by their FiO2 value.
g = sns.relplot(
    data=fio2_df_with_rel_dates_df,
    x='relative_sample_date',
    y='FIO2',
    hue='FIO2',
    alpha=0.3,
    legend=False,
    facet_kws={'sharey': False},
)
# Limit the x axis to the first 350 hours and label both axes.
g.set(xlim=(0, 350), xlabel='Hours from admission', ylabel='FiO2')
plt.show()

In [None]:
# Histogram with a KDE overlay of all FiO2 values.
g = sns.displot(data=fio2_df, x='FIO2', kde=True, legend=False)
g.set_xlabels('FIO2')
# FacetGrid.set_titles only writes titles for row/col facets; this grid has
# none, so the title is set on the axes directly to make it actually appear.
g.set(title='FIO2')
plt.show()

In [None]:
# Histogram with a KDE overlay of all SpO2 values.
g = sns.displot(data=spo2_df, x='spo2', kde=True, legend=False)
g.set_xlabels('spO2')
# FacetGrid.set_titles only writes titles for row/col facets; this grid has
# none, so the title is set on the axes directly to make it actually appear.
g.set(title='spO2')
plt.show()