In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import os
import seaborn as sns
from preprocessing.scales_preprocessing.scales_preprocessing import preprocess_scales


In [None]:
# Root directory of the extraction. It can be overridden with the STROKE_DATA_PATH
# environment variable so the notebook is not tied to one machine; when the
# variable is unset the original local path is used.
data_path = os.environ.get(
    'STROKE_DATA_PATH',
    '/Users/jk1/stroke_datasets/stroke_unit_dataset/per_value/Extraction_20211110',
)
# Filename prefix used to pick files out of data_path.
scales_file_start = 'scale'

In [None]:
# Load the semicolon-separated EDS file; the identifier columns are read as strings.
eds_file_path = os.path.join(data_path, 'eds_j1.csv')
eds_string_columns = ['patient_id', 'eds_end_4digit', 'eds_final_patient_id']
eds_df = pd.read_csv(
    eds_file_path,
    delimiter=';',
    encoding='utf-8',
    dtype={column: 'string' for column in eds_string_columns},
)

In [None]:
# Collect the files whose name starts with scales_file_start. os.listdir returns
# entries in arbitrary order, so the names are sorted to make the row order of the
# concatenated frame reproducible from run to run.
scales_file_names = sorted(f for f in os.listdir(data_path)
                           if f.startswith(scales_file_start))
if not scales_file_names:
    # pd.concat would raise a ValueError on an empty list anyway; fail with a
    # message that says what is actually missing.
    raise ValueError(f"No files starting with '{scales_file_start}' found in {data_path}")

# Every column is read as str.
scales_files = [pd.read_csv(os.path.join(data_path, f), delimiter=';', encoding='utf-8', dtype=str)
                for f in scales_file_names]
scales_df = pd.concat(scales_files, ignore_index=True)

In [None]:

# Run the project's scale preprocessing on the raw scale rows together with the EDS table.
# NOTE(review): this rebinds scales_df to the preprocessed result, so re-running this
# cell on its own feeds already-preprocessed data back into preprocess_scales —
# re-run the loading cell first.
scales_df = preprocess_scales(scales_df, eds_df)
scales_df.head()

In [None]:
# Descriptive statistics of the score column, one row per scale.
scores_by_scale = scales_df.groupby('scale')['score']
scores_by_scale.describe()

In [None]:
# Parse the event timestamps, which are written as 'day.month.year hour:minute'.
event_timestamps = pd.to_datetime(scales_df['event_date'], format='%d.%m.%Y %H:%M')
scales_df['event_date'] = event_timestamps
# Earliest event timestamp per case_admission_id (a Series indexed by case_admission_id).
events_by_admission = scales_df.groupby('case_admission_id')['event_date']
first_sample_dates_df = events_by_admission.min()
first_sample_dates_df.head(2)

In [None]:
# Attach each admission's first event timestamp to every row. The joined Series is
# also named 'event_date', so rsuffix places it in the 'event_date_first' column.
# DataFrame.join already returns a new frame, so no extra .copy() is needed.
scales_df_with_rel_dates_df = scales_df.join(first_sample_dates_df, on='case_admission_id',
                                             rsuffix='_first')
# Hours elapsed since the admission's first sample. event_date was parsed to datetime
# before first_sample_dates_df was derived from it, so both columns are subtracted
# directly instead of being passed through pd.to_datetime a second time.
elapsed_since_first = (scales_df_with_rel_dates_df['event_date']
                       - scales_df_with_rel_dates_df['event_date_first'])
scales_df_with_rel_dates_df['relative_sample_date'] = elapsed_since_first.dt.total_seconds() / (60 * 60)

In [None]:
# One scatter panel per scale: score against relative_sample_date.
# sharey=False gives every panel its own y-axis.
relplot_options = dict(
    x='relative_sample_date',
    y='score',
    col='scale',
    hue='scale',
    legend=False,
    alpha=0.1,
    facet_kws={'sharey': False},
)
g = sns.relplot(data=scales_df_with_rel_dates_df, **relplot_options)
# Restrict the x-axis to the 0-350 range.
g.set(xlim=(0, 350))
plt.show()

In [None]:
# Draw one score histogram (with a KDE overlay) per scale, titled with the scale name.
all_scale_rows = scales_df_with_rel_dates_df
for scale in all_scale_rows['scale'].unique():
    is_current_scale = all_scale_rows['scale'] == scale
    scale_df = all_scale_rows[is_current_scale]
    g = sns.displot(data=scale_df, x="score", kde=True, legend=False)
    g.ax.set_title(scale)
    plt.show()

In [None]:
# Subsets restricted to a single scale each.
scale_names = scales_df_with_rel_dates_df['scale']
NIHSS_df = scales_df_with_rel_dates_df[scale_names == 'NIHSS']
GCS_df = scales_df_with_rel_dates_df[scale_names == 'Glasgow Coma Scale']

In [None]:
# Scores over time for a single admission, taken from the Glasgow Coma Scale subset.
pa_id = '97572842_10062019'
pa_df = GCS_df[GCS_df['case_admission_id'] == pa_id]

ax = sns.scatterplot(x='relative_sample_date', y='score', data=pa_df, hue='score', legend=True)
# NOTE(review): relative_sample_date counts hours from the admission's first recorded
# sample, which is presumably close to admission — confirm.
ax.set_xlabel('Hours from admission')
# The plotted rows come from GCS_df, so the axis carries that scale's name; it was
# previously labelled 'NIHSS'. If NIHSS was the intended scale, switch pa_df to
# NIHSS_df instead of changing this label back.
ax.set_ylabel('Glasgow Coma Scale')

plt.show()
