 # Import Libraries

In [None]:
from __future__ import absolute_import, division, print_function
import os
import base64
import matplotlib
import matplotlib.pyplot as plt
import IPython
import numpy as np
import pandas as pd
import pickle
import pandas_gbq
import random
from tqdm.notebook import tnrange
from sklearn.preprocessing import MinMaxScaler
import joblib
from tableone import TableOne

# Report the directory the notebook is currently running from
working_dir = os.getcwd()
print("Current working dir : %s" % working_dir)

# Import the Dataset

In [None]:
# ----------------------- ICU reduced, filtered dataset -----------------------
# The pickle is read from the current working directory.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
dataset_path = os.path.join(os.getcwd(), 'ICU_ONSET_RANGE_LIM.pickle')
with open(dataset_path, 'rb') as dataset_file:
    df = pickle.load(dataset_file)
print('Dataset loaded.')

# Population Statistics

In [None]:
# Collapse the data to one row per PatientID, keeping each column's maximum
patient_groups = df.groupby(by=['PatientID'])
df_pat = patient_groups.max()

In [None]:
# Display the per-patient frame as the cell output
df_pat

In [None]:
# Variables reported in the population summary table
columns = [
    'admission_age',
    'ethnicity',
    'gender',
    'weight',
    'first_hosp_stay',
    'heart_rate_vs',
    'charlson_comorbidity_index',
    'SOFA_24h',
]

In [None]:
# Subset of the summary columns that are treated as categorical
categorical = [
    'gender',
    'ethnicity',
    'first_hosp_stay',
]

In [None]:
# Column used to stratify the summary table
groupby = [
    'SepsisLabel',
]

In [None]:
# Build the population summary table from the per-patient frame, stratified by
# `groupby`, with p-values enabled.
# Arguments are passed by keyword on purpose: the positional order of TableOne's
# parameters is not stable across tableone releases (recent releases place a
# `continuous` parameter before `groupby`), so positional passing can bind the
# grouping column to the wrong parameter without raising.
mytable = TableOne(
    df_pat,
    columns=columns,
    categorical=categorical,
    groupby=groupby,
    pval=True,
)

In [None]:
# Print the summary table rendered with the "github" table format
print(mytable.tabulate(tablefmt="github"))

In [None]:
# Export the summary table to 'mytable.xlsx' (relative path, so it lands in the
# current working directory)
mytable.to_excel('mytable.xlsx')

In [None]:
# Export the summary table as LaTeX to 'Statistics.tex' (relative path, so it
# lands in the current working directory)
mytable.to_latex('Statistics.tex')

# Count Diagnosis Points

In [None]:
# Rows carrying a positive sepsis label, restricted to the three columns needed.
# A boolean mask replaces groupby(['SepsisLabel']).get_group(1): newer pandas
# deprecates scalar keys in get_group when grouping by a length-1 list.
df_sepsis_agg = df.loc[df['SepsisLabel'] == 1, ['PatientID', 'hr', 'SepsisLabel']]

# Smallest `hr` among each patient's positive rows; the patient id is discarded
# (reset_index(drop=True)) so only the 'hr' column remains.
df_sep_hour = (
    df_sepsis_agg.groupby(['PatientID'])
    .min()
    .reset_index(drop=True)
    .drop(columns='SepsisLabel')
)

# Number of patients per diagnosis hour, ordered by hour, as columns ['hr', 'Count'].
# The count column is named explicitly instead of renaming the default label:
# that label is 0 in older pandas but 'count' in pandas >= 2.0, where the former
# rename(columns={0: "Count"}) silently did nothing and 'Count' never existed.
df_sep_hour_count = (
    df_sep_hour['hr']
    .value_counts()
    .sort_index()
    .rename_axis('hr')
    .reset_index(name='Count')
)

In [None]:
# Bar chart: number of septic patients per diagnosis-hour window.
# Window edges in hours; the last edge is the largest diagnosis hour in the data.
bins = [1, 6, 12, 24, 48, 72, 96, df_sep_hour.hr.max()]

# include_lowest=True keeps patients whose hour equals the first edge (hr == 1);
# with the default left-open first interval (1, 6] they were silently dropped.
# observed=False is explicit so that empty windows still produce a bar and the
# tick labels below stay aligned, whatever the installed pandas default is.
hour_window = pd.cut(df_sep_hour.hr, bins, include_lowest=True)
groups = df_sep_hour.groupby(hour_window, observed=False)

groups.hr.count().plot(
    kind='bar',
    xlabel='Hour of Diagnosis',
    ylabel='Number of Septic Patients',
    figsize=(5, 5),
    yticks=np.arange(0, 6100, 500),
)

# One label per window, named after its upper edge, so the label count and the
# last label follow `bins` instead of being hard-coded.
window_labels = [f'--{edge:g}' for edge in bins[1:]]
plt.xticks(np.arange(len(window_labels)), window_labels, rotation=0);

In [None]:
# Coarser version of the diagnosis-hour bar chart: three windows only.
# The last edge is the largest diagnosis hour in the data.
bins = [1, 6, 12, df_sep_hour.hr.max()]

# include_lowest=True keeps patients whose hour equals the first edge (hr == 1);
# with the default left-open first interval (1, 6] they were silently dropped.
# observed=False is explicit so that empty windows still produce a bar and the
# tick labels below stay aligned, whatever the installed pandas default is.
coarse_window = pd.cut(df_sep_hour.hr, bins, include_lowest=True)
groups_2 = df_sep_hour.groupby(coarse_window, observed=False)

groups_2.hr.count().plot(
    kind='bar',
    xlabel='Hour of Diagnosis',
    ylabel='Number of Septic Patients',
    figsize=(5, 5),
    yticks=np.arange(0, 6100, 500),
)

# One label per window, named after its upper edge, so the label count and the
# last label follow `bins` instead of being hard-coded.
coarse_labels = [f'--{edge:g}' for edge in bins[1:]]
plt.xticks(np.arange(len(coarse_labels)), coarse_labels, rotation=0);

In [None]:
# Bar chart of patient counts for the first rows of the per-hour count table.
# .loc slicing is label-inclusive, so rows labelled up to and including
# n_values are plotted.
n_values = 15
first_hours = df_sep_hour_count.loc[:n_values]
plt.bar(first_hours['hr'].values, first_hours['Count'].values)
plt.ylim(bottom=0, top=500)

In [None]:
# Per-window patient counts as a one-column frame, saved next to the notebook
count_df = groups.hr.count().to_frame(name="Count")
count_df.to_csv('onset_hr_diagnosis.csv')

# Frequency of Variables

In [None]:
# Administrative / bookkeeping columns
admin_feats = ['stay_id', 'weight', 'admission_age', 'gender', 'PatientID', 'hr']

# Columns grouped under the comorbidity heading
comorbidity = [
    'aids',
    'age_score',
    'myocardial_infarct',
    'congestive_heart_failure',
    'cerebrovascular_disease',
    'charlson_comorbidity_index',
    'chronic_pulmonary_disease',
    'dementia',
    'diabetes_with_cc',
    'diabetes_without_cc',
    'malignant_cancer',
    'metastatic_solid_tumor',
    'mild_liver_disease',
    'paraplegia',
    'peptic_ulcer_disease',
    'peripheral_vascular_disease',
    'respiratory_rate_spontaneous',
    'rheumatic_disease',
    'severe_liver_disease',
    'renal_disease',
]

# Columns grouped under the vital-signs heading
vital_signs = [
    'heart_rate_vs',
    'RESP_RATE',
    'OXYGEN_SATURATION',
    'gcs_eyes',
    'gcs_verbal',
    'gcs_motor',
    'gcs_unable',
    'temperature_site_vs',
    'GLUCOSE',
    'TEMPERATURE',
    'SBP_vs',
    'DBP_vs',
    'MBP_vs',
    'gcs',
]


In [None]:
# Categorical data: every column whose dtype is object or bool
object_list = [
    column
    for column in df.columns
    if df[column].dtype in (object, bool)
]

# Columns left out of the frequency analysis: the object/bool columns, the
# comorbidity and vital-sign lists, and the fixed fields named here
ignore_cols = [
    *object_list,
    *comorbidity,
    *vital_signs,
    'stay_id', 'weight', 'admission_age', 'gender', 'PatientID', 'SOFA_24h',
]

In [None]:
# Keep only the columns that are not in ignore_cols
kept_columns = df.columns.difference(ignore_cols)
df_moded = df[kept_columns]

In [None]:
# Split the reduced frame by the sepsis label taken from the full frame
sepsis_label = df['SepsisLabel']
df_sep = df_moded[sepsis_label == 1]
df_non = df_moded[sepsis_label == 0]

In [None]:
# Non-null value count of every remaining column per `hr`, for all rows, septic
# rows and non-septic rows; the label column itself is dropped from each result
df_hr, df_hr_sep, df_hr_non = (
    frame.groupby(['hr']).count().drop(columns='SepsisLabel')
    for frame in (df_moded, df_sep, df_non)
)

In [None]:
# Number of variables (columns) in the per-hour count table for septic rows
df_hr_sep.shape[1]

In [None]:
from scipy.signal import find_peaks

# For each variable: print its name, plot its per-hour count for septic rows and
# mark the detected peaks with an "x"
for name in df_hr_sep.columns:
    hourly_counts = df_hr_sep[name].values
    print(name)
    peak_idx, _ = find_peaks(hourly_counts, prominence=10, width=3)
    plt.plot(hourly_counts)
    plt.plot(peak_idx, hourly_counts[peak_idx], "x")
    plt.show()
    print('------------------')

In [None]:
# Grid of bar charts for septic rows, one panel per variable: how often each
# spacing between consecutive peaks of the variable's per-hour count occurs.
fig, ax = plt.subplots(15, 4, sharey='row', figsize=(20, 40))
fig.subplots_adjust(hspace=0.4, wspace=0.1)
plt.setp(ax, xticks=np.arange(0, 50, 4), xlim=[0, 48], yticks=np.arange(0, 10, 1), ylim=[0, 9])

# Pair panels with columns in row-major order. zip stops at the shorter side, so
# having fewer columns than panels no longer raises IndexError (the former
# `break` only left the inner loop), and no counter named `id` shadows the
# builtin. Columns beyond the 60 available panels are not drawn.
panels = ax.ravel()
n_drawn = min(len(panels), len(df_hr_sep.columns))
for position, (panel, name) in enumerate(zip(panels, df_hr_sep.columns)):
    x = df_hr_sep.iloc[:, position].values
    peaks, _ = find_peaks(x, prominence=20, width=2)
    # Distances between neighbouring peaks and how many times each occurs
    unique, counts = np.unique(np.diff(peaks), return_counts=True)
    panel.bar(unique, counts)
    panel.title.set_text(name)

# Hide panels that have no variable assigned
for panel in panels[n_drawn:]:
    panel.set_visible(False)

In [None]:
# Grid of bar charts for non-septic rows, one panel per variable: how often each
# spacing between consecutive peaks of the variable's per-hour count occurs.
fig, ax = plt.subplots(15, 4, sharey='row', figsize=(20, 40))
fig.subplots_adjust(hspace=0.4, wspace=0.1)
plt.setp(ax, xticks=np.arange(0, 50, 4), xlim=[0, 48], yticks=np.arange(0, 10, 1), ylim=[0, 5])

# Pair panels with columns in row-major order. zip stops at the shorter side, so
# having fewer columns than panels no longer raises IndexError (the former
# `break` only left the inner loop), and no counter named `id` shadows the
# builtin. Columns beyond the 60 available panels are not drawn.
panels = ax.ravel()
n_drawn = min(len(panels), len(df_hr_non.columns))
for position, (panel, name) in enumerate(zip(panels, df_hr_non.columns)):
    x = df_hr_non.iloc[:, position].values
    peaks, _ = find_peaks(x, prominence=20, width=2)
    # Distances between neighbouring peaks and how many times each occurs
    unique, counts = np.unique(np.diff(peaks), return_counts=True)
    panel.bar(unique, counts)
    panel.title.set_text(name)

# Hide panels that have no variable assigned
for panel in panels[n_drawn:]:
    panel.set_visible(False)