In [None]:
%reset

In [None]:
from numpy import median

import os, sys, random

import pandas as pd
import numpy as np
import swifter

from datetime import timedelta, date
from importlib import reload
from data_warehouse_utils.dataloader import DataLoader

from causal_inference.create_experiment.create_observations import create_observations
from causal_inference.create_experiment.create_covariates import add_covariates

In [None]:
# Reloads packages
# Switch to the project checkout before reloading
# (presumably so the package resolves from the working directory — confirm).
os.chdir('/home/adam/files/causal_inference')

from importlib import import_module

# import_module returns the module if it is already loaded and imports it
# otherwise, so reload() never fails with a KeyError on sys.modules when a
# module (e.g. create_treatment) has not been imported yet in this kernel.
for module_name in ('causal_inference.create_experiment.create_treatment',
                    'causal_inference.create_experiment.create_observations',
                    'causal_inference.create_experiment.create_covariates'):
    reload(import_module(module_name))

# Re-bind the names so the notebook uses the freshly reloaded functions.
from causal_inference.create_experiment.create_observations import create_observations
from causal_inference.create_experiment.create_covariates import add_covariates

### 1. Loads proning sessions

Loads all proning sessions that match a `hash_patient_id` from the Patients table in the
Data Warehouse.

In [None]:
# Project data loader; it is handed to create_observations and add_covariates in the cells below.
dl = DataLoader()

In [None]:
# Build the observations frame from the data loader; the next cell filters it on its `treated` column.
df = create_observations(dl)

In [None]:
# Keep only the rows flagged as treated.
df_treated = df.loc[df['treated'] == True]


In [None]:
# Attach the body mass index covariate to each treated session, using
# interval_start=5000 and interval_end=0 (units are defined by add_covariates).
# The second return value is not needed here.
df_treated, _ = add_covariates(
    dl=dl,
    df=df_treated,
    interval_start=5000,
    interval_end=0,
    covariates=['body_mass_index'],
)

In [None]:
# Preview the first rows after the body_mass_index covariate was requested.
df_treated.head()

In [None]:
# Column dtypes and non-null counts of the treated frame.
df_treated.info()

In [None]:
# Spot-check ten random rows, showing `bmi_first` next to `body_mass_index`
# (presumably the pre-existing BMI versus the covariate requested above — confirm).
df_treated[['bmi_first', 'body_mass_index']].sample(10)

In [None]:
# Persist the treated sessions (without the index) in the data directory.
os.chdir('/home/adam/files/data')
df_treated.to_csv(path_or_buf='treated_data_skeleton.csv', index=False)

### 2. Analysis of proning sessions

In [None]:
# Reload the treated-session skeleton from the data directory.
os.chdir('/home/adam/files/data')
df_treated = pd.read_csv('treated_data_skeleton.csv')

# read_csv returns the timestamps as plain strings; convert each column back
# from its OWN values. (Previously end_timestamp was overwritten with the
# start_timestamp values, silently destroying the session end times.)
df_treated['start_timestamp'] = df_treated['start_timestamp'].astype('datetime64[ns]')
df_treated['end_timestamp'] = df_treated['end_timestamp'].astype('datetime64[ns]')
df_treated.info()

In [None]:
# Headline numbers for the treated sessions: patients, sessions, median duration.
print(df_treated['hash_patient_id'].nunique(), "patients got treated.")
print("We loaded", len(df_treated), "treatment sessions of length at most 96 hours.")
print("With median length of", median(df_treated['duration_hours']), "hours.")

In [None]:
# Number of treated sessions recorded per origin hospital.
df_treated['pacmed_origin_hospital'].value_counts()

In [None]:
# Frequency of each value in the `outcome` column.
df_treated['outcome'].value_counts()

In [None]:
# Frequency of each value in the `has_died_during_session` column.
df_treated['has_died_during_session'].value_counts()

### 3. Loading inclusion criteria

In [None]:
# Add the inclusion-criteria covariates with interval_start=4, interval_end=0
# (presumably a 4-hour window, given the `_4h` column names used later — confirm).
df_treated, time_report = add_covariates(
    dl=dl,
    df=df_treated,
    interval_start=4,
    interval_end=0,
    covariates=['fio2', 'peep', 'po2_arterial'],
)

In [None]:
# Add the same covariates again, this time with interval_start=6, interval_end=0.
# The input is the frame that already carries the interval_start=4 values.
df_treated_6, time_report_6 = add_covariates(
    dl=dl,
    df=df_treated,
    interval_start=6,
    interval_end=0,
    covariates=['fio2', 'peep', 'po2_arterial'],
)

In [None]:
# Give the suffixed covariate columns explicit names: `_x` -> `_4h`, `_y` -> `_6h`.
# (The `_x`/`_y` suffixes presumably come from a merge inside add_covariates — confirm.)
column_renames = {
    "fio2_x": "fio2_4h",
    "peep_x": "peep_4h",
    "po2_arterial_x": "po2_arterial_4h",
    "fio2_y": "fio2_6h",
    "peep_y": "peep_6h",
    "po2_arterial_y": "po2_arterial_6h",
}
df_treated_6.rename(columns=column_renames, inplace=True)
df_treated_6.info()

In [None]:
# Write the frame with inclusion covariates and its time report to the data directory.
os.chdir('/home/adam/files/data')

# The data frame is saved without its index; the time report keeps its index.
df_treated_6.to_csv(path_or_buf='treated_data_skeleton_6h_inclusion.csv', index=False)
time_report_6.to_csv(path_or_buf='treated_data_skeleton_6h_inclusion_time_report.csv', index=True)

### 4. Analysis

In [None]:
# Reload the treated sessions with inclusion covariates from the data directory.
os.chdir('/home/adam/files/data')
df = pd.read_csv('treated_data_skeleton_6h_inclusion.csv')

# read_csv returns the timestamps as plain strings; convert each column back
# from its OWN values. (Previously end_timestamp was overwritten with the
# start_timestamp values, silently destroying the session end times.)
df['start_timestamp'] = df['start_timestamp'].astype('datetime64[ns]')
df['end_timestamp'] = df['end_timestamp'].astype('datetime64[ns]')
df.info()

In [None]:
# Discard sessions that lack any of the three 4h inclusion measurements.
df = df.dropna(axis='index', how='any', subset=['fio2_4h', 'peep_4h', 'po2_arterial_4h'])

In [None]:
# Check row counts and dtypes after dropping rows with missing 4h measurements.
df.info()

In [None]:
# Add the P/F ratio: po2_arterial_4h divided by fio2_4h, scaled by 100 and
# rounded to the nearest integer value
# (the factor 100 presumably converts a percentage fio2 to a fraction — confirm).
#
# The rounding is vectorized: the previous per-row Python round() raised
# OverflowError/ValueError on inf/NaN (e.g. when fio2_4h is 0). Such rows now
# keep inf/NaN and are excluded by the later `pf_4h < 300` filters.
# assign() returns a new frame, which also avoids SettingWithCopyWarning on
# the frame produced by dropna().
pf_ratio = df['po2_arterial_4h'] / df['fio2_4h'] * 100
df = df.assign(pf_4h=pf_ratio.round())

In [None]:
# Preview the first rows of the analysis frame.
df.head()

#### Proning Length w.r.t. hospitals

In [None]:
# Number of sessions per origin hospital in the analysis frame.
df['pacmed_origin_hospital'].value_counts()

In [None]:
import matplotlib.pyplot as plt

# Histogram of session durations over all hospitals.
# The axis labels and title now describe the plotted data; the tutorial
# placeholders ('Value', 'My Very Own Histogram') and the unrelated
# '$\mu=15, b=3$' annotation were removed because they mislabelled the figure.
n, bins, patches = plt.hist(x=df.duration_hours.values,
                            bins='auto',
                            color='#0504aa',
                            alpha=0.7,
                            rwidth=0.85)
plt.grid(axis='y', alpha=0.75)
plt.xlabel('Session duration (hours)')
plt.ylabel('Number of sessions')
plt.title('Distribution of proning session duration')
maxfreq = n.max()
# Set a clean upper y-axis limit: round up to the next multiple of 10, or add
# 10 of headroom when the tallest bin is already a multiple of 10.
plt.ylim(top=np.ceil(maxfreq / 10) * 10 if maxfreq % 10 else maxfreq + 10);

In [None]:
import seaborn as sns

# Restrict the comparison to the listed hospitals.
hospitals = ['amc', 'vumc', 'olvg', 'erasmus', 'amphia']
df_hospitals = df.loc[df['pacmed_origin_hospital'].isin(hospitals)]

# Kernel density estimate of session duration, one curve per hospital.
sns.displot(
    data=df_hospitals,
    x="duration_hours",
    hue='pacmed_origin_hospital',
    kind="kde",
    bw_adjust=.7,
    cut=0,
)

#### Distribution of parameters w.r.t. hospitals

In [None]:
# Keep sessions with a P/F ratio below 300 for the per-hospital plot.
# The mask is built from df_hospitals itself: a mask taken from the larger
# `df` frame has a different index, which forces pandas to reindex it (with a
# "Boolean Series key will be reindexed" warning) and breaks if the indexes
# ever stop lining up.
df_plot = df_hospitals[df_hospitals['pf_4h'] < 300]

# Step histogram of the 4h P/F ratio, split by hospital.
sns.displot(data=df_plot,
            x="pf_4h",
            hue='pacmed_origin_hospital',
            element="step",
            bins=10)

In [None]:
# Step histogram of the 4h P/F ratio for all sessions with a value below 300.
sns.displot(
    data=df.loc[df['pf_4h'] < 300],
    x="pf_4h",
    element="step",
    bins=10,
)