In [None]:
import pandas as pd
import numpy as np

import sys, os

import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Switch the working directory to the data folder; every relative path used
# afterwards in this session (the CSV read below, figures saved later)
# resolves against it.
# NOTE(review): absolute, machine-specific path - consider a configurable DATA_DIR.
os.chdir('/home/adam/files/data/17122020/')
df = pd.read_csv('data.csv')

# Cast both timestamp columns to nanosecond-resolution datetimes.
for ts_col in ('start_timestamp', 'end_timestamp'):
    df[ts_col] = df[ts_col].astype('datetime64[ns]')

# Per-column overview (dtype / non-null count) for up to 200 columns.
df.info(max_cols=200)

In [None]:
# Report the distribution of the has_died_during_session flag, then keep only
# the rows where the flag is False.
died_flag = df['has_died_during_session']
print(died_flag.value_counts())
df = df.loc[~died_flag]

In [None]:
# Keep only rows with a recorded pf_ratio_12h_outcome and print the treatment
# group sizes before and after, so the loss per group is visible.
print("Before:", df.treated.value_counts())
# Rebind to a fresh copy instead of using inplace=True: df was produced by
# boolean filtering, and an explicit copy keeps later column assignments on
# df unambiguous (no mutation of a derived slice).
df = df.dropna(axis=0, how='any', subset=['pf_ratio_12h_outcome']).copy()
print("After:", df.treated.value_counts())

In [None]:
# Overlay the distributions of the inclusion and outcome P/F ratio columns
# for every row currently in df, then save the figure to the working directory.
# NOTE(review): sns.distplot is deprecated since seaborn 0.11.
df_plot = df
for column, legend_label in (('pf_ratio_inclusion_8h', 'Inclusion'),
                             ('pf_ratio_12h_outcome', 'Outcome')):
    sns.distplot(df_plot[column], hist=True, kde=True, label=legend_label)

# Plot formatting
plt.legend(prop={'size': 12})
plt.title('P/F ratio improves for all patients')
plt.xlabel('pf_ratio')
plt.ylabel('Density')
plt.xlim(right=400)  # cap the x-axis at 400; the left limit stays automatic

# Relative path: written to the current working directory.
plt.savefig('inclusion_8h_vs_outcome_12h.png')
# Figure comparing inclusion vs. outcome, using the rows of df that remain
# after the earlier filtering steps (non-missing outcome).

In [None]:
# Compare the distribution of the 12h outcome P/F ratio between the treated
# rows (labelled 'Prone') and the untreated rows (labelled 'Supine').
# NOTE(review): sns.distplot is deprecated since seaborn 0.11.
for group_mask, group_label in ((df.treated, 'Prone'), (~df.treated, 'Supine')):
    df_plot = df[group_mask]
    sns.distplot(df_plot['pf_ratio_12h_outcome'],
                 hist=True,
                 kde=True,
                 label=group_label)

# Plot formatting. The title describes what is actually plotted (the 12h
# outcome per group), not the inclusion characteristics.
plt.legend(prop={'size': 12})
plt.title('P/F ratio at 12h outcome: prone vs. supine sessions')
plt.xlabel('pf_ratio')
plt.ylabel('Density')
plt.xlim(right=500)

# Mean 12h outcome per group (treated first, then untreated), to check
# whether the two groups differ on average.
print(df.loc[df.treated, 'pf_ratio_12h_outcome'].mean())
print(df.loc[~df.treated, 'pf_ratio_12h_outcome'].mean())

In [None]:
# Re-express the outcome as a change score: 12h outcome minus the inclusion
# value, stored as a new column, then summarise it.

df['pf_ratio_diff'] = df['pf_ratio_12h_outcome'].sub(df['pf_ratio_inclusion_8h'])
df['pf_ratio_diff'].describe()

In [None]:
# Compare the distribution of the P/F ratio change (pf_ratio_diff) between
# the treated rows ('Prone') and the untreated rows ('Supine').
# NOTE(review): sns.distplot is deprecated since seaborn 0.11.
for group_mask, group_label in ((df.treated, 'Prone'), (~df.treated, 'Supine')):
    df_plot = df[group_mask]
    sns.distplot(df_plot['pf_ratio_diff'],
                 hist=True,
                 kde=True,
                 label=group_label)

# Plot formatting. Title and x-label name the plotted quantity (the change
# score) instead of the inclusion P/F ratio.
plt.legend(prop={'size': 12})
plt.title('Change in P/F ratio (12h outcome - inclusion 8h): prone vs. supine')
plt.xlabel('pf_ratio_diff')
plt.ylabel('Density')
plt.xlim(right=500)

# Mean change per group: treated first, then untreated.
print(df.loc[df.treated, 'pf_ratio_diff'].mean())
print(df.loc[~df.treated, 'pf_ratio_diff'].mean())

In [None]:
# Re-inspect df after filtering and adding pf_ratio_diff; max_cols=200 keeps
# the per-column listing for frames up to 200 columns wide.
df.info(max_cols=200)

In [None]:
# Build the modelling frame by removing three positional column ranges
# (0-3, 5-10 and 14-17) plus four columns identified by name.
# NOTE(review): positional selection silently changes meaning if the column
# order of data.csv changes - confirm against the df.info() listing.
columns_to_drop_1 = list(df.columns[0:4])
columns_to_drop_2 = list(df.columns[5:11])
columns_to_drop_3 = list(df.columns[14:18])
columns_to_drop = [*columns_to_drop_1, *columns_to_drop_2, *columns_to_drop_3]

named_drops = ['has_died_during_session', 'fio2', 'po2', 'gender']
df_model = df.drop(columns=columns_to_drop).drop(columns=named_drops)

In [None]:
# Keep only columns that have non-missing values in at least 75% of the rows.
thresh = round(0.75 * df_model.shape[0])
df_model = df_model.dropna(axis=1, thresh=thresh)

# Also remove the original outcome column (pf_ratio_12h_outcome).
df_model = df_model.drop(columns=['pf_ratio_12h_outcome'])

In [None]:
# Remove every column whose name matches one of these patterns, applying the
# patterns one after another in this order.
for name_pattern in ('atc', 'nice', 'inclusion'):
    matching_columns = df_model.filter(regex=name_pattern).columns
    df_model = df_model.drop(columns=matching_columns)

In [None]:
# Show the remaining columns of the modelling frame; the positional slicing
# in the next cell relies on this column order.
df_model.info()

In [None]:
# Split the modelling frame into arrays by column position.
# NOTE(review): column 0 is presumably the boolean treatment flag and column
# 24 the outcome - verify against df_model.info(). Column 23 ends up in
# neither X nor y; confirm that this is intentional.
treatment_values = df_model.iloc[:, 0].values
treated = treatment_values.astype('int')   # integer copy of the treatment column
t = treatment_values                       # original dtype; later cells index with t and ~t
X = df_model.iloc[:, 1:23].values          # columns 1..22
y = df_model.iloc[:, 24].values


In [None]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

# Fill missing predictor values with the mean of their column.
imp = SimpleImputer(missing_values=np.nan, strategy='mean')
X = imp.fit_transform(X)

# Standardize the predictors (the scaler is fitted on the imputed matrix).
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [None]:
# Instantiate CausalModel

from causalinference import CausalModel

# Positional arguments in order: y, t, then the imputed and standardized X.
# presumably (outcome, treatment indicator, covariates) - confirm against the
# causalinference signature.
causal = CausalModel(y, t, X)

In [None]:
# Print the model's summary_stats attribute for the full (untrimmed) sample.
print(causal.summary_stats)

In [None]:
# Estimate propensity scores with the library's default settings and print
# the resulting propensity attribute.
causal.est_propensity()
print(causal.propensity)

In [None]:
# Compare the distribution of the estimated propensity score between the
# rows selected by t ('Prone') and by ~t ('Supine'), to inspect overlap.
# NOTE(review): sns.distplot is deprecated since seaborn 0.11.
for group_mask, group_label in ((t, 'Prone'), (~t, 'Supine')):
    sns.distplot(causal.raw_data['pscore'][group_mask],
                 hist=True,
                 kde=True,
                 label=group_label)

# Plot formatting. Title and x-label name the plotted quantity (the
# propensity score) rather than a P/F ratio column.
plt.legend(prop={'size': 12})
plt.title('Estimated propensity score by treatment group')
plt.xlabel('Propensity score')
plt.ylabel('Density')



In [None]:
from sklearn.datasets import load_iris  # NOTE(review): not used in this cell
from sklearn.linear_model import LogisticRegression

# Fit a class-balanced logistic regression of the treatment indicator on X.
# NOTE(review): class_weight='balanced' reweights the classes, so the
# predicted probabilities may not reflect the raw treatment prevalence -
# confirm this is intended for a propensity score.
clf = LogisticRegression(random_state=0, class_weight='balanced')
clf.fit(X, t)

# Second column of predict_proba: probability of the second class in
# clf.classes_ (presumably the treated class - verify).
pscore = clf.predict_proba(X)[:, 1]

# Replace the model's stored propensity score by writing directly into the
# private _dict of raw_data.
causal.raw_data._dict['pscore'] = pscore


In [None]:
# Compare the distribution of the propensity score currently stored in
# causal.raw_data between the rows selected by t ('Prone') and ~t ('Supine').
# NOTE(review): sns.distplot is deprecated since seaborn 0.11.
for group_mask, group_label in ((t, 'Prone'), (~t, 'Supine')):
    sns.distplot(causal.raw_data['pscore'][group_mask],
                 hist=True,
                 kde=True,
                 label=group_label)

# Plot formatting. Title and x-label name the plotted quantity (the
# propensity score) rather than a P/F ratio column.
plt.legend(prop={'size': 12})
plt.title('Propensity score by treatment group')
plt.xlabel('Propensity score')
plt.ylabel('Density')

In [None]:
# Map the propensity scores onto the log-odds scale with scipy's logit.
# expit is imported alongside it but is not used in this cell.
from scipy.special import logit, expit
pscore_logit = logit(pscore)
#clf.score(X, t)

In [None]:
# Trim the sample with the model's current cutoff, then report the cutoff
# that was applied and the summary statistics of the trimmed sample.
causal.trim()
# Printed explicitly: a bare expression in the middle of a cell is evaluated
# but never displayed.
print(causal.cutoff)
print(causal.summary_stats)

In [None]:
# Run the OLS estimator with default settings and print all estimates
# collected on the model so far.
causal.est_via_ols()
print(causal.estimates)

In [None]:
# Blocking estimator. est_via_blocking must actually be called (a bare
# attribute reference does nothing), and it needs the sample to be
# stratified first, so build the strata before estimating.
causal.stratify()
causal.est_via_blocking()
print(causal.estimates)

In [None]:
# Stratify the sample with the model's current block setting and print the
# resulting strata.
causal.stratify()
print(causal.strata)

In [None]:
# Re-stratify with stratify_s (the library's data-driven variant - confirm
# against the causalinference docs); this replaces the strata built before.
causal.stratify_s()
print(causal.strata)

In [None]:
# Run the blocking estimator on the strata currently stored on the model and
# print all estimates collected so far.
causal.est_via_blocking()
print(causal.estimates)

In [None]:
# Run the matching estimator with default settings and print all estimates
# collected so far.
causal.est_via_matching()
print(causal.estimates)




In [None]:
# Compare the distribution of the logit-transformed propensity score between
# the rows selected by t ('Prone') and by ~t ('Supine').
# NOTE(review): sns.distplot is deprecated since seaborn 0.11.
for group_mask, group_label in ((t, 'Prone'), (~t, 'Supine')):
    sns.distplot(pscore_logit[group_mask],
                 hist=True,
                 kde=True,
                 label=group_label)

# Plot formatting. Title and x-label name the plotted quantity (log-odds of
# the propensity score) rather than a P/F ratio column.
plt.legend(prop={'size': 12})
plt.title('Log-odds of the propensity score by treatment group')
plt.xlabel('logit(propensity score)')
plt.ylabel('Density')

Do it as in a textbook, and cite each step!

# How does this compare to RCTs?

In [None]:
# Show the block setting used for stratification together with the strata
# currently stored on the model. Both are printed explicitly: a bare
# expression in the middle of a cell is evaluated but never displayed.
print(causal.blocks)
print(causal.strata)

In [None]:
# Rebuild the strata with stratify(); this replaces any strata produced by
# an earlier stratify()/stratify_s() call.
causal.stratify()

In [None]:

# Stand-alone toy example of KNN imputation. It uses its own variable name so
# the feature matrix X of the analysis is not overwritten.
import numpy as np
from sklearn.impute import KNNImputer
nan = np.nan
X_knn_demo = [[1, 2, nan], [3, 4, 3], [nan, 6, 5], [8, 8, 7]]
# Each missing entry is replaced by the unweighted mean of that feature over
# the 2 nearest rows.
imputer = KNNImputer(n_neighbors=2, weights="uniform")
imputer.fit_transform(X_knn_demo)