In [12]:
import evaluation.treatment_utils as  u
import evaluation.descriptive_utils as d
import pandas as pd
import numpy as np
import itertools
from scipy import stats
import matplotlib.pyplot as plt
from ipysankeywidget import SankeyWidget

In [13]:
from floweaver import *

In [14]:
# --- Data location and outcome under study ---
data_path = '../../covid19_treatments_data/matched_single_treatments_der_val_addl_outcomes/'
outcome = 'COMORB_DEATH'

# --- Run switches ---
preload = True
matched = True
# Tag used in file names to tell the matched and unmatched cohorts apart.
match_status = {True: 'matched', False: 'unmatched'}[bool(matched)]

# --- Model configuration ---
SEEDS = range(1, 2)  # yields the single seed 1
# Earlier selection: ['lr','rf','cart','qda','gb','xgboost']
algorithm_list = ['lr', 'rf', 'cart', 'oct', 'xgboost', 'qda', 'gb']

# --- Treatment and data version being summarised ---
# Treatment options: ['CORTICOSTEROIDS', 'INTERFERONOR', 'ACEI_ARBS']
treatment = 'CORTICOSTEROIDS'
# Data version options: 'train', 'test', 'validation', 'validation_cremona', 'validation_hope'
data_version = 'train'
weighted_status = 'no_weights'
threshold = 0.01

In [15]:
## Derived settings: everything here follows from the configuration cell
## (no need to edit).
treatment_list = [treatment, 'NO_' + treatment]
results_path = '../../covid19_treatments_results/'
version_folder = 'matched_single_treatments_der_val_addl_outcomes/{}/{}/'.format(treatment, outcome)
save_path = results_path + version_folder + 'summary/'

training_set_name = '{}_hope_hm_cremona_matched_all_treatments_train.csv'.format(treatment)

## Load X, Z and y for the chosen data version.
X, Z, y = u.load_data(data_path, training_set_name,
                      split=data_version, matched=matched, prediction=outcome)

## Per-patient summary saved for the same data version / weighting / threshold.
summary_file = '{}_{}_bypatient_summary_{}_t{}.csv'.format(
    data_version, match_status, weighted_status, threshold)
summary = pd.read_csv(save_path + summary_file)
Z_presc = summary['Prescribe']
Y_presc = summary['AverageProbability']

## Attach the treatment and outcome columns to the feature frame.
# NOTE(review): Z_presc and Y_presc are pandas Series, so the assignment aligns
# on index labels -- assumes the summary rows share X's index; TODO confirm.
for column, values in (('Z', Z), ('Z_presc', Z_presc), ('Y', y), ('Y_presc', Y_presc)):
    X[column] = values

## Same loader call for the test split.
# NOTE(review): the train file name is passed with split='test'; presumably
# load_data derives the actual file from `split` -- verify against the helper.
X_test, Z_test, y_test = u.load_data(data_path, training_set_name,
                                     split='test', matched=matched, prediction=outcome)

CORTICOSTEROIDS_hope_hm_cremona_matched_all_treatments_train.csv
CORTICOSTEROIDS_hope_hm_cremona_matched_all_treatments_test.csv


In [93]:
# Default widget size shared by every Sankey diagram in this notebook
# (plot_flows reads `size`, `nodes`, `bundles` and `ordering` from this cell).
# Colour palette reference:
## https://jiffyclub.github.io/palettable/colorbrewer/qualitative/

size = dict(width=570, height=300)

# One row per (given, prescribed) treatment pair with the number of patients in it.
df_switch = X.groupby(['Z','Z_presc']).size().reset_index()
df_switch = df_switch.rename({'Z':'source','Z_presc':'target',0:'value'},axis=1)
df_switch['source'] = ['Given: \n'+x.replace("_"," ").capitalize() for x in df_switch['source']]
df_switch['target'] = ['Prescribed: \n'+x.replace("_"," ").capitalize() for x in df_switch['target']]

# Node labels on each side of the diagram.
given_labels = df_switch['source'].unique().tolist()
prescribed_labels = df_switch['target'].unique().tolist()

nodes = {
    'given': ProcessGroup(given_labels),
    'prescribed': ProcessGroup(prescribed_labels),
}

# Show every treatment label as its own node instead of one merged group.
nodes['given'].partition = Partition.Simple('process', values = given_labels)
nodes['prescribed'].partition = Partition.Simple('process', values = prescribed_labels)

ordering = [
    ['given'],        # treatment actually given on the left...
    ['prescribed'],   # ... prescribed treatment on the right.
]

bundles = [
    Bundle('given', 'prescribed'),
]

sdd = SankeyDefinition(nodes, bundles, ordering)
# Weave the flows built in this cell. `df_plot` is only created by later cells,
# so referencing it here fails on a fresh kernel (or plots stale data).
weave(sdd, df_switch).to_widget(**size)

SankeyWidget(groups=[{'id': 'given', 'type': 'process', 'title': '', 'nodes': ['given^Given: \nCorticosteroids…

In [None]:
## Gender


In [57]:
def create_flows(X, feature, label_dict = {}):
    """Count patients per (given treatment, prescribed treatment, feature value).

    Parameters
    ----------
    X : DataFrame
        Must contain the columns 'Z', 'Z_presc' and `feature`; 'Z' and
        'Z_presc' hold string treatment names.
    feature : str
        Column whose values become the flow 'type'.
    label_dict : dict, optional
        Mapping handed to ``Series.replace`` to relabel the feature values.
        An empty dict (the default) leaves the values untouched.

    Returns
    -------
    DataFrame
        Columns 'source', 'target', 'type' and 'value', one row per observed
        combination, with 'value' holding the group size.
    """
    def _pretty(prefix, raw_name):
        # e.g. "NO_CORTICOSTEROIDS" -> prefix + "No corticosteroids"
        return prefix + raw_name.replace("_", " ").capitalize()

    counts = X.groupby(['Z', 'Z_presc', feature]).size().reset_index()
    flows = counts.rename(
        columns={'Z': 'source', 'Z_presc': 'target', feature: 'type', 0: 'value'}
    )
    if label_dict != {}:
        flows['type'] = flows['type'].replace(label_dict)
    flows['source'] = [_pretty('Given: \n', name) for name in flows['source']]
    flows['target'] = [_pretty('Prescribed: \n', name) for name in flows['target']]
    return flows


def plot_flows(df_plot, palette):
    """Render a flow table as a Sankey widget, coloured by flow type.

    Parameters
    ----------
    df_plot : DataFrame
        Flow table with a 'type' column, as returned by ``create_flows``.
    palette : colour specification forwarded to ``weave``; the calls in this
        notebook pass a dict keyed by the values of the 'type' column.

    Returns
    -------
    The widget produced by ``weave(...).to_widget``.

    Relies on the notebook-level ``nodes``, ``bundles``, ``ordering`` and
    ``size`` defined in the overall Sankey cell.
    """
    flow_types = df_plot['type'].unique().tolist()
    definition = SankeyDefinition(
        nodes, bundles, ordering,
        flow_partition=Partition.Simple('type', flow_types),
    )
    sankey = weave(definition, df_plot, palette=palette)
    return sankey.to_widget(**size)

In [91]:
# Age in half-open bins [0,40), [40,55), [55,70), [70,110); labels=False stores
# the integer bin index rather than an interval.
age_bin_edges = [0, 40, 55, 70, 110]
X['AgeGroup'] = pd.cut(X['AGE'], bins=age_bin_edges, right=False, labels=False)

# Creatinine in two half-open bins [0,0.8) and [0.8,2); pd.cut leaves values
# outside the bin range as NaN.
creatinine_bin_edges = [0, 0.8, 2]
X['CreatinineGroups'] = pd.cut(X['CREATININE'], bins=creatinine_bin_edges, right=False)

SankeyWidget(groups=[{'id': 'given', 'type': 'process', 'title': '', 'nodes': ['given^Given: \nCorticosteroids…

In [None]:
## By gender
# GENDER_MALE values 0/1 are relabelled so the palette can be keyed by name.
gender_labels = {0: 'Female', 1: 'Male'}
gender_palette = {'Male': 'lightblue', 'Female': 'red'}
df_plot = create_flows(X, 'GENDER_MALE', gender_labels)
plot_flows(df_plot, gender_palette)

In [92]:
## By CRP
# PCR_B values 0/1 are relabelled to readable names. The palette must be keyed
# by those same names; keys copied from the gender plot ('Male'/'Female') match
# no flow type in this table.
crp_labels = {0: 'Normal CRP', 1: 'Elevated CRP'}
df_plot = create_flows(X, 'PCR_B', crp_labels)
plot_flows(df_plot, {'Normal CRP': 'lightblue', 'Elevated CRP': 'red'})

SankeyWidget(groups=[{'id': 'given', 'type': 'process', 'title': '', 'nodes': ['given^Given: \nCorticosteroids…

In [94]:
## By O2 sat
# No relabelling here: the raw SAT02_BELOW92 values are the flow types, so the
# palette is keyed by 0 and 1 (the values the flag is presumed to take).
# Alternative palette for a four-level feature:
# {0: 'lightblue', 1: 'steelblue', 2:'blue', 3:'darkblue'}
sat_palette = {0: 'lightblue', 1: 'red'}
df_plot = create_flows(X, 'SAT02_BELOW92')
plot_flows(df_plot, palette=sat_palette)

SankeyWidget(groups=[{'id': 'given', 'type': 'process', 'title': '', 'nodes': ['given^Given: \nCorticosteroids…

In [97]:
## Blood pressure plot
# No relabelling here: the raw BLOOD_PRESSURE_ABNORMAL_B values are the flow
# types, so the palette is keyed by 0 and 1 (the values the flag is presumed
# to take).
# Alternative palette for a four-level feature:
# {0: 'lightblue', 1: 'steelblue', 2:'blue', 3:'darkblue'}
bp_palette = {0: 'lightblue', 1: 'red'}
df_plot = create_flows(X, 'BLOOD_PRESSURE_ABNORMAL_B')
plot_flows(df_plot, palette=bp_palette)

SankeyWidget(groups=[{'id': 'given', 'type': 'process', 'title': '', 'nodes': ['given^Given: \nCorticosteroids…