In [51]:
import pickle
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import warnings
from sklearn.model_selection import train_test_split
from dops import *
warnings.filterwarnings('ignore')

In [52]:
with open('./Data/states_bpleq65.p','rb') as f:
    states = pickle.load(f)

In [53]:
patient_ids = list(states.keys())

In [54]:
mini_patient = patient_ids[:1000]

In [55]:
pid = mini_patient[0]
df_clean = states[pid].loc[:,~states[pid].columns.isin(['Times','bicarbonate_ind', 'bun_ind','creatinine_ind',
                                             'fio2_ind','glucose_ind','hct_ind','hr_ind','lactate_ind',
                                            'magnesium_ind','meanbp_ind','platelets_ind','potassium_ind',
                                             'sodium_ind','spo2_ind','spontaneousrr_ind','temp_ind','urine_ind','wbc_ind'])]

df_clean.insert(0,'ICU_ID',pid)
df_x_train = df_clean.loc[:,~df_clean.columns.isin(['sofa','oasis','saps'])]
df_y_train = df_clean[['sofa','oasis','saps']]

x_train = df_x_train.iloc[0]
y_train = df_y_train.iloc[0]


for pid in mini_patient[1:]:
    df_clean = states[pid].loc[:,~states[pid].columns.isin(['Times','bicarbonate_ind', 'bun_ind','creatinine_ind',
                                             'fio2_ind','glucose_ind','hct_ind','hr_ind','lactate_ind',
                                            'magnesium_ind','meanbp_ind','platelets_ind','potassium_ind',
                                             'sodium_ind','spo2_ind','spontaneousrr_ind','temp_ind','urine_ind','wbc_ind'])]
    
    df_clean.insert(0,'ICU_ID',pid)
    df_x_train = df_clean.loc[:,~df_clean.columns.isin(['sofa','oasis','saps'])]
    df_y_train = df_clean[['sofa','oasis','saps']]
    
    x_train = pd.concat([x_train,df_x_train.iloc[0]],axis=1)
    y_train = pd.concat([y_train,df_y_train.iloc[0]],axis=1)

In [56]:
x_train = x_train.T.reset_index(drop=True)
y_train = y_train.T.reset_index(drop=True)

In [57]:
##1's indicate the patient's feature is in bad condition, 0's means feature values in normal

x_train['age'] = np.where(x_train['age']>=60,1,0)
x_train['weight'] = np.where(x_train['weight']>=75,1,0)
x_train['hrs_from_admit_to_icu'] = np.where(x_train['hrs_from_admit_to_icu']>=1,1,0)
x_train['bicarbonate'] = np.where(x_train['bicarbonate']<=20,1,0)
x_train['bun'] = np.where(np.logical_and(x_train['bun']>=7,x_train['bun']<=20),0,1)
x_train['creatinine'] = np.where(np.logical_and(x_train['creatinine']>=0.5,x_train['creatinine']<=1.2),0,1)
x_train['fio2'] = np.where(x_train['fio2']>=0.5,1,0)
x_train['glucose'] = np.where(x_train['glucose']>=125,1,0)
x_train['hct'] = np.where(np.logical_and(x_train['hct']>=37,x_train['hct']<=52),0,1)
x_train['hr'] = np.where(np.logical_and(x_train['hr']>=70,x_train['hr']<=119),0,1)
x_train['lactate'] = np.where(x_train['lactate']>=2,1,0)
x_train['magnesium'] = np.where(np.logical_and(x_train['magnesium']>=1.5,x_train['magnesium']<=2.5),0,1)
x_train['meanbp'] = np.where(x_train['meanbp']<=65,1,0)
x_train['platelets'] = np.where(np.logical_and(x_train['platelets']>=140,x_train['platelets']<=450),0,1)
x_train['potassium'] = np.where(np.logical_and(x_train['potassium']>=3,x_train['potassium']<=4.9),0,1)
x_train['sodium'] = np.where(np.logical_and(x_train['sodium']>=125,x_train['potassium']<=144),0,1)
x_train['spo2'] = np.where(x_train['spo2']<=95,1,0)
x_train['spontaneousrr'] = np.where(np.logical_and(x_train['spontaneousrr']>=12,x_train['spontaneousrr']<=25),0,1)
x_train['temp'] = np.where(x_train['temp']>=39,1,0)
x_train['urine'] = np.where(x_train['urine']<=100,1,0)
x_train['wbc'] = np.where(np.logical_and(x_train['wbc']>=4.3,x_train['wbc']<=10.8),0,1)


In [58]:
x_train

Unnamed: 0,ICU_ID,age,is_F,weight,surg_ICU,is_not_white,is_emergency,is_urgent,hrs_from_admit_to_icu,bicarbonate,...,magnesium,meanbp,platelets,potassium,sodium,spo2,spontaneousrr,temp,urine,wbc
0,200028.0,1,0.0,1,0.0,0.0,0.0,0.0,1,1,...,0,0,1,0,0,0,0,0,1,1
1,200033.0,1,0.0,0,1.0,0.0,1.0,0.0,0,0,...,0,0,0,0,0,0,0,0,1,1
2,200049.0,1,0.0,1,0.0,0.0,1.0,0.0,1,0,...,0,1,1,0,0,0,0,0,1,1
3,200069.0,1,0.0,1,1.0,1.0,0.0,0.0,1,0,...,0,0,0,0,0,0,0,0,1,0
4,200072.0,0,0.0,1,1.0,0.0,1.0,0.0,0,0,...,0,0,0,0,0,0,0,0,1,0
5,200075.0,1,1.0,0,0.0,0.0,1.0,0.0,0,0,...,0,0,0,0,0,0,0,0,1,0
6,200087.0,1,0.0,1,0.0,0.0,1.0,0.0,1,0,...,0,0,0,0,0,1,1,0,1,0
7,200098.0,1,0.0,1,0.0,1.0,1.0,0.0,0,0,...,0,0,0,0,0,0,0,0,1,0
8,200099.0,1,0.0,0,1.0,0.0,0.0,0.0,1,0,...,1,1,1,0,0,1,0,0,1,0
9,200116.0,0,0.0,1,0.0,0.0,1.0,0.0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [59]:
df_for_median = x_train.drop(['ICU_ID','is_F','is_not_white','is_emergency','is_urgent','surg_ICU'],axis=1)
median_dict = {}
for column in df_for_median.columns:
    #median_dict[column] = np.median(df_for_median[column].values)
    print(column,np.sum(df_for_median[column].values))
    #print(column)

age 669
weight 547
hrs_from_admit_to_icu 474
bicarbonate 149
bun 602
creatinine 278
fio2 1000
glucose 604
hct 726
hr 97
lactate 261
magnesium 23
meanbp 81
platelets 195
potassium 148
sodium 18
spo2 109
spontaneousrr 96
temp 8
urine 929
wbc 440


In [60]:
y_train

Unnamed: 0,sofa,oasis,saps
0,10.0,35.0,20.0
1,1.0,24.0,15.0
2,8.0,23.0,16.0
3,3.0,25.0,15.0
4,0.0,25.0,12.0
5,8.0,45.0,21.0
6,7.0,38.0,20.0
7,3.0,39.0,17.0
8,7.0,24.0,17.0
9,5.0,23.0,16.0


In [61]:
output = pd.concat([x_train,y_train],axis=1)

In [62]:
output.to_csv('synthetic.csv')

In [63]:
coverage = np.array([[1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,0,1,1],
[0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,1,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1],
[1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0],
[0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0],
[0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0],
[1,0,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0],
[1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0],
[1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0],
[1,0,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0],
[1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1],
[0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0],
[1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0],
[1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0],
[1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1,1,1,1],
[1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1],
[1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,0,0],
[1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0],
[1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0],
[1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1],
[1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0],
[1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1]])

In [64]:
x_train_dops = x_train.drop(['ICU_ID'],axis=1).values
y_train_dops = y_train['saps'].values

In [65]:
X_train, X_test,y_train,y_test = train_test_split(x_train_dops,y_train_dops,test_size=0.2)

In [66]:
X_train[3]

array([1., 0., 0., 1., 0., 1., 0., 0., 0., 1., 0., 1., 0., 1., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 1., 0.])

In [67]:
y_train[3]

21.0

In [68]:
res, pred_theta, max_item_index = dops(X_train, y_train, X_test, coverage, 30, 0.8, np.random.rand(26), eta=1, iters=500, verbose=True)






Iter 0 : [0.41224252 0.79193848 0.21225082 0.94223676 0.73086526 0.88870757
 0.55904158 0.76515307 0.43596822 0.3805918  0.01681118 0.6542198
 0.11586469 0.45220629 0.14632138 0.98073877 0.39265113 0.18108597
 0.77444624 0.80236979 0.53985251 0.3380734  0.55621914 0.81146923
 0.92545479 0.8096024 ]


KeyboardInterrupt: 