In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tqdm
import pymalts_c as pm
import sklearn.linear_model as lm
import sklearn.svm as svm
import sklearn.ensemble as en
import warnings
warnings.filterwarnings("ignore")

# Root of the local checkout that holds the input data.
# NOTE(review): this is an absolute, user-specific path, so the notebook only
# runs unchanged on the original author's machine.
path_to_git = '/Users/harshparikh/Documents/GitHub/iic_causal_inference/'

# Load the pickled input object used by every later cell.
# NOTE(review): unpickling can execute arbitrary code; only load files from a
# trusted source.
iic_burden = pd.read_pickle(path_to_git + 'aim1/data_to_fit_CNNIIC_iic_burden_smooth.pickle')

# The result of this call is discarded; because it is not the last expression
# in the cell, the notebook presumably does not display it either.
iic_burden.keys()

# Global seaborn theme for all later figures (large fonts, white grid).
sns.set(font_scale=3,style='whitegrid')

In [2]:
def plot_unit(E,D,Dnames):
    """Draw one record as a stack of panels sharing the x axis.

    The top panel is a black line plot of `E` titled 'IIC Burden'. Below it,
    each column of `D` is rendered as a one-row heat strip titled with the
    matching entry of `Dnames`.

    Parameters
    ----------
    E : sequence
        Values plotted in the top panel.
    D : 2-D array
        Indexed as ``D[:, k]``; one strip is drawn per column.
    Dnames : sequence of str
        Panel titles, one per column of `D`.

    Returns
    -------
    None
        The figure is created through pyplot and not returned.
    """
    drug_count = D.shape[1]
    # The top panel is four times as tall as each drug strip.
    panel_heights = [4] + [1] * drug_count
    _, panels = plt.subplots(
        nrows=drug_count + 1,
        ncols=1,
        sharex=True,
        figsize=(15, 6 + 1.25 * drug_count),
        gridspec_kw={'height_ratios': panel_heights},
    )

    burden_ax = panels[0]
    burden_ax.plot(E, c='black', label='Observed')
    burden_ax.set_title('IIC Burden')

    for col, drug_ax in enumerate(panels[1:]):
        # imshow needs a 2-D image, so lift the column to shape (1, T).
        strip = D[:, col][np.newaxis, :]
        drug_ax.imshow(strip, cmap="copper", aspect="auto")
        drug_ax.set_title(Dnames[col])
    

def setup_data(iic_burden,frame=10):
    """Build one row per record from covariates, outcome and the first `frame` steps.

    For every record the first `frame` entries of ``iic_burden['Pobs'][i]`` and
    the first `frame` rows of ``iic_burden['D'][i]`` are flattened into columns
    and summarised. The result is left-joined onto the covariate table, so
    records that could not be summarised keep their covariates and get NaN in
    the added columns.

    Parameters
    ----------
    iic_burden : mapping
        Must provide the keys 'C', 'Cname', 'sids', 'Y', 'Pobs', 'D' and
        'Dname'. ``iic_burden['D'][i]`` is indexed as a 2-D array
        (rows = steps, columns = entries of 'Dname').
    frame : int, default 10
        Number of leading steps to keep per record.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``iic_burden['sids']`` with the 'Cname' columns, 'mRS', and:
        'time_of_intervention' (first step whose summed drug row is > 0, or
        ``np.inf`` when there is none), 'drug_administered' (name of the
        largest drug at that step, or 0 when there is none), 'iic_<j>',
        'avg_iic_pre', 'avg_iic_post', '<drug>_<j>' and one total per drug.

    Notes
    -----
    Records that raise while being processed (for example records shorter than
    `frame`) are skipped on purpose. They are now counted and reported through
    a single warning instead of being dropped silently. Rows are collected and
    concatenated once; the previous ``DataFrame.append`` call no longer exists
    in pandas 2.x and, hidden behind a bare ``except``, made every record look
    like a skipped one.
    """
    df = pd.DataFrame(iic_burden['C'],columns=iic_burden['Cname'],index=iic_burden['sids'])
    df['mRS'] = iic_burden['Y']
    n = df.shape[0]

    # Column labels do not depend on the record, so build them once.
    drug_names = list(iic_burden['Dname'])
    Ename = ['iic_%d'%(j) for j in range(frame)]
    # Step-major, drug-minor: the same order as D.reshape(-1,) on a (step, drug) array.
    Dname_flat = ['%s_%d'%(drug,j) for j in range(frame) for drug in drug_names]
    columns = (['time_of_intervention']+['drug_administered']+Ename
               +['avg_iic_pre','avg_iic_post']+Dname_flat+drug_names)

    rows = []
    skipped = []
    for i in range(n):
        try:
            E = iic_burden['Pobs'][i][:frame]
            D = iic_burden['D'][i][:frame,:]
            dose_per_step = D.sum(axis=1)
            # argmax of a boolean array is the first True, or 0 when all are False.
            time_of_intervention = np.argmax(dose_per_step>0)
            if dose_per_step[time_of_intervention] == 0:
                # No drug within the frame: summarise against the last step instead.
                time_of_intervention = frame-1
            D_flat = D.reshape(-1,)
            D_summary = D.sum(axis=0)
            First_D = drug_names[np.argmax(D[time_of_intervention,:])]
            E_summary_pre = np.nanmean(E[:time_of_intervention])
            E_summary_post = np.nanmean(E[time_of_intervention:])
            if dose_per_step[time_of_intervention] == 0:
                # Still no drug at the chosen step: mark the record as untreated.
                time_of_intervention = np.inf
                First_D = 0
                E_summary_post = E_summary_pre
            # Building the row inside the try keeps the original behaviour of
            # skipping records whose value count does not match the columns.
            rows.append(pd.DataFrame(
                [[time_of_intervention]+[First_D]+list(E)+[E_summary_pre,E_summary_post]+list(D_flat)+list(D_summary)],
                index = [df.index[i]],
                columns = columns))
        except Exception:
            # Best-effort by design, but no longer traps KeyboardInterrupt/SystemExit.
            skipped.append(df.index[i])
            continue

    if skipped:
        warnings.warn('setup_data: skipped %d of %d records that could not be summarised over %d steps'
                      % (len(skipped), n, frame), stacklevel=2)

    df_wave = pd.concat(rows) if rows else pd.DataFrame()
    df = df.join(df_wave)
    return df

def _window_means(series, window):
    """Return the NaN-ignoring mean of every full length-`window` slice of `series`."""
    # A series of length t has t - window + 1 full windows; the last one ends at t.
    n_windows = len(series) - window + 1
    return np.array([np.nanmean(series[j:j+window]) for j in range(n_windows)])


def setup_coarsen_data(iic_burden,window = 36,summarize='max',**kwargs):
    """Collapse every record to one row: covariates, outcome, one exposure value, mean drugs.

    Parameters
    ----------
    iic_burden : mapping
        Must provide the keys 'C', 'Cname', 'sids', 'Y', 'Pobs', 'D' and 'Dname'.
    window : int, default 36
        Length, in steps, of the sliding window applied to each 'Pobs' series.
    summarize : {'max', 'proportion', 'mean'}, default 'max'
        How the sliding-window means are reduced to the single value 'E':
        'max' takes the largest window mean, 'mean' their average, and
        'proportion' the fraction of window means above ``threshold``.
    **kwargs
        threshold : float, default 0.5. Only read when summarize='proportion'.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``iic_burden['sids']`` with the 'Cname' columns, 'mRS', 'E'
        and one column per entry of 'Dname' holding the NaN-ignoring mean of
        that drug over the whole record.

    Raises
    ------
    ValueError
        If `summarize` is not one of the supported names. Previously an
        unknown name left 'E' unassigned, which either raised a NameError
        or silently reused the previous record's value.

    Notes
    -----
    Records with no more than `window` steps fall back to the NaN-ignoring mean
    of the whole series for every `summarize` mode.
    NOTE(review): for 'proportion' that fallback is a mean, not a fraction of
    windows; confirm this is intended.

    The sliding windows now include the final one (ending at the last step);
    the earlier ``range(t-window)`` loop never looked at the last sample.
    """
    supported = ('max', 'proportion', 'mean')
    if summarize not in supported:
        raise ValueError("summarize must be one of %s, got %r" % (supported, summarize))

    df = pd.DataFrame(iic_burden['C'],columns=iic_burden['Cname'],index=iic_burden['sids'])
    df['mRS'] = iic_burden['Y']
    n = df.shape[0]
    E_col = []
    D_col = []
    for i in range(n):
        pobs = iic_burden['Pobs'][i]
        if len(pobs) > window:
            means = _window_means(pobs, window)
            if summarize == 'max':
                E = np.max(means)
            elif summarize == 'proportion':
                threshold = kwargs.get('threshold',0.5)
                E = np.nanmean(means > threshold)
            else:
                E = np.mean(means)
        else:
            # Too short for a sliding window: use the whole-record mean.
            E = np.nanmean(pobs)
        E_col.append(E)
        D_col.append(np.nanmean(iic_burden['D'][i],axis=0))
    df['E'] = E_col
    df_D = pd.DataFrame(D_col,columns=iic_burden['Dname'],index=df.index)
    df = df.join(df_D,how='inner')
    return df

In [4]:
# One row per record: exposure 'E' is the largest 6-step sliding-window mean.
df_coarsen = setup_coarsen_data(iic_burden,window = 6,summarize='max')
# Fitted parameter table, read with its first column as the index.
# NOTE(review): this path is relative to the working directory, unlike the
# pickle load in the first cell which goes through `path_to_git` - confirm both
# resolve to the same checkout.
df_pkpd = pd.read_csv('../aim1/step6_simulator/results_iic_burden_smooth/params_mean_CNNIIC_iic_burden_smooth_cauchy_expit_lognormal_drugoutside_ARMA2,6_iter1000.csv',
                     index_col=0, header=0)
# Keep only the alpha / b[...] parameter columns; missing values become 0.
df_pkpd = df_pkpd[['alpha0', 'alpha[1]','alpha[2]', 'b[lacosamide]', 'b[levetiracetam]',
                   'b[midazolam]', 'b[pentobarbital]', 'b[phenobarbital]', 'b[propofol]','b[valproate]']].fillna(0)
# Select covariates, outcome ('mRS'), exposure ('E') and the per-drug means,
# then drop every row that has a missing value in any of these columns.
df_coarsen_1 = df_coarsen[['Gender', 'Age', 'marrital',
                           'Hx CVA (including TIA)', 'Hx HTN', 'Hx Sz /epilepsy', 
                           'Hx brain surgery', 'Hx CKD', 'Hx CAD/MI', 'Hx CHF',
                           'Hx DM', 'Hx of HLD', 'Hx tobacco (including ex-smokers)', 
                           'Hx ETOH abuse any time in their life (just when in the hx is mentioned)', 
                           'Hx other substance abuse, any time in their life', 'Hx cancer (other than CNS cancer)',
                           'Hx CNS cancer', 'Hx COPD/ Asthma', 
                           'premorbid MRS before admission  (modified ranking scale),before admission', 
                           'hydrocephalus  (either on admission or during hospital course)   QPID', 
                           'iMV  (initial (on admission) mechanical ventilation)',
                           'Primary systemic dx Sepsis/Shock', 'Worst GCS in 1st 24',
                           'neuro_dx_Seizures/status epilepticus', 'prim_dx_Respiratory disorders',  
                           'mRS', 'E'
                           ,'lacosamide', 'levetiracetam', 'midazolam', 'pentobarbital', 'phenobarbital', 'propofol', 'valproate']].dropna()

# Inner join on the index: only rows present in both tables survive.
# Presumably both are indexed by the same record ids - verify.
df_coarsen_1 = df_coarsen_1.join(df_pkpd,how='inner')
# Names of the covariates treated as discrete. Not used in this cell; the
# MALTS cell further down reassigns `discrete` to an empty list.
discrete = ['Gender', 'marrital','Hx CVA (including TIA)', 'Hx HTN', 'Hx Sz /epilepsy', 
            'Hx brain surgery', 'Hx CKD', 'Hx CAD/MI', 'Hx CHF',
            'Hx DM', 'Hx of HLD', 'Hx tobacco (including ex-smokers)', 
            'Hx ETOH abuse any time in their life (just when in the hx is mentioned)', 
            'Hx other substance abuse, any time in their life', 'Hx cancer (other than CNS cancer)',
            'Hx CNS cancer', 'Hx COPD/ Asthma', 
            'premorbid MRS before admission  (modified ranking scale),before admission', 
            'hydrocephalus  (either on admission or during hospital course)   QPID', 
            'iMV  (initial (on admission) mechanical ventilation)',
            'Primary systemic dx Sepsis/Shock',
            'neuro_dx_Seizures/status epilepticus', 'prim_dx_Respiratory disorders']

# Binary outcome stored as float: 1.0 when mRS > 3, else 0.0.
df_coarsen_1['Y'] = (df_coarsen_1['mRS'] > 3).astype(float)

# Combine per-drug "mean level above cut-off" flags (cut-offs: levetiracetam 20,
# propofol 1, all others 0; units are not visible here).
# NOTE(review): `+` on boolean Series presumably acts as a logical OR, which
# would make this a 0/1 "any drug above its cut-off" flag rather than a count
# of drugs - confirm that is the intent given the name `drug_level`.
drug_level = ( (df_coarsen_1['levetiracetam'] > 20) + 
              (df_coarsen_1['propofol']>1) + 
              (df_coarsen_1['midazolam']>0) +
              (df_coarsen_1['lacosamide']>0) +
              (df_coarsen_1['pentobarbital']>0) +
              (df_coarsen_1['phenobarbital']>0) +
              (df_coarsen_1['valproate']>0) ).astype(int)

df_coarsen_1['drug'] = drug_level

In [5]:
# Show the assembled modelling table (the rendered output is truncated by pandas).
df_coarsen_1

Unnamed: 0,Gender,Age,marrital,Hx CVA (including TIA),Hx HTN,Hx Sz /epilepsy,Hx brain surgery,Hx CKD,Hx CAD/MI,Hx CHF,...,alpha[2],b[lacosamide],b[levetiracetam],b[midazolam],b[pentobarbital],b[phenobarbital],b[propofol],b[valproate],Y,drug
sid3,1.0,45.91,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.145594,0.000000,1.091907,0.0,0.0,0.0,0.000000,0.0,1.0,1
sid4,1.0,53.99,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.488529,0.000000,0.060738,0.0,0.0,0.0,0.021095,0.0,1.0,1
sid5,1.0,23.08,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.068614,38.447169,0.000000,0.0,0.0,0.0,23.976416,0.0,0.0,1
sid7,1.0,32.11,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.002136,0.000000,4.504308,0.0,0.0,0.0,0.015713,0.0,1.0,1
sid8,0.0,67.24,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.259320,0.000000,0.441685,0.0,0.0,0.0,0.895914,0.0,1.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
sid1986,0.0,58.96,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.174925,0.000000,0.003123,0.0,0.0,0.0,0.000000,0.0,1.0,1
sid1989,1.0,61.04,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.063510,0.000000,0.004756,0.0,0.0,0.0,0.000000,0.0,0.0,1
sid1990,1.0,24.09,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.420321,0.000000,0.003011,0.0,0.0,0.0,0.000000,0.0,0.0,1
sid1991,0.0,74.84,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.489493,0.000000,0.353056,0.0,0.0,0.0,0.055648,0.0,1.0,1


In [6]:
# Fit MALTS on a four-column subset: outcome 'Y', treatment 'E', plus 'Age' and 'drug'.
n_repeats = 5
n_splits = 3
df_malts_data = df_coarsen_1[['Age','Y','E','drug']]
# Overrides the longer `discrete` list defined in the data-preparation cell;
# no column is declared discrete for this fit.
discrete = []
# NOTE(review): the recorded output of this cell is
#   ValueError: Supported target types are: ('binary', 'multiclass'). Got 'continuous' instead.
# 'E' is a float-valued column; presumably malts_mf stratifies its folds on the
# treatment column and therefore needs a discrete treatment - confirm against
# the pymalts_c implementation before choosing a fix.
m = pm.malts_mf(outcome='Y',
                treatment='E', 
                data=df_malts_data,
                n_repeats=n_repeats,
                n_splits=n_splits,
                k_tr=2,
                k_est=2,
                discrete = discrete,
                C=0.01)

ValueError: Supported target types are: ('binary', 'multiclass'). Got 'continuous' instead.