In [None]:
import numpy as np
import pandas as pd
import random
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn import ensemble
from sklearn import tree
from imblearn.over_sampling import SMOTENC
import ipywidgets as widgets
from ipywidgets import HBox, Label

In [None]:
def randrange(bound):
    """Sample a random value inside ``bound`` and format it with one decimal.

    Parameters
    ----------
    bound : sequence of two numbers
        ``(lower, upper)`` limits of the interval to sample from.

    Returns
    -------
    str
        The sampled value rendered with one decimal place, e.g. ``"4.2"``.
        Note that the result is a string, not a float.
    """
    import random
    lower, upper = bound
    fraction = random.random()
    sample = lower + (upper - lower) * fraction
    return format(sample, ".1f")

In [None]:
def is_same_patient(X,pat):
    """Report whether the one-row frame ``pat`` duplicates any row of ``X``.

    Parameters
    ----------
    X : pandas.DataFrame
        Patients to search through.
    pat : pandas.DataFrame
        A single-row frame with the same columns (in the same order) as ``X``.

    Returns
    -------
    bool
        ``True`` as soon as one row of ``X`` matches ``pat`` on every column,
        ``False`` when no row matches (including when ``X`` is empty).

    Notes
    -----
    The comparison is element-wise ``==``, so a NaN never matches a NaN.
    """
    target = pat.values
    for i in range(len(X)):
        row = X.iloc[i:i+1].values
        # A row is a copy of the patient only if every column agrees.
        if (target == row)[0].all():
            return True
    # Every row differed in at least one column: the patient is unique.
    return False

In [None]:
def change_nan_normal(X,perc_nan):
    """Drop sparse columns and fill remaining NaNs with random in-range values.

    A column is kept only when its number of missing entries is strictly less
    than ``perc_nan * len(X)``. Each missing entry of a kept column is replaced
    by a value drawn with ``randrange`` from that column's normal range.

    Parameters
    ----------
    X : pandas.DataFrame
        Input measurements; it is not modified.
    perc_nan : float
        Maximum tolerated fraction of missing entries per column (exclusive).

    Returns
    -------
    pandas.DataFrame
        New frame holding the kept columns, indexed like ``X``.

    Raises
    ------
    KeyError
        If a kept column contains NaNs but has no entry in the normal-range
        table below.
    """
    normal_values = {"Total bilirubin":[5,27],"ALT":[3,53],"CK":[39,308],"Neutrophil":[1.7,5.8],"hsTnl":[1.8,34.2],"Urea":[3,7.4],"CRP":[1,5],"Hb":[13.4,17.1],"WBC":[3.7,9.2],"Lymphocyte":[1,3.1],"LDH":[110,210],"Creatinine":[65,110],"Platelet":[145,370],"Ferritin":[159,1441],"pH":[7.35,7.45],"pCO2":[4.7,6],"PO2":[10,13.3],"HCO3":[22,26],"PT":[10.3,13.4],"aPTT":[26,34]}
    max_missing = perc_nan * len(X)
    X_new = pd.DataFrame()
    for col in X.columns:
        column = X[col]
        missing = column.isna()
        n_missing = int(missing.sum())
        if not n_missing < max_missing:
            # Too many gaps: the column is left out of the result.
            continue
        filled = column.copy()
        if n_missing:
            bounds = normal_values[col]
            # randrange returns a formatted string; convert it back so the
            # column stays numeric. The boolean mask addresses the gaps by
            # position, so any index labelling works.
            filled.loc[missing] = [float(randrange(bounds)) for _ in range(n_missing)]
        X_new[col] = filled
    return X_new
def change_nan_normal_disc(X,perc_nan):
    """Drop sparse columns and replace the remaining NaNs with 0.

    A column is kept only when its number of missing entries is strictly less
    than ``perc_nan * len(X)``; missing entries of kept columns become 0.

    Parameters
    ----------
    X : pandas.DataFrame
        Input frame; it is not modified.
    perc_nan : float
        Maximum tolerated fraction of missing entries per column (exclusive).

    Returns
    -------
    pandas.DataFrame
        New frame holding the kept columns, indexed like ``X``.
    """
    max_missing = perc_nan * len(X)
    X_new = pd.DataFrame()
    for col in X.columns:
        column = X[col]
        if column.isna().sum() < max_missing:
            # fillna works on positions via the NaN mask, so it is correct for
            # any index labelling and never writes through a chained lookup.
            X_new[col] = column.fillna(0)
    return X_new

In [None]:
def MLAs(X_train,y_train,X_test,class_names,max_attempts=100):
    """Fit a random forest and predict the class of a single test patient.

    The forest is refitted until the rounded class probabilities have a unique
    maximum, so that a prediction is never reported for tied classes.

    Parameters
    ----------
    X_train, y_train
        Training features and labels passed to ``RandomForestClassifier.fit``.
    X_test
        Features of the patient to classify. Exactly one row is expected
        (the probabilities are flattened and the prediction is read as a
        single scalar).
    class_names : sequence of str
        Display names, indexed by the integer value of the predicted label.
    max_attempts : int, optional
        Upper bound on the number of refits tried to break a probability tie.

    Returns
    -------
    numpy.ndarray or None
        The array returned by ``predict`` on success; ``None`` when the unique
        maximum probability does not belong to the predicted class (an error
        line is printed in that case).

    Raises
    ------
    RuntimeError
        If every one of the ``max_attempts`` fits produced tied probabilities.
    """
    for _ in range(max_attempts):
        # random forest classifier
        rfc = ensemble.RandomForestClassifier(n_estimators=100)
        rfc.fit(X_train, y_train)
        r2 = rfc.predict(X_test)
        r3 = list(np.ravel(np.around(rfc.predict_proba(X_test), decimals=2)))
        print(class_names,'with probabilities:',r3)

        # Make sure the prediction is not returned for tied probabilities.
        m = max(r3)
        mind = [i for i, j in enumerate(r3) if j == m]
        if len(mind) != 1:
            # Tie: refit (the forest is randomised) and try again.
            continue

        label = r2.item()
        # predict_proba columns follow rfc.classes_, which can skip labels
        # that are absent from y_train; locate the column of the prediction.
        position = list(rfc.classes_).index(label)
        if m == r3[position]:
            print('The patient triage result:',class_names[int(label)],'\n')
            return r2
        print("Error! m!=r2 :", m, " != ", r2)
        return None

    raise RuntimeError(
        "Class probabilities were still tied after {} fits".format(max_attempts)
    )
        

In [None]:
def bound_to_discrete(el,lis):
    """Map a measurement to a discrete code relative to its reference bounds.

    Parameters
    ----------
    el : number or numeric string
        The measured value; it is converted with ``float``.
    lis : sequence
        Either ``[lower, upper]`` (two-sided range) or a sequence whose first
        item is a single upper threshold.

    Returns
    -------
    int or None
        Two-sided range: ``-1`` below, ``0`` inside (bounds included), ``1``
        above; ``None`` when no comparison holds (e.g. NaN).
        Single threshold: ``0`` when ``el <= threshold``, otherwise ``1``.
    """
    if len(lis) != 2:
        threshold = lis[0]
        return 0 if float(el) <= threshold else 1

    lower, upper = lis[0], lis[1]
    value = float(el)
    if lower <= value <= upper:
        return 0
    if value < lower:
        return -1
    if value > upper:
        return 1
    return None

def discretize_DF(X,normal_ranges):
    """Discretize every column of ``X`` that has an entry in ``normal_ranges``.

    Parameters
    ----------
    X : pandas.DataFrame
        Frame holding the raw measurements.
    normal_ranges : dict
        Maps a column name to the bounds handed to ``bound_to_discrete``.

    Returns
    -------
    tuple
        ``(X_disc, ls_feat)`` where ``ls_feat`` is the sorted list of column
        names shared by ``X`` and ``normal_ranges`` and ``X_disc`` holds the
        discretized version of exactly those columns, indexed like ``X``.
    """
    ls_feat = list(np.intersect1d(X.columns, list(normal_ranges.keys())))
    X_disc = pd.DataFrame()
    for feat in ls_feat:
        bounds = normal_ranges[feat]
        # Walk the values by position: the index labels of X are not
        # guaranteed to be 0..n-1, so X[feat][j] could hit the wrong row
        # or raise KeyError.
        codes = [bound_to_discrete(value, bounds) for value in X[feat].to_numpy()]
        # Keep X's index so the result lines up with X row for row.
        X_disc[feat] = pd.Series(codes, index=X.index)
    return (X_disc, ls_feat)

def full_discr_DF(X):
    """Build the fully discretized feature frame for the patients in ``X``.

    Age, sex and comorbidity columns are copied unchanged; every laboratory
    column of ``X`` that appears in the reference-range table below is replaced
    by its discrete code as computed by ``discretize_DF``.

    Parameters
    ----------
    X : pandas.DataFrame
        Must contain all the columns listed in ``pre_feat``.

    Returns
    -------
    pandas.DataFrame
        ``pre_feat`` columns followed by the discretized laboratory columns,
        indexed like ``X``.

    Raises
    ------
    KeyError
        If one of the ``pre_feat`` columns is missing from ``X``.
    """
    ranges = {"Total bilirubin":[5,27],"ALT":[53],"CK":[39,308],"Neutrophil":[1.7,5.8],"hsTnl":[34.2],"Urea":[3,7.4],"CRP":[5],"Hb":[13.4,17.1],"WBC":[3.7,9.2],"Lymphocyte":[1,3.1],"LDH":[110,210],"Creatinine":[65,110],"Platelet":[145,370],"Ferritin":[159,1441],"pH":[7.35,7.45],"pCO2":[4.7,6],"PO2":[10,13.3],"HCO3":[22,26],"PT":[10.3,13.4],"aPTT":[26,34]}
    pre_feat=['Age','Chronic heart disease','Hypertension','Asthma','Chronic renal impairment','Diabetes Melitus','Hematological malignancy','Chronic liver disease','Chronic lung disease','Asplenia','Sex']
    X_disc = X[pre_feat].copy()
    X_aux, ls_features = discretize_DF(X, ranges)
    for feat in ls_features:
        # Copy by position: rows of X_aux are in the same order as rows of X,
        # but its index labels need not match, and label alignment would then
        # fill the column with NaN.
        X_disc[feat] = X_aux[feat].to_numpy()
    return X_disc

In [None]:
#if two dataframes do not have the same features (columns), it finds the intersection between the two lists 
#of features and then spits out the "reduced" dataframes (dataframes containing only the intersection features) 

def check_features(X_tr,X_tt):
    """Restrict two frames to the columns they have in common.

    Returns
    -------
    tuple
        ``(X_tr_reduced, X_tt_reduced, shared)`` where ``shared`` is the sorted
        list of column names present in both frames and each reduced frame
        holds exactly those columns, in that order.
    """
    shared = [name for name in np.intersect1d(X_tr.columns, X_tt.columns)]
    reduced_train = X_tr[shared]
    reduced_test = X_tt[shared]
    return reduced_train, reduced_test, shared

In [None]:
def fsmain(X_tr,y_tr,sm_train,X_tt,discretize,categorical_features_ls,n_outcome_labels):
    """Train on ``(X_tr, y_tr)`` and predict the outcome for ``X_tt``.

    Pipeline: keep only the columns shared by both frames, cast them to float
    rounded to one decimal, select the label column, optionally discretize
    and/or oversample the training data, then delegate to ``MLAs``.

    Parameters
    ----------
    X_tr : pandas.DataFrame
        Training features.
    y_tr : pandas.DataFrame
        Training outcomes; must hold "Final Status DoA" (2 labels) or
        "Final Status 4" (4 labels).
    sm_train : int
        ``0`` disables oversampling; any other value applies SMOTENC to the
        training data.
    X_tt : pandas.DataFrame
        Features of the patient to classify. It is never discretized here.
    discretize : int
        ``2`` discretizes the training data before oversampling, ``1`` after
        it; any other value leaves the training data as is.
    categorical_features_ls : list of str
        Names of the training columns SMOTENC must treat as categorical.
    n_outcome_labels : int
        ``2`` (Alive/Dead) or ``4`` (ACUTE/CONV./DEATH/HOME).

    Returns
    -------
    The value returned by ``MLAs``.

    Raises
    ------
    ValueError
        If ``n_outcome_labels`` is neither 2 nor 4.
    """
    # Both frames must expose the same features: upstream cleaning may have
    # dropped a column from one of them only.
    X_tr, X_tt, _ = check_features(X_tr, X_tt)
    # The builtin float is used because the np.float alias was removed in
    # NumPy 1.24.
    X_tt = X_tt.astype(float).round(decimals=1)
    X_tr = X_tr.astype(float).round(decimals=1)

    if n_outcome_labels == 2:
        class_names = ['Alive','Dead']
        y_tr = y_tr["Final Status DoA"]
    elif n_outcome_labels == 4:
        class_names = ['ACUTE', 'CONV.', 'DEATH', 'HOME']
        y_tr = y_tr["Final Status 4"]
    else:
        raise ValueError(
            "n_outcome_labels must be 2 or 4, got {!r}".format(n_outcome_labels)
        )

    if discretize == 2:
        X_tr = full_discr_DF(X_tr)

    # TODO: check that categorical_features_ls (aside from Age) is not larger
    # than the list of shared features.
    # SMOTENC wants the positions, not the names, of the categorical columns.
    categ_feat_index_ls = [X_tr.columns.get_loc(name) for name in categorical_features_ls]

    def k_neig_smoting(outcomes_df):
        # Smallest number of samples carried by any label (NaN labels are
        # ignored). SMOTE interpolates between neighbours of the same label,
        # so this count limits the usable number of neighbours.
        labelled = outcomes_df[~np.isnan(outcomes_df)]
        _, counts = np.unique(labelled, return_counts=True)
        return np.amin(counts)

    def init_smote(k0):
        # The default of 5 neighbours fails when a label has fewer than 6
        # samples, so shrink k_neighbors to fit the rarest label.
        k_neighbors = int(k0) - 1 if float(k0) < 6 else 5
        return SMOTENC(k_neighbors=k_neighbors,sampling_strategy='auto',random_state=0,categorical_features=categ_feat_index_ls)

    X_train, y_train = X_tr, y_tr
    if sm_train != 0:
        sm = init_smote(k_neig_smoting(y_train))
        X_train, y_train = sm.fit_resample(X_train, y_train)

    if discretize == 1:
        X_train = full_discr_DF(X_train)

    X_test = X_tt
    if hasattr(X_train, "columns"):
        # Discretization reorders (and may drop) training columns; give the
        # test frame the same columns in the same order as the fitted data.
        X_test = X_tt[list(X_train.columns)]

    # Finally run the classifier; MLAs takes care of tied probabilities.
    return MLAs(X_train, y_train, X_test, class_names)

In [None]:
# Load the training data used by the cells below.
# Disabled alternative that fills gaps via change_nan_normal instead:
# X_tr= pd.read_csv (r'X_tr.csv', index_col=0)
# X_train=change_nan_normal(X_tr,0.3)
# Active path: read X_trd.csv (presumably the already-discretized features -
# TODO confirm) and pass it through change_nan_normal_disc with a 0.3 threshold.
X_trd= pd.read_csv (r'X_trd.csv', index_col=0)
X_train=change_nan_normal_disc(X_trd,0.3)
# Outcome labels; the first CSV column is used as the index.
y_train=pd.read_csv(r'y_tr.csv',index_col=0)

In [None]:
import ipywidgets as widgets
import numpy
import pandas as pd
# Reference bounds per laboratory feature: [lower, upper] for two-sided ranges,
# [upper] for the features that only have a "high" threshold.
# NOTE(review): the Platelet and Ferritin bounds here differ from the ones
# hard-coded in full_discr_DF ([145,370] and [159,1441]) - confirm which
# set is intended.
normal_values = {
    "Total bilirubin": [5, 27],
    "ALT": [53],
    "CK": [39, 308],
    "Neutrophil": [1.7, 5.8],
    "hsTnl": [34.2],
    "Urea": [3, 7.4],
    "CRP": [5],
    "Hb": [13.4, 17.1],
    "WBC": [3.7, 9.2],
    "Lymphocyte": [1, 3.1],
    "LDH": [110, 210],
    "Creatinine": [65, 110],
    "Platelet": [120, 381],
    "Ferritin": [20, 250],
    "pH": [7.35, 7.45],
    "pCO2": [4.7, 6],
    "PO2": [10, 13.3],
    "HCO3": [22, 26],
    "PT": [10.3, 13.4],
    "aPTT": [26, 34],
}

In [None]:
class features:
    """Observer callbacks that keep the radio buttons of one row exclusive.

    Each row of the global ``buttons`` dict holds a label widget at position 0
    followed by two or three single-option radio widgets. When one of them is
    chosen, the matching callback clears the selection of the others.
    """

    def __init__(self, label):
        # Key of the row in the global ``buttons`` dict handled by this object.
        self.label = label

    def _deselect(self, siblings):
        """Clear the widgets in ``siblings``, given as (position, callback) pairs.

        Each callback is detached before its widget's ``index`` is reset to
        None and re-attached only once every sibling has been cleared.
        """
        row = buttons[self.label]
        for position, callback in siblings:
            row[position].unobserve(callback, names=['value'])
            row[position].index = None
        # Re-attach in ascending widget position.
        for position, callback in sorted(siblings, key=lambda pair: pair[0]):
            row[position].observe(callback, names=['value'])

    def button1_observer(self,sender):
        """First of three options chosen: clear the third, then the second."""
        self._deselect([(3, self.button3_observer), (2, self.button2_observer)])

    def button2_observer(self,sender):
        """Second of three options chosen: clear the first, then the third."""
        self._deselect([(1, self.button1_observer), (3, self.button3_observer)])

    def button3_observer(self,sender):
        """Third of three options chosen: clear the first, then the second."""
        self._deselect([(1, self.button1_observer), (2, self.button2_observer)])

    def binarybutton1_observer(self,sender):
        """First of two options chosen: clear the second."""
        self._deselect([(2, self.binarybutton2_observer)])

    def binarybutton2_observer(self,sender):
        """Second of two options chosen: clear the first."""
        self._deselect([(1, self.binarybutton1_observer)])

In [None]:
def radiobuttons():
    """(Re)build the global ``buttons`` dict holding the patient input form.

    ``buttons`` maps a row name ('Sex', each laboratory feature, each
    comorbidity) to a list made of a label widget followed by two or three
    single-option radio widgets. Every widget starts unselected and the radio
    widgets of a row are wired to a ``features`` object that keeps them
    mutually exclusive.
    """
    global buttons

    two_option_labs = ['hsTnl', 'ALT', 'CRP']
    normal_values = {"Total bilirubin":[5,27],"ALT":[53],"CK":[39,308],"Neutrophil":[1.7,5.8],"hsTnl":[34.2],"Urea":[3,7.4],"CRP":[5],"Hb":[13.4,17.1],"WBC":[3.7,9.2],"Lymphocyte":[1,3.1],"LDH":[110,210],"Creatinine":[65,110],"Platelet":[120,381],"Ferritin":[20,250],"pH":[7.35,7.45],"pCO2":[4.7,6],"PO2":[10,13.3],"HCO3":[22,26],"PT":[10.3,13.4],"aPTT":[26,34]}
    comorbs={'Chronic heart disease':['N','Y'],'Hypertension':['N','Y'],'Asthma':['N','Y'],'Chronic renal impairment':['N','Y'],'Diabetes Melitus':['N','Y'],'Hematological malignancy':['N','Y'],'Chronic liver disease':['N','Y'],'Chronic lung disease':['N','Y'],'Asplenia':['N','Y']}
    # One unit per entry of normal_values, in the same order.
    units = ["umol/L","IU/L","IU/L","10\u2079/L","ng/L","mmol/L","mg/L","g/dL","10\u2079/L","10\u2079/L","IU/L","umol/L","10\u2079/L","pmol/L","","kPa","kPa","mmol/L","s","s"]

    def _label(text, width):
        return widgets.Label(text, layout=widgets.Layout(width=width))

    def _wide_radio(option):
        return widgets.RadioButtons(options=[option], layout=widgets.Layout(width='265px'))

    def _radio(option):
        return widgets.RadioButtons(options=[option])

    buttons = {}
    buttons['Sex'] = [_label("Sex", '160px'), _wide_radio('F'), _radio('M')]

    for position, lab in enumerate(normal_values):
        bounds = normal_values[lab]
        caption = lab + " (" + units[position] + ")"
        if lab in two_option_labs:
            # Only an upper threshold: Normal / High.
            buttons[lab] = [_label(caption, '160px')]
            buttons[lab].append(_wide_radio('Normal < ' + str(bounds[0])))
            buttons[lab].append(_radio('High > ' + str(bounds[0])))
        else:
            # Two-sided range: Low / Normal / High.
            buttons[lab] = [_label(caption, '180px')]
            buttons[lab].append(_radio('Low < ' + str(bounds[0])))
            buttons[lab].append(_radio('Normal'))
            buttons[lab].append(_radio('High > ' + str(bounds[1])))

    # pH has no unit, so its caption is replaced by the bare name.
    buttons["pH"][0] = _label("pH", '180px')

    for comorb in comorbs:
        buttons[comorb] = [_label(comorb, '160px')]
        buttons[comorb].append(_wide_radio('N'))
        buttons[comorb].append(_radio('Y'))

    # Start with nothing selected anywhere (label widgets included).
    for row in buttons.values():
        for widget in row:
            widget.index = None

    # One features object per row, wired in display order: Sex, labs, comorbs.
    for name in ['Sex'] + list(normal_values) + list(comorbs):
        handler = features(name)
        radios = buttons[name][1:]
        if len(radios) == 2:
            callbacks = [handler.binarybutton1_observer, handler.binarybutton2_observer]
        else:
            callbacks = [handler.button1_observer, handler.button2_observer, handler.button3_observer]
        for radio_widget, callback in zip(radios, callbacks):
            radio_widget.observe(callback, names=['value'])

In [None]:
def ytrap(val):
    """Append the final outcome of a discharged patient to ``y_tr.csv``.

    Parameters
    ----------
    val : object with a ``value`` attribute
        ``val.value`` is converted with ``int`` and read as the 4-class
        outcome code (0, 1, 2 or 3).

    Raises
    ------
    ValueError
        If ``val.value`` cannot be converted to an integer.

    Notes
    -----
    For a code outside 0-3 the message 'what the?' is printed and a row of
    missing values is still appended, so the file keeps one row per call.
    """
    valf = int(val.value)
    columns = ["Final Status 4", "Final Status 3", "Final Status DoA"]
    # 4-class code -> (Final Status 4, Final Status 3, Final Status DoA)
    encodings = {
        0: (0, 0, 0),
        1: (1, 1, 0),
        2: (2, np.nan, 1),
        3: (3, 2, 0),
    }
    if valf in encodings:
        row = encodings[valf]
    else:
        print('what the?')
        row = (np.nan, np.nan, np.nan)
    temp = pd.DataFrame([row], columns=columns)

    ycache = pd.read_csv('y_tr.csv',index_col=0)
    # DataFrame.append was removed in pandas 2.0; concat is its replacement.
    ycache = pd.concat([ycache, temp], ignore_index=True)
    ycache.to_csv('y_tr.csv',header=True)

In [None]:
#### import os.path
import os

# Cache of the patients already entered; stays None until CachePatients.csv exists.
pcache = None
if os.path.isfile("CachePatients.csv"):
    pcache = pd.read_csv('CachePatients.csv')
    
# Top-level controls plus one output area per workflow (new patient / discharge).
newbutton = widgets.Button(description="New Patient")
disbutton = widgets.Button(description="Discharge Patient")
newoutput = widgets.Output()
disoutput = widgets.Output()

display(widgets.HBox([newbutton, disbutton]), newoutput, disoutput)
# NOTE: a `global` statement at module level has no effect; patientdf is a
# module-level name either way.
global patientdf 
# Comorbidity names (the keys); each is answered with N or Y in the form.
comorbs={'Chronic heart disease':['N','Y'],'Hypertension':['N','Y'],'Asthma':['N','Y'],'Chronic renal impairment':['N','Y'],'Diabetes Melitus':['N','Y'],'Hematological malignancy':['N','Y'],'Chronic liver disease':['N','Y'],'Chronic lung disease':['N','Y'],'Asplenia':['N','Y']}
# Empty frame with one column per form field; filled in by the submit callback.
patientdf = pd.DataFrame(columns = ["Name","Age","Sex"]+list(normal_values.keys()) + list(comorbs.keys()))

# Patient ID counter used when assigning an ID to a new entry.
gpc=0
# Show every column when a patient frame is displayed.
pd.set_option("display.max_columns",None)

def on_button_clicked(b):
    """Show the "new patient" form and wire its Submit button.

    The form is rendered inside ``newoutput``. On submission the selected
    values are encoded, an ID is assigned, both outcome models are run through
    ``fsmain`` and the patient is appended to ``CachePatients.csv``.

    Module-level names updated by the submit callback:

    * ``gpc``       - ID assigned to the submitted patient.
    * ``patientdf`` - one-row frame describing the submitted patient, with the
      two predictions in "Status DoA" and "Status 4".
    * ``pcache``    - content of ``CachePatients.csv`` after the append.

    Parameters
    ----------
    b : widget
        The clicked button (unused).
    """
    with newoutput:
        disoutput.clear_output()
        newoutput.clear_output()
        radiobuttons()
        print("Input Patient Details")

        NAME = widgets.Label("Patient Name")
        display(NAME)
        nameval = widgets.Text()
        display(nameval)

        AGE = widgets.Label("Age")
        display(AGE)
        ageval = widgets.Text()
        display(ageval)

        display(widgets.VBox([widgets.HBox(buttons[button]) for button in buttons]))
        submitb = widgets.Button(description="Submit!")
        display(submitb)

        def submit_on_clicked(b):
            # These names are rebound below; without the declaration they
            # would be locals of this callback (gpc was then unbound whenever
            # the cache file did not exist yet).
            global gpc, patientdf, pcache

            # Callbacks run outside the enclosing `with`, so re-enter the
            # output widget to keep messages next to the form.
            with newoutput:
                entry = [nameval.value, ageval.value]
                flagempty = False
                for val in buttons:
                    chosen = [radio.value is not None for radio in buttons[val][1:]]
                    # Two radios encode 0/1 (N/Y, F/M, Normal/High);
                    # three radios encode -1/0/1 (Low/Normal/High).
                    codes = (0, 1) if len(chosen) == 2 else (-1, 0, 1)
                    if True in chosen:
                        entry.append(codes[chosen.index(True)])
                    else:
                        flagempty = True
                        entry.append(None)

                if flagempty:
                    print("Must Specify All Values")
                    return
                try:
                    float(ageval.value)
                except ValueError:
                    # The model casts Age to float; reject anything else early.
                    print("Age must be a number")
                    return

                # Entry order is Name, Age, then the rows of `buttons`.
                columns = ["Name", "Age"] + list(buttons.keys())

                if os.path.isfile("CachePatients.csv"):
                    dfr = pd.read_csv("CachePatients.csv")
                    # The first CSV column holds the IDs; a header-only file
                    # means no patient has been stored yet.
                    gpc = int(dfr.iloc[:, 0].max()) + 1 if len(dfr) else 0
                else:
                    tempdf = pd.DataFrame(columns = columns + ["Status DoA", "Status 4"])
                    tempdf.to_csv('CachePatients.csv')
                    gpc = 0

                newoutput.clear_output()
                # A fresh one-row frame per submission: the model expects a
                # single patient, and only this patient must be appended.
                patientdf = pd.DataFrame([entry], columns=columns, index=[gpc])
                print("The Assigned Patient ID is ", gpc, " with the following details")
                display(patientdf.head())

                # The magic happens here: every feature except Age is treated
                # as categorical by the oversampler.
                categ_feat = list(X_train.columns)
                categ_feat.remove('Age')
                s2 = fsmain(X_train,y_train,1,patientdf,0,categ_feat,2)
                s4 = fsmain(X_train,y_train,1,patientdf,0,categ_feat,4)
                patientdf["Status DoA"] = s2
                patientdf["Status 4"] = s4
                patientdf.to_csv('CachePatients.csv', mode='a', header=False)
                pcache = pd.read_csv('CachePatients.csv')

        submitb.on_click(submit_on_clicked)

##################### DISCHARGE MODULE ############################
def dis_on_clicked(b):
    """Show the discharge workflow inside ``disoutput``.

    Steps: ask for a patient ID, confirm the patient by name, ask for the
    final status (0-3) and, on confirmation, append the patient's features to
    ``X_trd.csv`` and the outcome to ``y_tr.csv`` (through ``ytrap``).

    Patients are looked up in the module-level ``pcache`` frame by its index.

    Parameters
    ----------
    b : widget
        The clicked button (unused).
    """
    if not os.path.isfile("CachePatients.csv"):
        with disoutput:
            print("No patient to be discharged")
        return

    with disoutput:
        newoutput.clear_output()
        disoutput.clear_output()
        PID = widgets.Label("Patient ID")
        display(PID)
        patid = widgets.Text()
        display(patid)

        dsubmitb = widgets.Button(description="Submit!")
        display(dsubmitb)

        def submit_discharge(b):
            # Callbacks run outside the enclosing `with`; re-enter the output
            # widget so everything is rendered in the discharge area.
            with disoutput:
                disoutput.clear_output()
                try:
                    pid = int(patid.value)
                except ValueError:
                    pid = None
                if pid is None or pcache is None or pid not in pcache.index:
                    print("A patient with this ID does not exist")
                    return
                print("Do you want to discharge: ",pcache.loc[pid,"Name"])
                YES = widgets.Button(description="Yes")
                NO = widgets.Button(description="No")
                display(widgets.HBox([YES,NO]))

                def yclick(b):
                    with disoutput:
                        disoutput.clear_output()
                        print("Please enter the Final Status of the Patient: ")
                        print(pcache.loc[pid,"Name"])
                        print('ACUTE = 0', 'CONV. = 1', 'DEATH = 2', 'HOME = 3')
                        OUT = widgets.Label("Final Status")
                        display(OUT)
                        outval = widgets.Text()
                        display(outval)
                        CONFIRM = widgets.Button(description="Confirm!")
                        display(CONFIRM)

                        def cclick(b):
                            with disoutput:
                                # Validate before touching any file, so the
                                # feature file and the outcome file always
                                # grow together.
                                try:
                                    status = int(outval.value)
                                except ValueError:
                                    status = None
                                if status not in (0, 1, 2, 3):
                                    print("Final Status must be 0, 1, 2 or 3")
                                    return
                                feature_cols = ["Age","Sex"]+list(normal_values.keys()) + list(comorbs.keys())
                                elem = pcache.loc[[pid], feature_cols]
                                tcache = pd.read_csv('X_trd.csv',index_col=0)
                                # DataFrame.append was removed in pandas 2.0.
                                tcache = pd.concat([tcache, elem], ignore_index=True)
                                tcache.to_csv('X_trd.csv',header=True)
                                ytrap(outval)

                        CONFIRM.on_click(cclick)

                # "No" restarts the discharge workflow from the ID prompt.
                NO.on_click(dis_on_clicked)
                YES.on_click(yclick)

        dsubmitb.on_click(submit_discharge)
    
# Attach each workflow to its top-level button.
disbutton.on_click(dis_on_clicked)
newbutton.on_click(on_button_clicked)

In [None]:
# Build a one-row synthetic patient from the first training row.
# .copy() makes new_pat an independent frame, so the column assignments below
# neither warn about writing to a slice nor depend on pandas' copy semantics.
new_pat=X_train.iloc[0:1].copy()
new_pat['ALT']=55
new_pat['Ferritin']=1500

# Keep only the columns shared by both frames (this rebinds X_train), then
# discretize the synthetic patient.
X_train,new_pat,ls_feat=check_features(X_train,new_pat)
new_pat=full_discr_DF(new_pat)


In [None]:

# Every feature except 'Age' is passed to fsmain as a categorical feature.
categ_feat=list(X_train.columns)
categ_feat.remove('Age')


# Binary outcome (n_outcome_labels=2), without and with oversampling.
print('\n','No smoting')
fsmain(X_train,y_train,0,new_pat,2,categ_feat,2)
# print('Smoting - Discrete (pre smoting) data')
# fsmain(X_train,y_train,1,new_pat,1,categ_feat,2)
# NOTE(review): the label below says "post smoting", but discretize=2 makes
# fsmain discretize the training data before oversampling - confirm the label.
print('\n','Smoting - Discrete (post smoting) data')
fsmain(X_train,y_train,1,new_pat,2,categ_feat,2)
# Four-class outcome (n_outcome_labels=4), same two configurations.
print('\n','No smoting')
fsmain(X_train,y_train,0,new_pat,2,categ_feat,4)
# print('Smoting - Discrete (pre smoting) data')
# fsmain(X_train,y_train,1,new_pat,1,categ_feat,4)
print('\n','Smoting - Discrete (post smoting) data')
fsmain(X_train,y_train,1,new_pat,2,categ_feat,4)



In [None]:
# categ_feat=list(X_train.columns)
# categ_feat.remove('Age')


# print('\n','No smoting')
# fsmain(X_train,y_train,0,patientdf,2,categ_feat,2)
# # print('Smoting - Discrete (pre smoting) data')
# # fsmain(X_train,y_train,1,new_pat,1,categ_feat,2)
# print('\n','Smoting - Discrete (post smoting) data')
# fsmain(X_train,y_train,1,patientdf,2,categ_feat,2)
# print('\n','No smoting')
# fsmain(X_train,y_train,0,patientdf,2,categ_feat,4)
# # print('Smoting - Discrete (pre smoting) data')
# # fsmain(X_train,y_train,1,new_pat,1,categ_feat,4)
# print('\n','Smoting - Discrete (post smoting) data')
# fsmain(X_train,y_train,1,patientdf,2,categ_feat,4)