In [None]:
import pandas as pd
import ast
import numpy as np
# Seed NumPy's legacy global random generator so np.random draws repeat across runs.
np.random.seed(123456)

from pm4py.objects.log.util import dataframe_utils
from pm4py.objects.conversion.log import converter as log_converter

In [None]:
# Trace identifiers and their metadata, read from a CSV in the working directory.
trace_metadata = pd.read_csv(filepath_or_buffer="trace_identifiers.csv")

# Load event log
def convert(x):
    """Parse the text form of one ``metadata`` cell back into a Python object.

    Stand-alone ``nan`` tokens (how missing values appear once a cell has been
    turned into text) are rewritten to ``None`` before the text is handed to
    ``ast.literal_eval``.

    Parameters
    ----------
    x : str
        Text of a Python literal, typically a dict.

    Returns
    -------
    object
        The evaluated literal (``None`` when the whole cell is ``nan``), or an
        empty dict when the text cannot be parsed. The parse error is printed
        in that case.
    """
    import re  # local import so this cell does not depend on edits to the import cell

    try:
        # Replace only whole-word ``nan``. A plain str.replace also rewrote
        # words that merely contain "nan" (e.g. 'banana' -> 'baNonea').
        normalized = re.sub(r"\bnan\b", "None", x)
        return ast.literal_eval(normalized)
    except Exception as e:
        print(e)
        # The previous fallback returned an undefined name, which raised
        # NameError from inside this handler. An empty dict keeps key lookups
        # with ``in`` working for callers.
        return {}
    
# Read the raw event table, let pm4py convert its timestamp columns, and order
# the events by time.
log_csv = (
    dataframe_utils.convert_timestamp_columns_in_df(pd.read_csv("traces.csv", sep=","))
    .sort_values("time:timestamp")
)
# Every metadata cell is stored as text; parse it back into a Python object.
log_csv["metadata"] = log_csv["metadata"].map(lambda cell: convert(str(cell)))
# Turn the prepared frame into a pm4py event log.
event_log = log_converter.apply(log_csv)

In [None]:
# Label every metadata row: 1 when the trace shows a "Discharge" event for the
# admission of interest (pi_hadm_id) with discharge_location "DEAD/EXPIRED",
# 0 otherwise. Rows whose expire_flag is not 1 are always labelled 0.


def _label_from_trace(trace, pi_hadm_id):
    """Return the 0/1 label for one non-empty trace.

    If the last event is a "Discharge", only that event decides the label.
    Otherwise every "Discharge" event is inspected in order.
    """
    last_event = trace[-1]
    if last_event["concept:name"] == "Discharge":
        meta = last_event["metadata"]
        died = (
            meta["admission_id"] == pi_hadm_id
            and meta["discharge_location"] == "DEAD/EXPIRED"
        )
        return int(died)

    label = 0
    for event in trace:
        if event["concept:name"] != "Discharge":
            continue
        meta = event["metadata"]
        if meta["admission_id"] == pi_hadm_id:
            if meta["discharge_location"] == "DEAD/EXPIRED":
                label = 1
        else:
            # NOTE(review): a Discharge from a *different* admission resets the
            # label to 0, so the result depends on event order. This is kept
            # as-is to leave existing labels unchanged; confirm it is intended.
            label = 0
    return label


# Index the traces by case id once instead of rescanning the log for every row.
# setdefault keeps the first trace per id, matching a first-match linear scan.
traces_by_subject = {}
for trace in event_log:
    traces_by_subject.setdefault(trace.attributes["concept:name"], trace)

labels = []
for _, row in trace_metadata.iterrows():
    label = 0
    if row["expire_flag"] == 1:
        trace_oi = traces_by_subject.get(row["subject_id"])
        if trace_oi is None or len(trace_oi) == 0:
            # Previously this crashed with a TypeError on ``None[-1]``.
            print(f"No usable trace for subject {row['subject_id']}; label left at 0")
        else:
            label = _label_from_trace(trace_oi, row["pi_hadm_id"])
    labels.append(label)

# Assign the whole column at once. Chained ``df["label"].iloc[idx] = ...``
# writes are not guaranteed to reach the frame (and never do under pandas
# Copy-on-Write), and ``idx`` from iterrows is an index label, not a position.
trace_metadata["label"] = labels

In [None]:
# Build one feature row per trace: identifiers, gender, label, and the mean of
# each selected lab test over the events of the admission of interest.

# LOINC code -> (output column, substrings marking a non-numeric result to skip).
# Insertion order defines the column order of the resulting rows.
LAB_FEATURES = {
    "2951-2": ("sodium", ()),
    "718-7": ("hemoglobin", ("UNABLE TO REPORT", "DONE")),
    "3173-2": ("ptt", (">", "ERROR", "150 IS HIGHEST MEASURED PTT")),
    "2823-3": ("potassium", ("GREATER THAN 10",)),
    "3094-0": ("blood_urea_nitrogen", ()),
    "1963-8": ("bicarbonate", ("LESS THAN 5", "GREATER THAN 50")),
    "2075-0": ("chloride", ()),
    "2160-0": ("creatinine", ("LESS THAN 0.2", "LESS THAN 0.5", "LESS THAN 0.3",
                              "<0.3 CONFIRMED BY DILUTION", "LESS THAN 0.4")),
    "2345-7": ("glucose", (">1000", "GREATER THAN 750")),
    "1863-0": ("anion_gap", ()),
    "2532-0": ("lactate", ()),
    "1975-2": ("bilirubin", ()),
    "4544-3": ("hematocrit", ()),
    "804-5": ("wbc", ("<0.1",)),
    "777-3": ("platelet", ("<5",)),
    "5902-2": ("pt", ("ERROR", ">100", "HEMOLYZED, SLIGHTLY")),
    "5895-7": ("inr", ("ERROR", ">22.8", "GREATER THAN 15.7", ">15.7", ">21.8",
                       ">63", ">20.2", ">66.1")),
    "1751-7": ("albumin", ("LESS THAN 1.0", "<1.0")),
}

# Metadata keys under which an event may carry its admission id.
ADMISSION_KEYS = ("hadm_id", "admission_id")


def _numeric_lab_value(value, skip_markers):
    """Return ``value`` as a float, or ``None`` when it must be left out.

    ``None`` values and text containing any of ``skip_markers`` are skipped.
    Any other text that is not a number still raises ``ValueError``, so an
    unrecognised result string is surfaced instead of silently dropped.
    """
    if value is None:
        # Previously only the anion-gap branch guarded against None; every
        # other lab raised a TypeError on it.
        return None
    if isinstance(value, str) and any(marker in value for marker in skip_markers):
        return None
    return float(value)


rows = []
for trace in event_log:
    trace_identifier = trace.attributes["concept:name"]
    trace_meta = trace_metadata[trace_metadata.subject_id == trace_identifier]
    if len(trace_meta) == 0:
        # No metadata row for this trace: nothing to emit.
        continue

    trace_meta = trace_meta.iloc[0]
    admission_id = trace_meta["pi_hadm_id"]

    lab_values = {column: [] for column, _ in LAB_FEATURES.values()}

    for event in trace:
        if event["concept:name"] != "Lab":
            continue
        meta = event["metadata"]
        if not isinstance(meta, dict):
            # Metadata that did not parse into a dict carries no lab result.
            continue
        # An event belongs to the admission if either key matches. It is
        # counted once even when both keys are present (the two copy-pasted
        # branches used to append such a value twice).
        if not any(key in meta and meta[key] == admission_id for key in ADMISSION_KEYS):
            continue
        feature = LAB_FEATURES.get(meta.get("loinc"))
        if feature is None:
            continue
        column, skip_markers = feature
        number = _numeric_lab_value(meta["value"], skip_markers)
        if number is not None:
            lab_values[column].append(number)

    row = {
        "subject_id": trace_identifier,
        "pi_hadm_id": admission_id,
        "gender": trace_meta["gender"],
        "AliveDeadCensor": trace_meta["label"],
    }
    for column, values in lab_values.items():
        # Same NaN result as np.mean([]) but without the "Mean of empty slice" warning.
        row[column] = np.mean(values) if values else np.nan
    rows.append(row)

In [None]:
# Collect the per-trace rows into one table and write it, without the index
# column, to a CSV in the working directory.
df = pd.DataFrame(data=rows)
df.to_csv(path_or_buf="new_dataframe.csv", index=None)
#print(df)

In [None]:
import seaborn as sb
import matplotlib.pyplot as plt
import sklearn

from pandas import Series, DataFrame
from pylab import rcParams
from sklearn import preprocessing

In [None]:
from sklearn.metrics import roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_predict

from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score

In [None]:
# Read back the feature table this notebook wrote earlier ("new_dataframe.csv").
# The path is relative so the notebook runs from any checkout; it used to be an
# absolute path under one user's Desktop.
Address = 'new_dataframe.csv'
IE_data = pd.read_csv(Address)

# Columns the modelling cells rely on, in the order they are expected.
expected_columns = ['subject_id', 'pi_hadm_id', 'gender', 'AliveDeadcensor', 'sodium', 'hemoglobin', 'ptt',
                    'potassium', 'blood_urea_nitrogen', 'bicarbonate', 'chloride', 'creatinine', 'glucose',
                    'anion_gap', 'lactate', 'bilirubin', 'hematocrit', 'wbc', 'platelet', 'pt', 'inr', 'albumin']

# The file stores the label as 'AliveDeadCensor'; later cells use
# 'AliveDeadcensor'. Rename by name, then select by name. Overwriting every
# header by position would silently mislabel data if the file's column order
# ever changed, whereas this raises a KeyError naming any missing column.
IE_data = IE_data.rename(columns={'AliveDeadCensor': 'AliveDeadcensor'})[expected_columns]
#IE_data

In [None]:
from sklearn.preprocessing import LabelEncoder

# Integer-encode the gender column; the fitted encoder is kept so the mapping
# can be inspected afterwards.
gender_m = IE_data['gender']
label_encoder = LabelEncoder()
gender_encoded = label_encoder.fit_transform(gender_m)

In [None]:
# Wrap the encoded values in a one-column frame.
# 1 = male / 0 = female (presumably the raw values are 'F' and 'M' - verify).
gender_DF = pd.DataFrame({'Male_gender': gender_encoded})
print(gender_DF)

In [None]:
# Remove the raw text column in place; its encoded form lives in gender_DF.
del IE_data['gender']

In [None]:
# Place the encoded gender column beside the remaining features (failing on
# duplicate column labels), then cast every column to float.
IE_new = pd.concat([IE_data, gender_DF], axis="columns", verify_integrity=True)
IE_new = IE_new.astype(float)
#IE_new

In [None]:
# Column dtypes and non-null counts of the combined table, before missing values are filled.
IE_new.info()

In [None]:
# Per-column summary statistics (count, mean, std, min, quartiles, max).
IE_new.describe()

In [None]:
# Fill missing lab values with fixed constants, one per column.
# NOTE(review): the constants look like column means read off a describe()
# output - confirm, and consider computing them from the training split only.
FILL_VALUES = {
    "ptt": 38.293761,
    "lactate": 372.152025,
    "bilirubin": 2.322692,
    "pt": 16.258176,
    "inr": 1.514620,
    "albumin": 2.873788,
}

# Assign the result back instead of calling ``fillna(..., inplace=True)`` on a
# column selection. That chained in-place form is deprecated and does not
# modify the frame under pandas Copy-on-Write, leaving the NaNs in place.
IE_new = IE_new.fillna(value=FILL_VALUES)

In [None]:
# Re-check the non-null counts after the fillna step above.
IE_new.info()

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable. Features are every column except the label.
X_train, X_test, y_train, y_test = train_test_split(
    IE_new.drop(columns='AliveDeadcensor'),
    IE_new['AliveDeadcensor'],
    test_size=0.2,
    random_state=200,
)

In [None]:
# Shapes of the training features and labels, then the training labels themselves.
print(X_train.shape, y_train.shape, sep="\n")
y_train

In [None]:
# Logistic-regression baseline, evaluated on the held-out rows.
LogReg = LogisticRegression(random_state= 123456, max_iter = 1000)
LogReg.fit(X_train, y_train)

# Hard class predictions, kept for threshold-based metrics.
y_pred = LogReg.predict(X_test)

# ROC AUC measures how well a continuous score ranks positives above
# negatives. Feeding it hard 0/1 predictions collapses the curve to a single
# operating point, so score with the predicted probability of the positive
# class (second column of predict_proba) instead.
y_score = LogReg.predict_proba(X_test)[:, 1]

roc_auc_score(y_test, y_score)

In [None]:
from sklearn.tree import DecisionTreeRegressor

# Regression tree fitted on the 0/1 label; its numeric output is used directly
# as the ranking score for ROC AUC.
DecisionTreeRegModel = DecisionTreeRegressor(random_state=123456)
DecisionTreeRegModel = DecisionTreeRegModel.fit(X_train, y_train)

y_pred2 = DecisionTreeRegModel.predict(X_test)

roc_auc_score(y_true=y_test, y_score=y_pred2)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

# Random-forest classifier, evaluated on the held-out rows.
clf = RandomForestClassifier(random_state= 123456)
clf.fit(X_train,y_train)

# Hard class predictions, kept for threshold-based metrics.
y_pred3 =clf.predict(X_test)

# ROC AUC needs a continuous ranking score; hard 0/1 predictions reduce it to
# a single operating point. Use the predicted probability of the positive
# class (second column of predict_proba).
y_score3 = clf.predict_proba(X_test)[:, 1]

roc_auc_score(y_test, y_score3)

In [None]:
from sklearn import svm

# Linear support-vector classifier, evaluated on the held-out rows.
clf = svm.LinearSVC(random_state= 123456, max_iter = 1000000)
clf.fit(X_train,y_train)

# Hard class predictions, kept for threshold-based metrics.
y_pred4 = clf.predict(X_test)

# LinearSVC has no predict_proba; its signed distance to the separating
# hyperplane is the continuous score ROC AUC expects. Hard 0/1 predictions
# would reduce the curve to a single operating point.
y_score4 = clf.decision_function(X_test)

roc_auc_score(y_test, y_score4)