In [1]:
import pandas as pd
from warnings import simplefilter
simplefilter(action='ignore')
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import RidgeClassifier
import shap
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.preprocessing import StandardScaler
import pickle

In [2]:
# Read the input CSV into a DataFrame. The path has no directory component,
# so it is resolved against the notebook's working directory.
file_path = 'MIMIC_WITHOUT_OUTLIERS_ITERATIVE_IMPUTATION.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

In [3]:
# Target vector: the DOD_LABEL column of the loaded frame.
Y = df['DOD_LABEL']

# Predictor columns fed to every model below.
predictor_columns = [
    'AGE', 'GENDER_M',
    'MARITAL_STATUS_DIVORCED', 'MARITAL_STATUS_LIFE PARTNER',
    'MARITAL_STATUS_MARRIED', 'MARITAL_STATUS_SEPARATED',
    'MARITAL_STATUS_SINGLE', 'MARITAL_STATUS_UNKNOWN (DEFAULT)',
    'MARITAL_STATUS_WIDOWED',
    'BMI',
    'HEART_RATE', 'FLAG_HEART_RATE_ALARM_LOW', 'FLAG_HEART_RATE_ALARM_HIGH',
    'OXYGEN_SATURATION', 'FLAG_OXYGEN_SATURATION_ALARM_HIGH',
    'FLAG_OXYGEN_SATURATION_ALARM_LOW',
    'ARTERIAL_BLOOD_PRESSURE_SYSTOLIC', 'ARTERIAL_BLOOD_PRESSURE_DIASTOLIC',
]
X = df[predictor_columns]

# Standardise the predictors. The scaler is fitted on the complete frame
# (no train/test split happens in this cell) and X is rebound to the
# scaled output of the scaler.
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [4]:
# Train the machine learning models.
#
# Every estimator is fitted on the full standardised matrix X with the
# hyper-parameters hard-coded below. Estimators whose fitting involves
# randomness (random forest, decision tree with feature sub-sampling, SVC
# probability calibration) share one seed so that re-running the notebook
# reproduces the same models and therefore the same SHAP plots.
RANDOM_STATE = 42

# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases;
# it is kept here so the fitted coefficients do not change -- confirm against
# the installed version.
lr = LogisticRegression(C=0.03359818286283781, multi_class='multinomial', penalty='l2',
                        solver='newton-cg').fit(X, Y)
rc = RidgeClassifier(alpha=0.5).fit(X, Y)
knn = KNeighborsClassifier(algorithm='auto', n_neighbors=14, p=1, weights='uniform').fit(X, Y)
rf = RandomForestClassifier(criterion='gini', max_depth=12, max_features='sqrt', min_samples_leaf=4,
                            min_samples_split=10, random_state=RANDOM_STATE).fit(X, Y)
svm = SVC(kernel='linear', probability=True, C=0.1, gamma='scale',
          random_state=RANDOM_STATE).fit(X, Y)
xgb = XGBClassifier(learning_rate=0.1, max_depth=5, min_child_weight=4, n_estimators=20).fit(X, Y)
# max_features='auto' is no longer accepted by scikit-learn >= 1.3; for
# classifiers it was an alias of 'sqrt', which is used instead.
dt = DecisionTreeClassifier(criterion='entropy', max_depth=18, max_features='sqrt', min_samples_leaf=1,
                            min_samples_split=2, splitter='best',
                            random_state=RANDOM_STATE).fit(X, Y)

In [5]:
# Unscaled view of the predictor columns. The SHAP plotting cells read
# X_features.columns to label the features.
shap_feature_columns = [
    'AGE', 'GENDER_M',
    'MARITAL_STATUS_DIVORCED', 'MARITAL_STATUS_LIFE PARTNER',
    'MARITAL_STATUS_MARRIED', 'MARITAL_STATUS_SEPARATED',
    'MARITAL_STATUS_SINGLE', 'MARITAL_STATUS_UNKNOWN (DEFAULT)',
    'MARITAL_STATUS_WIDOWED',
    'BMI',
    'HEART_RATE', 'FLAG_HEART_RATE_ALARM_LOW', 'FLAG_HEART_RATE_ALARM_HIGH',
    'OXYGEN_SATURATION', 'FLAG_OXYGEN_SATURATION_ALARM_HIGH',
    'FLAG_OXYGEN_SATURATION_ALARM_LOW',
    'ARTERIAL_BLOOD_PRESSURE_SYSTOLIC', 'ARTERIAL_BLOOD_PRESSURE_DIASTOLIC',
]
X_features = df.loc[:, shap_feature_columns]

In [None]:
# Calculate SHAP values for the logistic regression model.
# X serves both as the background data and as the rows being explained.
# `feature_perturbation="interventional"` is the current spelling of the
# former `feature_dependence="independent"` option; the old keyword is not
# accepted by recent shap releases.
explainer_lr = shap.LinearExplainer(lr, X, feature_perturbation="interventional")
shap_values_lr = explainer_lr.shap_values(X)

# Plot the SHAP values, labelling features with the original column names.
shap.summary_plot(shap_values_lr, X, feature_names=X_features.columns)

In [None]:
# Calculate SHAP values for the ridge classifier.
# X serves both as the background data and as the rows being explained.
# `feature_perturbation="interventional"` is the current spelling of the
# former `feature_dependence="independent"` option; the old keyword is not
# accepted by recent shap releases.
explainer_rc = shap.LinearExplainer(rc, X, feature_perturbation="interventional")
shap_values_rc = explainer_rc.shap_values(X)

# Plot the SHAP values, labelling features with the original column names.
shap.summary_plot(shap_values_rc, X, feature_names=X_features.columns)

In [None]:
# Calculate SHAP values for the random forest; shap.Explainer selects the
# concrete explainer from the fitted model it is given.
explainer_rf = shap.Explainer(rf)
shap_values_rf = explainer_rf.shap_values(X)

# Summary plot over every row of X, labelled with the original column names.
shap.summary_plot(
    shap_values_rf,
    features=X,
    feature_names=X_features.columns,
)

In [None]:
# Calculate SHAP values for the XGBoost model. Calling the explainer
# directly returns its result object, which is kept in memory only
# (nothing is written to disk in this cell).
explainer_xgb = shap.Explainer(xgb)
shap_values_xgb = explainer_xgb(X)

# Summary plot over every row of X, labelled with the original column names.
shap.summary_plot(
    shap_values_xgb,
    features=X,
    feature_names=X_features.columns,
)

In [None]:
# Calculate SHAP values for the decision tree; the values are kept in
# memory only (nothing is written to disk in this cell).
explainer_dt = shap.Explainer(dt)
shap_values_dt = explainer_dt.shap_values(X)

# Summary plot over every row of X, labelled with the original column names.
shap.summary_plot(
    shap_values_dt,
    features=X,
    feature_names=X_features.columns,
)

In [None]:
# Calculate SHAP values for KNN.
# The Sampling explainer wraps knn.predict_proba and uses the full matrix X
# as background data. Only a sample of at most 100 rows is explained.
explainer_knn = shap.explainers.Sampling(knn.predict_proba, X)

# Keep the explained rows: the summary plot must receive the same rows the
# SHAP values were computed for, not the full X, otherwise feature values
# and SHAP values do not line up row by row.
X_knn_explained = shap.sample(X, 100)
shap_values_knn = explainer_knn.shap_values(X_knn_explained)

shap.summary_plot(shap_values_knn, X_knn_explained, feature_names=X_features.columns)

In [None]:
# Calculate SHAP values for the SVM.
# The Sampling explainer wraps svm.predict_proba and uses the full matrix X
# as background data; only the rows returned by shap.sample(X, 100) are
# explained. Nothing is written to disk here.
# NOTE(review): any plot of shap_values_svm should be given those same
# sampled rows rather than the full X -- confirm against the plotting call.

explainer_svm = shap.explainers.Sampling(svm.predict_proba, X)
shap_values_svm = explainer_svm.shap_values(shap.sample(X, 100))

shap.summary_plot(shap_values_svm, X, feature_names=X_features.column