In [None]:
import pandas as pd
import numpy as np

# Load the raw dataset and show its (rows, columns) dimensions.
csv_path = 'your_data.csv'
df1 = pd.read_csv(csv_path)
df1.shape

In [None]:
# Class balance of the target: share of each label, in percent.

label_share = df1['mort1y'].value_counts(normalize=True)
label_share * 100

In [None]:
# Total number of missing cells in the whole frame.

na_per_column = df1.isna().sum()
na_per_column.sum()

In [None]:
# Separate the label column (df_target) from the feature columns (df).

target_name = "mort1y"

df_target = df1[target_name]
df = df1.drop(columns=[target_name])

In [None]:
# Split the features by dtype. This cell keeps the categorical part,
# i.e. every column that pandas stores with the 'object' dtype.

cate_df = df.select_dtypes(include=['object'])
cate_df

In [None]:
# The numerical part: every column with a numeric dtype.

numb_df = df.select_dtypes(include='number')
numb_df

The Numeric Values

In [None]:
# Percentage of missing values per numerical column, in ascending order.

na_counts = numb_df.isnull().sum()
percent_missing = na_counts * 100 / len(numb_df)

missing_value_df = (
    pd.DataFrame({'percent_missing': percent_missing})
    .sort_values('percent_missing')
)

missing_value_df

In [None]:
# Normalization of the numerical features to the [0, 1] range.
# MinMaxScaler disregards NaN while fitting and keeps them in the output,
# so missing values survive this step and are imputed afterwards.
#
# NOTE(review): the scaler is fitted on the full dataset, before the
# train/test split further down, so test-set statistics leak into the
# features. Consider fitting on the training rows only.

from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
norm_numb_df = pd.DataFrame(
    scaler.fit_transform(numb_df),
    columns=numb_df.columns,
    index=numb_df.index,  # keep the original row labels for later alignment
)
norm_numb_df.head(5)

In [None]:
# KNN imputer: neighbours are found with a NaN-aware Euclidean distance and
# weighted uniformly. Change n_neighbors to use a different neighbour count.

from sklearn.impute import KNNImputer

knn_settings = dict(n_neighbors=5, weights='uniform', metric='nan_euclidean')
imputer = KNNImputer(**knn_settings)

In [None]:
# Fill the missing numerical values with the KNN imputer.
# fit_transform returns a bare ndarray, so the row index is passed through
# explicitly; otherwise the frame silently falls back to 0..n-1 labels.
new_numb_df = pd.DataFrame(
    imputer.fit_transform(norm_numb_df),
    columns=norm_numb_df.columns,
    index=norm_numb_df.index,
)
new_numb_df

In [None]:
# Sanity check: number of NA cells left after the imputation.

remaining_numeric_na = new_numb_df.isna().sum()
remaining_numeric_na.sum()

The Categorical Values

In [None]:
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

In [None]:
# Percentage of missing values per categorical column, in ascending order.
# The denominator is the row count of cate_df itself, not of numb_df.

percent_missing1 = cate_df.isnull().sum() * 100 / len(cate_df)

missing_value_df1 = pd.DataFrame({'percent_missing': percent_missing1})

missing_value_df1 = missing_value_df1.sort_values('percent_missing')

missing_value_df1

In [None]:
# Convert the categorical data into dummy (one-hot) variables.
#
# pd.get_dummies ignores NaN by default: a row with a missing category gets
# 0 in every dummy column, so the missingness is lost and the imputation
# step that follows has nothing to fill. Each column is therefore encoded
# on its own, and the rows whose category was missing are set back to NaN.

encoded_parts = []
for col in cate_df.columns:
    dummies = pd.get_dummies(cate_df[col], prefix=col, dtype=float)
    dummies.loc[cate_df[col].isna(), :] = np.nan
    encoded_parts.append(dummies)

if encoded_parts:
    encode_cate_df = pd.concat(encoded_parts, axis=1)
else:
    # no categorical columns at all: keep an empty frame with the same rows
    encode_cate_df = pd.DataFrame(index=cate_df.index)

encode_cate_df.head(5)

In [None]:
# Run the KNN imputer on the encoded categorical frame (the same imputer
# object is re-fitted here on the dummy columns). The row index is passed
# through so the result keeps the row labels of encode_cate_df.
new_cate_df = pd.DataFrame(
    imputer.fit_transform(encode_cate_df),
    columns=encode_cate_df.columns,
    index=encode_cate_df.index,
)
new_cate_df.head(5)

In [None]:
# Sanity check: number of NA cells left in the categorical part.

remaining_cate_na = new_cate_df.isna().sum()
remaining_cate_na.sum()

Merging the Data 

In [None]:
# Shapes of the two parts that are about to be merged.
(new_cate_df.shape, new_numb_df.shape)

In [None]:
# Put the categorical and numerical features side by side, with the
# numerical rows re-indexed on the categorical frame's index.
aligned_numb_df = new_numb_df.reindex(new_cate_df.index)
new_df_x = pd.concat([new_cate_df, aligned_numb_df], axis=1)
new_df_x.shape

In [None]:
# Append the target column, re-indexed on the feature frame's index.
aligned_target = df_target.reindex(new_df_x.index)
new_df = pd.concat([new_df_x, aligned_target], axis=1)
new_df.shape

In [None]:
# Preview of the merged frame
new_df.head(n=5)

In [None]:
# Number of NA cells in the merged frame
merged_na = new_df.isna().sum()
merged_na.sum()

In [None]:
# Import modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
# NOTE(review): keras.wrappers.scikit_learn and keras.utils.np_utils are
# reportedly no longer shipped with recent Keras releases - confirm against
# the installed version. None of the Keras names imported in this cell are
# used in the visible part of the notebook.
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils

# for modeling
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping

#metrics
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import matthews_corrcoef

#plots
import seaborn as sns
# matplotlib.pyplot is imported a second time here (already imported above)
import matplotlib.pyplot as plt

Prepare the data for the machine learning models

In [None]:
# Hold out 20% of the rows as a test set; the fixed seed makes the split
# reproducible across runs.
from sklearn.model_selection import train_test_split

target_name = "mort1y"

y = new_df[target_name]
X = new_df.drop(columns=[target_name])

split_parts = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = split_parts

SVM

In [None]:
from sklearn import svm

# Linear-kernel support vector classifier, fitted on the training split
# (fit returns the estimator itself, so creation and training are chained)
clf = svm.SVC(kernel='linear').fit(X_train, y_train)

# Hard class predictions for the held-out test rows
svm_pred = clf.predict(X_test)

In [None]:
# Per-class precision / recall / F1, followed by the raw confusion matrix
print(classification_report(y_test, svm_pred),
      confusion_matrix(y_test, svm_pred),
      sep='\n')

In [None]:
# Confusion matrix of the SVM predictions, drawn as an annotated heatmap.
# Stored as svm_cm so that the name `svm` keeps pointing at the sklearn.svm
# module instead of being overwritten by an array.
svm_cm = confusion_matrix(y_test, svm_pred)
sns.heatmap(svm_cm, annot=True, cmap="Paired_r", linewidth=2, linecolor='w', fmt='.0f')
plt.xlabel('Predicted Value')
plt.ylabel('True Value')
plt.show()

In [None]:
# This model was trained on the original (not resampled) training data.
# NOTE(review): scored from hard class labels (clf.predict output), not from
# decision scores.
print("ROC AUC score for original data: ", roc_auc_score(y_test, svm_pred))

In [None]:
# Matthews correlation coefficient of the SVM predictions
matthews_corrcoef(y_true=y_test, y_pred=svm_pred)

Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import RocCurveDisplay
from sklearn.datasets import load_wine

# Small random forest: 15 bootstrapped trees, depth capped at 10,
# 'sqrt' feature sub-sampling, fixed seed.
rf_settings = dict(n_estimators=15, max_depth=10, max_features='sqrt',
                   bootstrap=True, random_state=42)
rfc = RandomForestClassifier(**rf_settings)
rfc.fit(X_train, y_train)
rfc_predictions = rfc.predict(X_test)

In [None]:
# Per-class precision / recall / F1, followed by the raw confusion matrix
print(classification_report(y_test, rfc_predictions),
      confusion_matrix(y_test, rfc_predictions),
      sep='\n')

In [None]:
# Confusion matrix of the random-forest predictions as an annotated heatmap
rf = confusion_matrix(y_test, rfc_predictions)
heatmap_style = dict(annot=True, fmt='.0f', cmap="Paired_r", linewidth=2, linecolor='w')
sns.heatmap(rf, **heatmap_style)
plt.xlabel('Predicted Value')
plt.ylabel('True Value')
plt.show()

In [None]:
# This model was trained on the original (not resampled) training data.
# NOTE(review): scored from hard class labels (rfc.predict output), not from
# predicted probabilities.
print("ROC AUC score for original data: ", roc_auc_score(y_test, rfc_predictions))

In [None]:
# Matthews correlation coefficient of the random-forest predictions
matthews_corrcoef(y_true=y_test, y_pred=rfc_predictions)

Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Shallow decision tree (depth 2, at least 50 samples per leaf).
# random_state is fixed, as for the random forest and XGBoost models, so that
# ties between equally good splits are resolved the same way on every run.
model = DecisionTreeClassifier(criterion='gini', max_depth=2, min_samples_leaf=50,
                               random_state=42)
model.fit(X_train, y_train)
predictions = model.predict(X_test)

In [None]:
# Per-class precision / recall / F1, followed by the raw confusion matrix
print(classification_report(y_test, predictions),
      confusion_matrix(y_test, predictions),
      sep='\n')

In [None]:
# Confusion matrix of the decision-tree predictions as an annotated heatmap
dt = confusion_matrix(y_test, predictions)
heatmap_style = dict(annot=True, fmt='.0f', cmap="Paired_r", linewidth=2, linecolor='w')
sns.heatmap(dt, **heatmap_style)
plt.xlabel('Predicted Value')
plt.ylabel('True Value')
plt.show()

In [None]:
# This model was trained on the original (not resampled) training data.
# NOTE(review): scored from hard class labels (model.predict output), not
# from predicted probabilities.
print("ROC AUC score for original data: ", roc_auc_score(y_test, predictions))

In [None]:
# Matthews correlation coefficient of the decision-tree predictions
matthews_corrcoef(y_true=y_test, y_pred=predictions)

XGBoost Classifier

In [None]:
import xgboost as xgb
from xgboost import XGBClassifier

# XGBoost classifier: 140 boosting rounds, tree depth 3, learning rate 0.1.
# The full parameter set is collected in one dict and passed as keywords.
xgb_params = dict(
    base_score=0.5, booster='gbtree', callbacks=None,
    colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
    early_stopping_rounds=None, enable_categorical=False,
    eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
    importance_type=None, interaction_constraints='',
    learning_rate=0.1, max_bin=256, max_cat_to_onehot=4,
    max_delta_step=0, max_depth=3, max_leaves=0, min_child_weight=1,
    monotone_constraints='()', n_estimators=140,
    n_jobs=4, nthread=4, num_parallel_tree=1, predictor='auto',
    random_state=42, reg_alpha=0,
)
xgb_model = xgb.XGBClassifier(**xgb_params)

xgb_model.fit(X_train, y_train)
predicted_y = xgb_model.predict(X_test)

In [None]:
# Per-class precision / recall / F1, followed by the raw confusion matrix
print(metrics.classification_report(y_test, predicted_y),
      metrics.confusion_matrix(y_test, predicted_y),
      sep='\n')

In [None]:
# Confusion matrix of the XGBoost predictions, drawn as an annotated heatmap.
# Stored as xgb_cm so that the name `xgb` keeps pointing at the xgboost
# module instead of being overwritten by an array.
xgb_cm = confusion_matrix(y_test, predicted_y)
sns.heatmap(xgb_cm, annot=True, cmap="Paired_r", linewidth=2, linecolor='w', fmt='.0f')
plt.xlabel('Predicted Value')
plt.ylabel('True Value')
plt.show()

In [None]:
# This model was trained on the original (not resampled) training data.
# NOTE(review): scored from hard class labels (xgb_model.predict output), not
# from predicted probabilities.
print("ROC AUC score for original data: ", roc_auc_score(y_test, predicted_y))

In [None]:
# Matthews correlation coefficient of the XGBoost predictions
matthews_corrcoef(y_true=y_test, y_pred=predicted_y)

GradientBoostingClassifier

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient boosting with 10 deep trees (max_depth 20) and log-loss objective.
# random_state is fixed, as for the random forest and XGBoost models, so the
# fitted ensemble is the same on every run.
gbc = GradientBoostingClassifier(learning_rate=0.1, loss='log_loss', max_depth=20,
                                 n_estimators=10, random_state=42)
gbc.fit(X_train, y_train)

gbc_predicted_y = gbc.predict(X_test)

In [None]:
# Per-class precision / recall / F1, followed by the raw confusion matrix
print(metrics.classification_report(y_test, gbc_predicted_y),
      metrics.confusion_matrix(y_test, gbc_predicted_y),
      sep='\n')

In [None]:
# Confusion matrix of the gradient-boosting predictions as a heatmap.
# Stored as gbc_cm so that the fitted classifier `gbc` is not overwritten by
# the matrix and stays usable after this cell.
gbc_cm = confusion_matrix(y_test, gbc_predicted_y)
sns.heatmap(gbc_cm, annot=True, cmap="Paired_r", linewidth=2, linecolor='w', fmt='.0f')
plt.xlabel('Predicted Value')
plt.ylabel('True Value')
plt.show()

In [None]:
# This model was trained on the original (not resampled) training data.
# NOTE(review): scored from hard class labels, not predicted probabilities.
print("ROC AUC score for original data: ", roc_auc_score(y_test, gbc_predicted_y))

In [None]:
# Matthews correlation coefficient of the gradient-boosting predictions
matthews_corrcoef(y_true=y_test, y_pred=gbc_predicted_y)

ROC Curve and AUC

In [None]:
# ROC curves of the five baseline models on the test set.
# NOTE(review): the curves are built from hard class predictions (the output
# of .predict), so each has at most one interior point; pass decision scores
# or probabilities to roc_curve to obtain full curves.

# roc_curve also returns the thresholds, which are not used below
SVM_fpr, SVM_tpr, threshold = roc_curve(y_test, svm_pred)
auc_SVM = auc(SVM_fpr, SVM_tpr)

RF_fpr, RF_tpr, threshold = roc_curve(y_test, rfc_predictions)
auc_RF = auc(RF_fpr, RF_tpr)

DT_fpr, DT_tpr, threshold = roc_curve(y_test, predictions)
auc_DT = auc(DT_fpr, DT_tpr)

XGB_fpr, XGB_tpr, threshold = roc_curve(y_test, predicted_y)
auc_XGB = auc(XGB_fpr, XGB_tpr)

GBC_fpr, GBC_tpr, threshold = roc_curve(y_test, gbc_predicted_y)
auc_GBC = auc(GBC_fpr, GBC_tpr)

plt.figure(figsize=(5, 5), dpi=100)

plt.plot(SVM_fpr, SVM_tpr, linestyle='-', label='SVM (auc = %0.3f)' % auc_SVM)
plt.plot(RF_fpr,  RF_tpr,  marker='.',    label='RF (auc = %0.3f)'  % auc_RF )
plt.plot(DT_fpr,  DT_tpr,  marker='.',    label='DT (auc = %0.3f)'  % auc_DT )
plt.plot(XGB_fpr, XGB_tpr, marker='.',    label='XGB (auc = %0.3f)' % auc_XGB)
plt.plot(GBC_fpr, GBC_tpr, marker='.',    label='GBC (auc = %0.3f)' % auc_GBC)

plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')

plt.legend()

# Save BEFORE showing: once plt.show() has displayed the figure, a later
# savefig call writes an empty canvas.
# NOTE(review): the under- and over-sampling sections also save to 'bfs.jpg',
# so the last section that runs overwrites this file.
plt.savefig('bfs.jpg')
plt.show()

Imbalanced Data

Under-Sampling

In [None]:
# import random undersampling 

from collections import Counter
from imblearn.under_sampling import RandomUnderSampler

In [None]:
# (rows, columns) of the merged frame, shown before any resampling
new_df.shape

In [None]:
# Class distribution of the training labels before resampling
print("Before undersampling: ", Counter(y_train))

# Random under-sampling of the majority class only.
# random_state is fixed so the same rows are kept on every run and the
# metrics reported below are reproducible.
undersample = RandomUnderSampler(sampling_strategy='majority', random_state=42)

# Only the training split is resampled; the test split is left untouched
X_train_under, y_train_under = undersample.fit_resample(X_train, y_train)

# Class distribution after resampling
print("After undersampling: ", Counter(y_train_under))

SVM

In [None]:
from sklearn import svm

# Linear-kernel SVM fitted on the under-sampled training data
# (fit returns the estimator itself, so creation and training are chained)
clf = svm.SVC(kernel='linear').fit(X_train_under, y_train_under)

# Hard class predictions for the untouched test rows
svm_pred_under = clf.predict(X_test)

In [None]:
# Per-class precision / recall / F1, followed by the raw confusion matrix
print(classification_report(y_test, svm_pred_under),
      confusion_matrix(y_test, svm_pred_under),
      sep='\n')

In [None]:
# Confusion matrix of the SVM (under-sampled training) as a heatmap.
# Stored as svm_under_cm so that the name `svm` keeps pointing at the
# sklearn.svm module instead of being overwritten by an array.
svm_under_cm = confusion_matrix(y_test, svm_pred_under)
sns.heatmap(svm_under_cm, annot=True, cmap="Paired_r", linewidth=2, linecolor='w', fmt='.0f')
plt.xlabel('Predicted Value')
plt.ylabel('True Value')
plt.show()

In [None]:
# ROC AUC of the SVM trained on under-sampled data (from hard predictions)
roc_auc_value = roc_auc_score(y_test, svm_pred_under)
print("ROC AUC score for undersampled data: ", roc_auc_value)

In [None]:
# Matthews correlation coefficient, SVM trained on under-sampled data
matthews_corrcoef(y_true=y_test, y_pred=svm_pred_under)

Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import RocCurveDisplay
from sklearn.datasets import load_wine

# Same random-forest configuration as before, trained on the under-sampled
# training data.
rf_settings = dict(n_estimators=15, max_depth=10, max_features='sqrt',
                   bootstrap=True, random_state=42)
rfc = RandomForestClassifier(**rf_settings)
rfc.fit(X_train_under, y_train_under)
rfc_predictions_under = rfc.predict(X_test)

In [None]:
# Per-class precision / recall / F1, followed by the raw confusion matrix
print(classification_report(y_test, rfc_predictions_under),
      confusion_matrix(y_test, rfc_predictions_under),
      sep='\n')

In [None]:
# Confusion matrix of the random forest (under-sampled training) as a heatmap
rf = confusion_matrix(y_test, rfc_predictions_under)
heatmap_style = dict(annot=True, fmt='.0f', cmap="Paired_r", linewidth=2, linecolor='w')
sns.heatmap(rf, **heatmap_style)
plt.xlabel('Predicted Value')
plt.ylabel('True Value')
plt.show()

In [None]:
# ROC AUC of the random forest trained on under-sampled data
roc_auc_value = roc_auc_score(y_test, rfc_predictions_under)
print("ROC AUC score for undersampled data: ", roc_auc_value)

In [None]:
# Matthews correlation coefficient, random forest on under-sampled data
matthews_corrcoef(y_true=y_test, y_pred=rfc_predictions_under)

Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Shallow decision tree trained on the under-sampled training data.
# random_state is fixed, as for the random forest and XGBoost models, so that
# ties between equally good splits are resolved the same way on every run.
model = DecisionTreeClassifier(criterion='gini', max_depth=2, min_samples_leaf=50,
                               random_state=42)
model.fit(X_train_under, y_train_under)
dt_pred_under = model.predict(X_test)

In [None]:
# Per-class precision / recall / F1, followed by the raw confusion matrix
print(classification_report(y_test, dt_pred_under),
      confusion_matrix(y_test, dt_pred_under),
      sep='\n')

In [None]:
# Confusion matrix of the decision tree (under-sampled training) as a heatmap
dt = confusion_matrix(y_test, dt_pred_under)
heatmap_style = dict(annot=True, fmt='.0f', cmap="Paired_r", linewidth=2, linecolor='w')
sns.heatmap(dt, **heatmap_style)
plt.xlabel('Predicted Value')
plt.ylabel('True Value')
plt.show()

In [None]:
# ROC AUC of the decision tree trained on under-sampled data
roc_auc_value = roc_auc_score(y_test, dt_pred_under)
print("ROC AUC score for undersampled data: ", roc_auc_value)

In [None]:
# Matthews correlation coefficient, decision tree on under-sampled data
matthews_corrcoef(y_true=y_test, y_pred=dt_pred_under)

XGBoost Classifier

In [None]:
import xgboost as xgb
from xgboost import XGBClassifier
from sklearn import metrics

# Same XGBoost configuration as before, trained on the under-sampled
# training data. The parameters are collected in one dict and passed as
# keywords.
xgb_params = dict(
    base_score=0.5, booster='gbtree', callbacks=None,
    colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
    early_stopping_rounds=None, enable_categorical=False,
    eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
    importance_type=None, interaction_constraints='',
    learning_rate=0.1, max_bin=256, max_cat_to_onehot=4,
    max_delta_step=0, max_depth=3, max_leaves=0, min_child_weight=1,
    monotone_constraints='()', n_estimators=140,
    n_jobs=4, nthread=4, num_parallel_tree=1, predictor='auto',
    random_state=42, reg_alpha=0,
)
xgb_model = xgb.XGBClassifier(**xgb_params)

xgb_model.fit(X_train_under, y_train_under)
predicted_y_under = xgb_model.predict(X_test)

In [None]:
# Per-class precision / recall / F1, followed by the raw confusion matrix
print(metrics.classification_report(y_test, predicted_y_under),
      metrics.confusion_matrix(y_test, predicted_y_under),
      sep='\n')

In [None]:
# Confusion matrix of XGBoost (under-sampled training) as a heatmap.
# Stored as xgb_under_cm so that the name `xgb` keeps pointing at the xgboost
# module instead of being overwritten by an array.
xgb_under_cm = confusion_matrix(y_test, predicted_y_under)
sns.heatmap(xgb_under_cm, annot=True, cmap="Paired_r", linewidth=2, linecolor='w', fmt='.0f')
plt.xlabel('Predicted Value')
plt.ylabel('True Value')
plt.show()

In [None]:
# ROC AUC of XGBoost trained on under-sampled data
roc_auc_value = roc_auc_score(y_test, predicted_y_under)
print("ROC AUC score for undersampled data: ", roc_auc_value)

In [None]:
# Matthews correlation coefficient, XGBoost on under-sampled data
matthews_corrcoef(y_true=y_test, y_pred=predicted_y_under)

GradientBoostingClassifier

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient boosting trained on the under-sampled training data.
# random_state is fixed, as for the random forest and XGBoost models, so the
# fitted ensemble is the same on every run.
gbc = GradientBoostingClassifier(learning_rate=0.1, loss='log_loss', max_depth=20,
                                 n_estimators=10, random_state=42)
gbc.fit(X_train_under, y_train_under)

gbc_predicted_y_under = gbc.predict(X_test)

In [None]:
# Per-class precision / recall / F1, followed by the raw confusion matrix
print(metrics.classification_report(y_test, gbc_predicted_y_under),
      metrics.confusion_matrix(y_test, gbc_predicted_y_under),
      sep='\n')

In [None]:
# Confusion matrix of gradient boosting (under-sampled training) as a heatmap.
# Stored as gbc_under_cm so that the fitted classifier `gbc` is not
# overwritten by the matrix and stays usable after this cell.
gbc_under_cm = confusion_matrix(y_test, gbc_predicted_y_under)
sns.heatmap(gbc_under_cm, annot=True, cmap="Paired_r", linewidth=2, linecolor='w', fmt='.0f')
plt.xlabel('Predicted Value')
plt.ylabel('True Value')
plt.show()

In [None]:
# ROC AUC of gradient boosting trained on under-sampled data
roc_auc_value = roc_auc_score(y_test, gbc_predicted_y_under)
print("ROC AUC score for undersampled data: ", roc_auc_value)

In [None]:
# Matthews correlation coefficient, gradient boosting on under-sampled data
matthews_corrcoef(y_true=y_test, y_pred=gbc_predicted_y_under)

AUC

In [None]:
# ROC curves of the five models trained on under-sampled data, evaluated on
# the untouched test set.
# NOTE(review): the curves are built from hard class predictions (the output
# of .predict), so each has at most one interior point; pass decision scores
# or probabilities to roc_curve to obtain full curves.

# roc_curve also returns the thresholds, which are not used below
SVM_fpr, SVM_tpr, threshold = roc_curve(y_test, svm_pred_under)
auc_SVM = auc(SVM_fpr, SVM_tpr)

RF_fpr, RF_tpr, threshold = roc_curve(y_test, rfc_predictions_under)
auc_RF = auc(RF_fpr, RF_tpr)

DT_fpr, DT_tpr, threshold = roc_curve(y_test, dt_pred_under)
auc_DT = auc(DT_fpr, DT_tpr)

XGB_fpr, XGB_tpr, threshold = roc_curve(y_test, predicted_y_under)
auc_XGB = auc(XGB_fpr, XGB_tpr)

GBC_fpr, GBC_tpr, threshold = roc_curve(y_test, gbc_predicted_y_under)
auc_GBC = auc(GBC_fpr, GBC_tpr)

plt.figure(figsize=(5, 5), dpi=100)

plt.plot(SVM_fpr, SVM_tpr, linestyle='-', label='SVM (auc = %0.3f)' % auc_SVM)
plt.plot(RF_fpr,  RF_tpr,  marker='.',    label='RF (auc = %0.3f)'  % auc_RF )
plt.plot(DT_fpr,  DT_tpr,  marker='.',    label='DT (auc = %0.3f)'  % auc_DT )
plt.plot(XGB_fpr, XGB_tpr, marker='.',    label='XGB (auc = %0.3f)' % auc_XGB)
plt.plot(GBC_fpr, GBC_tpr, marker='.',    label='GBC (auc = %0.3f)' % auc_GBC)

plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')

plt.legend()

# Save BEFORE showing: once plt.show() has displayed the figure, a later
# savefig call writes an empty canvas.
# NOTE(review): the baseline and over-sampling sections also save to
# 'bfs.jpg', so the last section that runs overwrites this file.
plt.savefig('bfs.jpg')
plt.show()

Over-Sampling ==> SMOTE (Synthetic Minority Oversampling Technique)

In [None]:
# import SMOTE oversampling 

from imblearn.over_sampling import SMOTE

In [None]:
# Class distribution of the training labels before resampling
print("Before oversampling: ", Counter(y_train))

# The sampler instance gets its own lowercase name: rebinding `SMOTE` to an
# instance would shadow the imported class and make this cell fail when it is
# run a second time. random_state is fixed so that the same synthetic samples
# are generated on every run.
smote = SMOTE(random_state=42)

# Only the training split is resampled; the test split is left untouched
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

# Class distribution after resampling
print("After oversampling: ",Counter(y_train_smote))

SVM

In [None]:
from sklearn import svm

# Linear-kernel SVM fitted on the SMOTE over-sampled training data
# (fit returns the estimator itself, so creation and training are chained)
clf = svm.SVC(kernel='linear').fit(X_train_smote, y_train_smote)

# Hard class predictions for the untouched test rows
svm_pred_over = clf.predict(X_test)

In [None]:
# Per-class precision / recall / F1, followed by the raw confusion matrix
print(classification_report(y_test, svm_pred_over),
      confusion_matrix(y_test, svm_pred_over),
      sep='\n')

In [None]:
# Confusion matrix of the SVM (SMOTE training) as a heatmap.
# Stored as svm_over_cm so that the name `svm` keeps pointing at the
# sklearn.svm module instead of being overwritten by an array.
svm_over_cm = confusion_matrix(y_test, svm_pred_over)
sns.heatmap(svm_over_cm, annot=True, cmap="Paired_r", linewidth=2, linecolor='w', fmt='.0f')
plt.xlabel('Predicted Value')
plt.ylabel('True Value')
plt.show()

In [None]:
# This model was trained on SMOTE over-sampled data, not under-sampled data.
# NOTE(review): scored from hard class labels, not decision scores.
print("ROC AUC score for oversampled data: ", roc_auc_score(y_test, svm_pred_over))

In [None]:
# Matthews correlation coefficient, SVM trained on over-sampled data
matthews_corrcoef(y_true=y_test, y_pred=svm_pred_over)

Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import RocCurveDisplay
from sklearn.datasets import load_wine

# Same random-forest configuration as before, trained on the SMOTE
# over-sampled training data.
rf_settings = dict(n_estimators=15, max_depth=10, max_features='sqrt',
                   bootstrap=True, random_state=42)
rfc = RandomForestClassifier(**rf_settings)
rfc.fit(X_train_smote, y_train_smote)
rfc_predictions_over = rfc.predict(X_test)

In [None]:
# Per-class precision / recall / F1, followed by the raw confusion matrix
print(classification_report(y_test, rfc_predictions_over),
      confusion_matrix(y_test, rfc_predictions_over),
      sep='\n')

In [None]:
# Confusion matrix of the random forest (SMOTE training) as a heatmap
rf = confusion_matrix(y_test, rfc_predictions_over)
heatmap_style = dict(annot=True, fmt='.0f', cmap="Paired_r", linewidth=2, linecolor='w')
sns.heatmap(rf, **heatmap_style)
plt.xlabel('Predicted Value')
plt.ylabel('True Value')
plt.show()

In [None]:
# This model was trained on SMOTE over-sampled data, not under-sampled data.
# NOTE(review): scored from hard class labels, not predicted probabilities.
print("ROC AUC score for oversampled data: ", roc_auc_score(y_test, rfc_predictions_over))

In [None]:
# Matthews correlation coefficient, random forest on over-sampled data
matthews_corrcoef(y_true=y_test, y_pred=rfc_predictions_over)

Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Shallow decision tree trained on the SMOTE over-sampled training data.
# random_state is fixed, as for the random forest and XGBoost models, so that
# ties between equally good splits are resolved the same way on every run.
model = DecisionTreeClassifier(criterion='gini', max_depth=2, min_samples_leaf=50,
                               random_state=42)
model.fit(X_train_smote, y_train_smote)
dt_pred_over = model.predict(X_test)

In [None]:
# Per-class precision / recall / F1, followed by the raw confusion matrix
print(classification_report(y_test, dt_pred_over),
      confusion_matrix(y_test, dt_pred_over),
      sep='\n')

In [None]:
# Confusion matrix of the decision tree (SMOTE training) as a heatmap
dt = confusion_matrix(y_test, dt_pred_over)
heatmap_style = dict(annot=True, fmt='.0f', cmap="Paired_r", linewidth=2, linecolor='w')
sns.heatmap(dt, **heatmap_style)
plt.xlabel('Predicted Value')
plt.ylabel('True Value')
plt.show()

In [None]:
# This model was trained on SMOTE over-sampled data, not under-sampled data.
# NOTE(review): scored from hard class labels, not predicted probabilities.
print("ROC AUC score for oversampled data: ", roc_auc_score(y_test, dt_pred_over))

In [None]:
# Matthews correlation coefficient, decision tree on over-sampled data
matthews_corrcoef(y_true=y_test, y_pred=dt_pred_over)

XGBoost Classifier

In [None]:
import xgboost as xgb
from xgboost import XGBClassifier
from sklearn import metrics

# Same XGBoost configuration as before, trained on the SMOTE over-sampled
# training data. The parameters are collected in one dict and passed as
# keywords.
xgb_params = dict(
    base_score=0.5, booster='gbtree', callbacks=None,
    colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
    early_stopping_rounds=None, enable_categorical=False,
    eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
    importance_type=None, interaction_constraints='',
    learning_rate=0.1, max_bin=256, max_cat_to_onehot=4,
    max_delta_step=0, max_depth=3, max_leaves=0, min_child_weight=1,
    monotone_constraints='()', n_estimators=140,
    n_jobs=4, nthread=4, num_parallel_tree=1, predictor='auto',
    random_state=42, reg_alpha=0,
)
xgb_model = xgb.XGBClassifier(**xgb_params)

xgb_model.fit(X_train_smote, y_train_smote)
predicted_y_over = xgb_model.predict(X_test)

In [None]:
# Per-class precision / recall / F1, followed by the raw confusion matrix
print(metrics.classification_report(y_test, predicted_y_over),
      metrics.confusion_matrix(y_test, predicted_y_over),
      sep='\n')

In [None]:
# Confusion matrix of XGBoost (SMOTE training) as a heatmap.
# Stored as xgb_over_cm so that the name `xgb` keeps pointing at the xgboost
# module instead of being overwritten by an array.
xgb_over_cm = confusion_matrix(y_test, predicted_y_over)
sns.heatmap(xgb_over_cm, annot=True, cmap="Paired_r", linewidth=2, linecolor='w', fmt='.0f')
plt.xlabel('Predicted Value')
plt.ylabel('True Value')
plt.show()

In [None]:
# This model was trained on SMOTE over-sampled data, not under-sampled data.
# NOTE(review): scored from hard class labels, not predicted probabilities.
print("ROC AUC score for oversampled data: ", roc_auc_score(y_test, predicted_y_over))

In [None]:
# Matthews correlation coefficient, XGBoost on over-sampled data
matthews_corrcoef(y_true=y_test, y_pred=predicted_y_over)

GradientBoostingClassifier

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient boosting trained on the SMOTE over-sampled training data.
# random_state is fixed, as for the random forest and XGBoost models, so the
# fitted ensemble is the same on every run.
gbc = GradientBoostingClassifier(learning_rate=0.1, loss='log_loss', max_depth=20,
                                 n_estimators=10, random_state=42)
gbc.fit(X_train_smote, y_train_smote)

gbc_predicted_y_over = gbc.predict(X_test)

In [None]:
# Per-class precision / recall / F1, followed by the raw confusion matrix
print(metrics.classification_report(y_test, gbc_predicted_y_over),
      metrics.confusion_matrix(y_test, gbc_predicted_y_over),
      sep='\n')

In [None]:
# Confusion matrix of gradient boosting (SMOTE training) as a heatmap.
# Stored as gbc_over_cm so that the fitted classifier `gbc` is not
# overwritten by the matrix and stays usable after this cell.
gbc_over_cm = confusion_matrix(y_test, gbc_predicted_y_over)
sns.heatmap(gbc_over_cm, annot=True, cmap="Paired_r", linewidth=2, linecolor='w', fmt='.0f')
plt.xlabel('Predicted Value')
plt.ylabel('True Value')
plt.show()

In [None]:
# Score the predictions of the model trained on SMOTE over-sampled data
# (gbc_predicted_y_over); the under-sampling predictions belong to the
# previous section.
# NOTE(review): scored from hard class labels, not predicted probabilities.
print("ROC AUC score for oversampled data: ", roc_auc_score(y_test, gbc_predicted_y_over))

In [None]:
# Matthews correlation coefficient of the model trained on SMOTE over-sampled
# data (gbc_predicted_y_over), not of the under-sampling model.
matthews_corrcoef(y_test, gbc_predicted_y_over)

AUC

In [None]:
# ROC curves of the five models trained on SMOTE over-sampled data,
# evaluated on the untouched test set.
# NOTE(review): the curves are built from hard class predictions (the output
# of .predict), so each has at most one interior point; pass decision scores
# or probabilities to roc_curve to obtain full curves.

# roc_curve also returns the thresholds, which are not used below
SVM_fpr, SVM_tpr, threshold = roc_curve(y_test, svm_pred_over)
auc_SVM = auc(SVM_fpr, SVM_tpr)

RF_fpr, RF_tpr, threshold = roc_curve(y_test, rfc_predictions_over)
auc_RF = auc(RF_fpr, RF_tpr)

DT_fpr, DT_tpr, threshold = roc_curve(y_test, dt_pred_over)
auc_DT = auc(DT_fpr, DT_tpr)

XGB_fpr, XGB_tpr, threshold = roc_curve(y_test, predicted_y_over)
auc_XGB = auc(XGB_fpr, XGB_tpr)

GBC_fpr, GBC_tpr, threshold = roc_curve(y_test, gbc_predicted_y_over)
auc_GBC = auc(GBC_fpr, GBC_tpr)

plt.figure(figsize=(5, 5), dpi=100)

plt.plot(SVM_fpr, SVM_tpr, linestyle='-', label='SVM (auc = %0.3f)' % auc_SVM)
plt.plot(RF_fpr,  RF_tpr,  marker='.',    label='RF (auc = %0.3f)'  % auc_RF )
plt.plot(DT_fpr,  DT_tpr,  marker='.',    label='DT (auc = %0.3f)'  % auc_DT )
plt.plot(XGB_fpr, XGB_tpr, marker='.',    label='XGB (auc = %0.3f)' % auc_XGB)
plt.plot(GBC_fpr, GBC_tpr, marker='.',    label='GBC (auc = %0.3f)' % auc_GBC)

plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')

plt.legend()

# Save BEFORE showing: once plt.show() has displayed the figure, a later
# savefig call writes an empty canvas.
# NOTE(review): the baseline and under-sampling sections also save to
# 'bfs.jpg', so this cell overwrites their output.
plt.savefig('bfs.jpg')
plt.show()