In [None]:
#Neonatal sepsis prediction model using Support Vector Machine (linear and non-linear), Logistic regression, K-nearest neighbor, Naïve bayes and Decision tree
#libraries for dataframe
import pandas as pd
from pandas import DataFrame
import numpy as np
from scipy.stats import norm
from sklearn import utils

In [None]:
#libraries for plots
import matplotlib
matplotlib.use('TkAgg')
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
#libraries for preprocessing and validation
from sklearn import preprocessing
import imblearn
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split

In [None]:
#libraries for models
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV

In [None]:
#libraries for evaluation
#Confusion matrix, Accuracy, sensitivity and specificity
from sklearn.metrics import roc_curve, auc
from scipy import stats
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix
from itertools import cycle
from scipy import interp

In [None]:
#Loading the data
# The dataset location lives in one named constant so it only has to be edited
# in a single place when the notebook is moved to another machine.
DATA_PATH = '/Users/Helen/Desktop/Thesis/Dennis_Neonatal_Sepsis_D.csv'
data = pd.read_csv(DATA_PATH)
# Preview the first rows instead of rendering the entire frame.
data.head()

In [None]:
# Statistical analysis of data
# `data` is already loaded by the previous cell; reading the same CSV a second
# time was redundant I/O, so the summary is computed on the existing frame.
data.describe()

In [None]:
# Get list of categorical variables
# `s` is a boolean mask over the columns: True where the dtype is `object`.
s = data.dtypes == 'object'
object_cols = s[s].index.tolist()

print("Categorical variables:", object_cols, sep="\n")

In [None]:
#to print the categorical variables
obj_data = data.select_dtypes(include=['object']).copy()
obj_data.head()

In [None]:
#print numeric features
numeric_features = data.select_dtypes(exclude=[object]).columns.values
print (numeric_features)

In [None]:
#to get total number of the values missing in each variable
total = data.isnull().sum().sort_values(ascending=False)
total

In [None]:
#to get the share of missing values in each variable, largest first
# The result is a fraction between 0 and 1 (it is not multiplied by 100).
# isnull() yields a frame of booleans with no NaN, so .count() is the row count.
percent = (data.isnull().sum() / data.isnull().count()).sort_values(ascending=False)
percent

In [None]:
#to get the total number of missing values and percent of the missing values for each variable
missing_data = pd.concat([total, percent], axis=1, keys=['Missing', 'Percent'])
missing_data

In [None]:
#Impute missing values using mean
# Every listed column has its NaNs replaced by that column's own mean.
# lym_count, mon_count, eos_count, bas_count and rbc are not imputed here
# (their lines were commented out).
for column in ('platelet_count', 'wbc', 'duration_of_labour', 'duration_of_ROM',
               'heart_rate', 'respiratory_rate', 'neut_count'):
    data[column] = data[column].fillna(data[column].mean())

In [None]:
#to count the missing values in each column
data.isnull().sum().sort_values(ascending=False)

In [None]:
#to get a list of column names containing NaNs(missing values)
data.columns[data.isnull().any()]

In [None]:
def get_percentage_missing(series):
    """Return the share of missing (NaN) entries, rounded to two decimals.

    The value is a fraction between 0 and 1, not a figure scaled to 100.

    :param series: pandas Series or DataFrame to inspect
    :return: one number for a Series; for a DataFrame, a Series holding one
        value per column
    """
    missing = series.isnull().sum()
    return round(missing / len(series), 2)

# Only include columns that contain any NaN values
null_column_names = data.columns[data.isnull().any()].tolist()
data_with_any_null_values = data[null_column_names]

get_percentage_missing(data_with_any_null_values)

In [None]:
list(set(data.dtypes.tolist()))

In [None]:
#to print the numerical variables
data_num = data.select_dtypes(include = ['float64', 'int64'])
data_num.head()

In [None]:
#Plots graph of numeric variables
data_num.hist(figsize=(8, 8));
# Legend labels. Other cells of this notebook treat class 1 as the septic class
# ("predicted probabilities for class 1 (septic)"), so rows with
# neonatal_sepsis == 1 carry the 'neonatal sepsis' label.
# NOTE(review): confirm the 0/1 coding against the dataset's data dictionary.
sv_lab = 'neonatal sepsis'
nsv_lab = 'no neonatal sepsis'
sns.set(color_codes=True)
SMALL_SIZE = 10
plt.rc('legend', fontsize=SMALL_SIZE)
fig, axes = plt.subplots(nrows=3, ncols=4, figsize=(12,12))

septic_rows = data_num[data_num['neonatal_sepsis'] == 1]
non_septic_rows = data_num[data_num['neonatal_sepsis'] == 0]

# One histogram panel per variable, filled row by row from the top-left.
hist_columns = ['age_days', 'gest_age', 'duration_of_labour', 'duration_of_ROM',
                'weight', 'temperature', 'respiratory_rate', 'heart_rate',
                'wbc', 'platelet_count']
for ax, column in zip(axes.flat, hist_columns):
    sns.distplot(septic_rows[column], bins=20, label=sv_lab, ax=ax)
    sns.distplot(non_septic_rows[column], bins=20, label=nsv_lab, ax=ax)
    ax.legend()

# neut_count is drawn as a density-only plot in the next free panel.
ax = axes[2][2]
sns.kdeplot(septic_rows.neut_count, bw=0.5, label=sv_lab, ax=ax)
sns.kdeplot(non_septic_rows.neut_count, bw=0.5, label=nsv_lab, ax=ax)
# Label this panel explicitly: plt.xlabel() would act on the current axes,
# which after plt.subplots() is the last (empty) panel, not this one.
ax.set_xlabel('neut_count', fontsize=14)
ax.legend()

# The twelfth panel is unused (the rbc plot is disabled); hide its empty frame.
axes[2][3].axis('off')
plt.tight_layout()
plt.show()

In [None]:
# Plots graph of categorical variables
color = ['blue', 'green']
fig, axes = plt.subplots(3, 4, figsize=(16, 10))
# The variable is passed as x=...: recent seaborn releases accept only `data`
# as a positional argument, so the positional form raises there.
sns.countplot(x='neonatal_sepsis', data=data, ax=axes[0, 0], alpha = 0.5)

# Remaining eleven panels: one categorical variable each, split by outcome.
by_outcome_columns = ['gender', 'maternal_febrile', 'fever_during_labour',
                      'abnormal_vag_discharge', 'antibiotic_given', 'place_of_delivery',
                      'mode_of_delivery', 'rupture_of_mem', 'foul_smelling_liquor',
                      'fever', 'cold_body']
for ax, column in zip(axes.ravel()[1:], by_outcome_columns):
    sns.countplot(x=column, hue='neonatal_sepsis', data=data, ax=ax, alpha = 0.5)
plt.tight_layout()
plt.show()

In [None]:
# Plots graph of categorical variables
color = ['blue', 'green']
fig, axes = plt.subplots(3, 4, figsize=(16, 10))
# One panel per variable, split by outcome. The variable is passed as x=...
# because recent seaborn releases accept only `data` positionally.
by_outcome_columns = ['poor_feeding', 'crying_excessively', 'weak_cry', 'lethargy',
                      'respiratory_difficulty', 'respiratory_distress', 'tachypnoea',
                      'apnoea', 'crp_count', 'blood_culture']
panels = axes.ravel()
for ax, column in zip(panels, by_outcome_columns):
    sns.countplot(x=column, hue='neonatal_sepsis', data=data, ax=ax, alpha = 0.5)
# Ten variables on a 3x4 grid leave two panels unused; hide their empty frames.
for unused_ax in panels[len(by_outcome_columns):]:
    unused_ax.axis('off')
plt.tight_layout()
plt.show()

In [None]:
# Heatmap of the pairwise correlations between the numeric columns
fig, ax = plt.subplots(figsize=(8, 6))
# Compute the correlation matrix
# .iloc[:, 1:] leaves out the first numeric column — presumably a record
# identifier; TODO confirm against the dataset.
correlation = data.select_dtypes(include=['float64','int64']).iloc[:, 1:].corr()
sns.heatmap(correlation, ax=ax, vmax=1, annot =True, annot_kws={'size': 7}, square=True)
plt.xticks(rotation=90)
# rotation=360 is a full turn, i.e. the y tick labels stay horizontal
plt.yticks(rotation=360)
plt.title('Correlation matrix')
plt.tight_layout()
plt.show()

In [None]:
#Numpy’s tril() function to extract Lower Triangle Matrix
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
np.tril(np.ones(correlation.shape)).astype(bool)[0:5,0:5]

In [None]:
# extract lower triangular correlation matrix using pandas’ where() function
# Cells where the mask is False (above the diagonal) become NaN. The builtin
# `bool` replaces the `np.bool` alias, which was removed in NumPy 1.24.
data_lt = correlation.where(np.tril(np.ones(correlation.shape)).astype(bool))

In [None]:
#upper triangular matrix has NaN and lower triangular matrix has correlation values.
data_lt.iloc[0:5,0:3]

In [None]:
#get lower triangular correlation heatmap
hmap=sns.heatmap(data_lt,annot=True,cmap="Spectral")
plt.show()

In [None]:
# Correlation of every numeric feature with the target. The target's own
# self-correlation entry is dropped by name, so the result is right wherever
# the neonatal_sepsis column sits in the frame (slicing off the last row was
# only correct when the target happened to be the last column).
data_num_corr = data_num.corr()['neonatal_sepsis'].drop('neonatal_sepsis')
# Keep the features whose absolute correlation exceeds 0.5, strongest first.
golden_list = data_num_corr[abs(data_num_corr) > 0.5].sort_values(ascending=False)
print("There is {} strongly correlated values with neonatal_sepsis:\n{}".format(len(golden_list), golden_list))

In [None]:
target_var = 'neonatal_sepsis'
# Restrict to numeric/boolean columns first: DataFrame.corr() raises on text
# columns in pandas >= 2.0 instead of silently skipping them.
corr = data.select_dtypes(include=['number', 'bool']).corr()[target_var]
# Order from the most positive to the most negative correlation. sort_values
# works on labels and keeps NaN entries at the end, unlike indexing the
# label-indexed Series with the positions returned by argsort.
correlation = corr.sort_values(ascending=False)
plt.figure(figsize=(10, 10))
correlation.plot(kind="barh", fontsize=10, color = 'r')
plt.title('Positive and Negative correlation with Target: Neonatal Sepsis')
plt.tight_layout()
plt.show()


In [None]:
#scatterplot of numeric variables with target variable
sns.set(style="ticks", color_codes=True, font_scale=0.8)
features_mean = ['age_days', 'gest_age', 'duration_of_labour', 'duration_of_ROM', 'weight', 'neonatal_sepsis']
sns.pairplot(data[features_mean], hue='neonatal_sepsis', markers=["o", "s"], palette='husl', height=1.0)
plt.show()

In [None]:
#scatterplot of numeric variables with target variable
#sns.set(style="ticks", color_codes=True, font_scale=0.8)
#features_mean_2 = ['temperature', 'respiratory_rate', 'heart_rate', 'wbc', 'neut_count', 'neonatal_sepsis']
#sns.pairplot(data[features_mean_2], hue='neonatal_sepsis', markers=["o", "s"], palette='husl', height=1.0)
#plt.show()
#'lym_count',

In [None]:
#scatterplot of numeric variables with target variable
#sns.set(style="ticks", color_codes=True, font_scale=0.8)
#features_mean_2 = ['mon_count', 'eos_count', 'bas_count', 'rbc', 'platelet_count', 'neonatal_sepsis']
#sns.pairplot(data[features_mean_2], hue='neonatal_sepsis', markers=["o", "s"], palette='husl', height=1.0)
#plt.show()

In [None]:
#calculate z-score of each numeric variable
# normalize all ... except for the target itself!
cols = ['age_days', 'gest_age', 'duration_of_labour', 'duration_of_ROM', 'weight', 'temperature', 'respiratory_rate',
        'heart_rate', 'wbc', 'neut_count', 'platelet_count']
# not scaled: 'lym_count', 'mon_count', 'eos_count', 'bas_count', 'rbc'
# preprocessing.scale standardises each column (centred on its mean, unit
# variance); the values are z-scores, NOT confined to a 0-1 range.
# NOTE(review): the statistics come from the whole dataset, i.e. before any
# train/test split — confirm this is acceptable for the later cross-validation.
data[cols] = preprocessing.scale(data[cols])
data[cols]

In [None]:
# One-hot encode the categorical columns listed in `col`: pd.get_dummies
# replaces each of them with one indicator column per category.
col = ['gender', 'maternal_febrile', 'fever_during_labour', 'abnormal_vag_discharge', 'antibiotic_given', 'place_of_delivery',
       'mode_of_delivery', 'rupture_of_mem', 'foul_smelling_liquor', 'fever', 'cold_body', 'poor_feeding', 'crying_excessively',
       'weak_cry', 'lethargy', 'respiratory_difficulty', 'respiratory_distress', 'tachypnoea', 'apnoea', 'crp_count',
       'blood_culture']
# NOTE(review): this overwrites `data`, so the cell presumably cannot be re-run
# without reloading — the columns named in `col` no longer exist afterwards.
# NOTE(review): blood_culture and crp_count look like laboratory results tied to
# the sepsis diagnosis itself — confirm they do not leak the target.
data = pd.get_dummies(data, columns=col)
print (data.columns)

In [None]:
# Rank the predictors with a random forest fitted on the full data set.
Y = data['neonatal_sepsis']
X = data.drop(columns='neonatal_sepsis')
# Feature names are taken from X itself so that names[k] always belongs to
# feature_importances_[k], wherever the target column sits in `data`.
names = X.columns.values
# Fixed seed so the ranking is reproducible from run to run.
rfc = RandomForestClassifier(random_state=7)
rfc.fit(X, Y)

# Print the results
importance = rfc.feature_importances_
# Positions of the features ordered from least to most important.
sorted_importances = np.argsort(importance)
# Bar positions for the plot in the next cell: one per feature.
features = np.arange(len(names))
print("Features sorted by their score:")
print(sorted(zip((round(score, 4) for score in importance), names), reverse=True))

In [None]:
# Plot the data
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111)
# Bars are ordered from least (bottom) to most (top) important feature.
ax.barh(features, importance[sorted_importances], align='center', color = 'green', alpha = 0.5)
# Tick labels come from X, the matrix the forest was fitted on, so each label
# is guaranteed to match its importance value (the column list of `data` also
# contains the target and can be shifted by one position).
plt.yticks(features, X.columns.values[sorted_importances])
plt.xlabel("Relative Importance")
plt.title("Variable Importance")
plt.tight_layout()
plt.show()


In [None]:
print (data.neonatal_sepsis.value_counts())

In [None]:
# Balance the two classes by synthesising minority-class rows with SMOTE.
y = data['neonatal_sepsis']
x = data.drop(columns='neonatal_sepsis')
oversampler = SMOTE(random_state=15)
# fit_resample is the current imbalanced-learn API; the older fit_sample alias
# was removed in release 0.8.
X_bal, y_bal = oversampler.fit_resample(x, y)
# NOTE(review): the oversampling happens before cross-validation, so synthetic
# rows derived from a held-out sample can end up in the training folds and
# inflate the scores — consider resampling inside each training fold instead.
print(X_bal, y_bal)

In [None]:
print(y_bal.value_counts())

In [None]:
counter = y_bal
print(counter)

In [None]:
#support vector machine linear model
# Only C is tuned: the linear kernel does not use gamma, so a gamma grid would
# repeat every fit ten times without changing the selected model.
parameters = {'kernel': ['linear'], 'C': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}
seed = 7
model_svml = svm.SVC()
grid = GridSearchCV(model_svml , parameters)
skf = StratifiedKFold(n_splits=10, random_state=seed, shuffle=True)
n_folds = skf.get_n_splits(X_bal, y_bal)
#ROC curve
mean_tpr = 0.0
mean_fpr = np.linspace(0, 1, 100)
colors = cycle(['cyan', 'indigo', 'seagreen', 'yellow', 'blue', 'darkorange', 'red', 'brown', 'green', 'grey'])
lw = 2
auc_svml = {}
for i, ((train_index, test_index), color) in enumerate(zip(skf.split(X_bal, y_bal), colors)):
    # split() yields row positions, so both X and y are selected with .iloc.
    X_trainN, X_testN = X_bal.iloc[train_index], X_bal.iloc[test_index]
    y_trainN, y_testN = y_bal.iloc[train_index], y_bal.iloc[test_index]
    grid.fit(X_trainN, y_trainN)
    # Hard class labels; kept for the confusion-matrix cells that follow.
    pred_svml = grid.predict(X_testN)
    # A ROC curve needs a continuous score: hard 0/1 labels collapse it to a
    # single operating point. The signed distance to the hyperplane is used.
    score_svml = grid.decision_function(X_testN)
    # Compute ROC curve and area the curve
    fpr, tpr, thresholds = roc_curve(y_testN, score_svml)
    # Accumulate the fold's curve on a common FPR grid for the mean ROC.
    mean_tpr += np.interp(mean_fpr, fpr, tpr)
    mean_tpr[0] = 0.0
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, lw=lw, color=color, label='ROC fold %d (area = %0.2f)' % (i + 1, roc_auc))
    auc_svml[i] = roc_auc
print(auc_svml)
plt.plot([0, 1], [0, 1], linestyle='--', lw=lw, color='k')
mean_tpr /= n_folds
mean_tpr[-1] = 1.0
mean_auc_svml = auc(mean_fpr, mean_tpr)
plt.plot(mean_fpr, mean_tpr, color='g', linestyle='--', label='Mean ROC (area = %0.2f)' % mean_auc_svml, lw=lw)
# The reported figure is the area under the mean ROC curve, not an accuracy.
print('Average SVM Linear classifier AUC = %0.2f%%' % (mean_auc_svml*100.0))
plt.xlim([-0.05, 1.05])
plt.ylim([-0.05, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve: Support Vector Machine - Linear')
plt.legend(loc="lower right")
plt.show()

In [None]:
# Confusion matrix of the linear SVM on the held-out part of the last fold
# (test_index and pred_svml are left over from the final loop iteration).
cm_l = metrics.confusion_matrix(y_bal[test_index], pred_svml)
# Wrap it in a labelled frame: rows are actual classes, columns predictions.
cm_svml = pd.DataFrame(
    data=cm_l,
    index=['Actual Negative', 'Actual Positive'],
    columns=['Predicted Negative', 'Predicted Positive'],
)
cm_svml

In [None]:
# Confusion-matrix metrics for the last fold of the linear SVM.
# The four counts are read from cm_l (built in the previous cell) instead of
# being typed in by hand, so they always match the matrix actually computed.
# scikit-learn orders a binary confusion matrix as [[TN, FP], [FN, TP]].
TN, FP, FN, TP = (int(count) for count in np.asarray(cm_l).ravel())
print('True Positives:', TP)
print('True Negatives:', TN)
print('False Positives:', FP)
print('False Negatives:', FN)

# calculate accuracy
conf_accuracy = (float (TP+TN) / float(TP + TN + FP + FN))
print('Accuracy: ', conf_accuracy)

# calculate mis-classification
conf_misclassification = 1- conf_accuracy
print('Mis-Classification: ', conf_misclassification)

# calculate the sensitivity
conf_sensitivity = (TP / float(TP + FN))
print('Sensitivity: ', conf_sensitivity)
# calculate the specificity
conf_specificity = (TN / float(TN + FP))
print('Specificity: ', conf_specificity)

# calculate ppv (nan when nothing was predicted positive)
conf_ppv = (TP / float(TP + FP)) if (TP + FP) else float('nan')
print('PPV: ', conf_ppv)
# calculate npv (nan when nothing was predicted negative)
conf_npv = (TN / float(TN + FN)) if (TN + FN) else float('nan')
print('NPV: ', conf_npv)

In [None]:
# store the predicted probabilities for class 1 (septic)
# A second linear SVM is tuned with probability=True and fitted on the training
# part of the last cross-validation fold (X_trainN / y_trainN are left over
# from the loop above). gamma is not searched: the linear kernel ignores it.
parameters = {'kernel': ['linear'], 'C': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}
# The probability calibration inside SVC is randomised; seeding it keeps the
# thresholded predictions below reproducible.
model_svml2 = svm.SVC(probability=True, random_state=seed)
grid = GridSearchCV(model_svml2 , parameters)
grid.fit(X_trainN, y_trainN)
# Column 1 of predict_proba is the second class in sorted label order.
y_pred_prob = grid.predict_proba(X_testN)[:, 1]

In [None]:
# predict sepsis if the predicted probability is greater than 0.11
from sklearn.preprocessing import binarize
threshold = 0.11
# threshold is passed by keyword: it is a keyword-only parameter in current
# scikit-learn, where the positional form raises a TypeError.
y_pred_classl = binarize([y_pred_prob], threshold=threshold)[0]

In [None]:
# Confusion matrix of the 0.11-threshold linear SVM predictions on the
# held-out part of the last fold.
cm_l1 = metrics.confusion_matrix(y_bal[test_index], y_pred_classl)
# Wrap it in a labelled frame: rows are actual classes, columns predictions.
cm_svml1 = pd.DataFrame(
    data=cm_l1,
    index=['Actual Negative', 'Actual Positive'],
    columns=['Predicted Negative', 'Predicted Positive'],
)
cm_svml1

In [None]:
# Confusion-matrix metrics for the linear SVM at the 0.11 threshold.
# The four counts are read from cm_l1 (built in the previous cell) instead of
# being typed in by hand, so they always match the matrix actually computed.
# scikit-learn orders a binary confusion matrix as [[TN, FP], [FN, TP]].
TN, FP, FN, TP = (int(count) for count in np.asarray(cm_l1).ravel())
print('True Positives:', TP)
print('True Negatives:', TN)
print('False Positives:', FP)
print('False Negatives:', FN)

# calculate accuracy
conf_accuracy = (float (TP+TN) / float(TP + TN + FP + FN))
print('Accuracy: ', conf_accuracy)

# calculate mis-classification
conf_misclassification = 1- conf_accuracy
print('Mis-Classification: ', conf_misclassification)

# calculate the sensitivity
conf_sensitivity = (TP / float(TP + FN))
print('Sensitivity: ', conf_sensitivity)
# calculate the specificity
conf_specificity = (TN / float(TN + FP))
print('Specificity: ', conf_specificity)

# calculate ppv (nan when nothing was predicted positive)
conf_ppv = (TP / float(TP + FP)) if (TP + FP) else float('nan')
print('PPV: ', conf_ppv)
# calculate npv (nan when nothing was predicted negative)
conf_npv = (TN / float(TN + FN)) if (TN + FN) else float('nan')
print('NPV: ', conf_npv)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# Ground truth of the last fold, followed by the two prediction vectors being
# compared: model 1 = default SVC labels, model 2 = probabilities cut at 0.11.
y_target = y_testN
y_model1 = pred_svml
y_model2 = y_pred_classl

# 2x2 agreement/disagreement table used as input to McNemar's test.
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# 2x2 table for McNemar's test, entered by hand.
# NOTE(review): presumably transcribed from the `tb` printed by the previous
# cell — confirm, and consider assigning that table directly so the values
# follow the data when the notebook is re-run.
tb_svmlr = np.array([[66, 0],
                     [5, 4]])

In [None]:
from mlxtend.evaluate import mcnemar

chi2, p = mcnemar(ary=tb_svmlr, exact=True)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
from mlxtend.evaluate import mcnemar

chi2, p = mcnemar(ary=tb_svmlr, corrected=True)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
tb_svml = np.array([[29, 9],
                    [1, 36]])

In [None]:
#importing mcnemar_table and mcnemar
from mlxtend.evaluate import mcnemar_table
from mlxtend.evaluate import mcnemar
chi2, p = mcnemar(ary=tb_svml, exact=False, corrected=True)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
chi2, p = mcnemar(ary=tb_svml, exact=True)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
# predict sepsis if the predicted probability is greater than 0.88
from sklearn.preprocessing import binarize
threshold = 0.88
# threshold is passed by keyword: it is a keyword-only parameter in current
# scikit-learn, where the positional form raises a TypeError.
y_pred_classl2 = binarize([y_pred_prob], threshold=threshold)[0]

In [None]:
# Confusion matrix of the 0.88-threshold linear SVM predictions on the
# held-out part of the last fold.
cm_l2 = metrics.confusion_matrix(y_bal[test_index], y_pred_classl2)
# Wrap it in a labelled frame: rows are actual classes, columns predictions.
cm_svml2 = pd.DataFrame(
    data=cm_l2,
    index=['Actual Negative', 'Actual Positive'],
    columns=['Predicted Negative', 'Predicted Positive'],
)
cm_svml2

In [None]:
# Confusion-matrix metrics for the linear SVM at the 0.88 threshold.
# The four counts are read from cm_l2 (built in the previous cell) instead of
# being typed in by hand, so they always match the matrix actually computed.
# scikit-learn orders a binary confusion matrix as [[TN, FP], [FN, TP]].
TN, FP, FN, TP = (int(count) for count in np.asarray(cm_l2).ravel())
print('True Positives:', TP)
print('True Negatives:', TN)
print('False Positives:', FP)
print('False Negatives:', FN)

# calculate accuracy
conf_accuracy = (float (TP+TN) / float(TP + TN + FP + FN))
print('Accuracy: ', conf_accuracy)

# calculate mis-classification
conf_misclassification = 1- conf_accuracy
print('Mis-Classification: ', conf_misclassification)

# calculate the sensitivity
conf_sensitivity = (TP / float(TP + FN))
print('Sensitivity: ', conf_sensitivity)
# calculate the specificity
conf_specificity = (TN / float(TN + FP))
print('Specificity: ', conf_specificity)

# calculate ppv (nan when nothing was predicted positive)
conf_ppv = (TP / float(TP + FP)) if (TP + FP) else float('nan')
print('PPV: ', conf_ppv)
# calculate npv (nan when nothing was predicted negative)
conf_npv = (TN / float(TN + FN)) if (TN + FN) else float('nan')
print('NPV: ', conf_npv)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# Ground truth of the last fold, followed by the two prediction vectors being
# compared: model 1 = default SVC labels, model 2 = probabilities cut at 0.88.
y_target = y_testN
y_model1 = pred_svml
y_model2 = y_pred_classl2

# 2x2 agreement/disagreement table used as input to McNemar's test.
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
tb_svmlr1 = np.array([[68, 2],
                      [3, 2]])

In [None]:
chi2, p = mcnemar(ary=tb_svmlr1, exact=True)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
tb_svml1 = np.array([[37, 1],
                     [4, 33]])

In [None]:
chi2, p = mcnemar(ary=tb_svml1, exact=False, corrected=True)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
chi2, p = mcnemar(ary=tb_svml1, exact=True)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
#support vector machine radial basis function model
# Both C and gamma are tuned: the RBF kernel depends on each of them.
parameters = {'kernel': ['rbf'], 'C': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 'gamma':
              [0.01, 0.02, 0.03, 0.04, 0.05, 0.10, 0.2, 0.3, 0.4, 0.5]}
seed = 7
model_svmrbf = svm.SVC()
grid = GridSearchCV(model_svmrbf, parameters)
skf = StratifiedKFold(n_splits=10, random_state=seed, shuffle=True)
n_folds = skf.get_n_splits(X_bal, y_bal)
mean_tpr = 0.0
mean_fpr = np.linspace(0, 1, 100)
colors = cycle(['cyan', 'indigo', 'seagreen', 'yellow', 'blue', 'darkorange', 'red', 'brown', 'green', 'grey'])
lw = 2
auc_svmrbf = {}
for i, ((train_index, test_index), color) in enumerate(zip(skf.split(X_bal, y_bal), colors)):
    # split() yields row positions, so both X and y are selected with .iloc.
    X_trainN, X_testN = X_bal.iloc[train_index], X_bal.iloc[test_index]
    y_trainN, y_testN = y_bal.iloc[train_index], y_bal.iloc[test_index]
    grid.fit(X_trainN, y_trainN)
    # Hard class labels; kept for the confusion-matrix cells that follow.
    pred_svmrbf = grid.predict(X_testN)
    # A ROC curve needs a continuous score: hard 0/1 labels collapse it to a
    # single operating point. The SVM decision value is used instead.
    score_svmrbf = grid.decision_function(X_testN)
    # Compute ROC curve and area the curve
    fpr, tpr, thresholds = roc_curve(y_testN, score_svmrbf)
    # Accumulate the fold's curve on a common FPR grid for the mean ROC.
    mean_tpr += np.interp(mean_fpr, fpr, tpr)
    mean_tpr[0] = 0.0
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, lw=lw, color=color, label='ROC fold %d (area = %0.2f)' % (i + 1, roc_auc))
    auc_svmrbf[i] = roc_auc
print(auc_svmrbf)
plt.plot([0, 1], [0, 1], linestyle='--', lw=lw, color='k')
mean_tpr /= n_folds
mean_tpr[-1] = 1.0
mean_auc_svmrbf = auc(mean_fpr, mean_tpr)
plt.plot(mean_fpr, mean_tpr, color='g', linestyle='--', label='Mean ROC (area = %0.2f)' % mean_auc_svmrbf, lw=lw)
# The reported figure is the area under the mean ROC curve, not an accuracy.
print('Average SVM Radial classifier AUC = %0.2f%%' % (mean_auc_svmrbf*100.0))
plt.xlim([-0.05, 1.05])
plt.ylim([-0.05, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve: Support Vector Machine - Radial')
plt.legend(loc="lower right")
plt.show()

In [None]:
# Confusion matrix of the RBF SVM on the held-out part of the last fold,
# stored directly as a labelled frame (rows: actual, columns: predicted).
cm_rbf = pd.DataFrame(
    data=metrics.confusion_matrix(y_bal[test_index], pred_svmrbf),
    index=['Actual Negative', 'Actual Positive'],
    columns=['Predicted Negative', 'Predicted Positive'],
)
cm_rbf

In [None]:
# Confusion-matrix metrics for the last fold of the RBF SVM.
# The four counts are read from cm_rbf (built in the previous cell) instead of
# being typed in by hand, so they always match the matrix actually computed.
# scikit-learn orders a binary confusion matrix as [[TN, FP], [FN, TP]].
TN, FP, FN, TP = (int(count) for count in np.asarray(cm_rbf).ravel())
print('True Positives:', TP)
print('True Negatives:', TN)
print('False Positives:', FP)
print('False Negatives:', FN)

# calculate accuracy
conf_accuracy = (float (TP+TN) / float(TP + TN + FP + FN))
print('Accuracy: ', conf_accuracy)

# calculate mis-classification
conf_misclassification = 1- conf_accuracy
print('Mis-Classification: ', conf_misclassification)

# calculate the sensitivity
conf_sensitivity = (TP / float(TP + FN))
print('Sensitivity: ', conf_sensitivity)
# calculate the specificity
conf_specificity = (TN / float(TN + FP))
print('Specificity: ', conf_specificity)

# calculate ppv (nan when nothing was predicted positive)
conf_ppv = (TP / float(TP + FP)) if (TP + FP) else float('nan')
print('PPV: ', conf_ppv)
# calculate npv (nan when nothing was predicted negative)
conf_npv = (TN / float(TN + FN)) if (TN + FN) else float('nan')
print('NPV: ', conf_npv)

In [None]:
# store the predicted probabilities for class 1 (septic)
# A second RBF SVM is tuned with probability=True and fitted on the training
# part of the last cross-validation fold (X_trainN / y_trainN are left over
# from the loop above).
parameters = {'kernel': ['rbf'], 'C': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 'gamma':
              [0.01, 0.02, 0.03, 0.04, 0.05, 0.10, 0.2, 0.3, 0.4, 0.5]}
# The probability calibration inside SVC is randomised; seeding it keeps the
# thresholded predictions below reproducible.
model_svmrbf2 = svm.SVC(probability=True, random_state=seed)
grid = GridSearchCV(model_svmrbf2, parameters)
grid.fit(X_trainN, y_trainN)
# Column 1 of predict_proba is the second class in sorted label order.
y_pred_prob2 = grid.predict_proba(X_testN)[:, 1]

In [None]:
# predict sepsis if the predicted probability is greater than 0.11
from sklearn.preprocessing import binarize
threshold = 0.11
# threshold is passed by keyword: it is a keyword-only parameter in current
# scikit-learn, where the positional form raises a TypeError.
y_pred_class_rbfl = binarize([y_pred_prob2], threshold=threshold)[0]

In [None]:
# Confusion matrix of the 0.11-threshold RBF SVM predictions on the held-out
# part of the last fold, stored directly as a labelled frame.
cm_rbf1 = pd.DataFrame(
    data=metrics.confusion_matrix(y_bal[test_index], y_pred_class_rbfl),
    index=['Actual Negative', 'Actual Positive'],
    columns=['Predicted Negative', 'Predicted Positive'],
)
cm_rbf1

In [None]:
# Confusion-matrix metrics for the RBF SVM at the 0.11 threshold.
# The four counts are read from cm_rbf1 (built in the previous cell) instead of
# being typed in by hand, so they always match the matrix actually computed.
# scikit-learn orders a binary confusion matrix as [[TN, FP], [FN, TP]].
TN, FP, FN, TP = (int(count) for count in np.asarray(cm_rbf1).ravel())
print('True Positives:', TP)
print('True Negatives:', TN)
print('False Positives:', FP)
print('False Negatives:', FN)

# calculate accuracy
conf_accuracy = (float (TP+TN) / float(TP + TN + FP + FN))
print('Accuracy: ', conf_accuracy)

# calculate mis-classification
conf_misclassification = 1- conf_accuracy
print('Mis-Classification: ', conf_misclassification)

# calculate the sensitivity
conf_sensitivity = (TP / float(TP + FN))
print('Sensitivity: ', conf_sensitivity)
# calculate the specificity
conf_specificity = (TN / float(TN + FP))
print('Specificity: ', conf_specificity)

# calculate ppv (nan when nothing was predicted positive)
conf_ppv = (TP / float(TP + FP)) if (TP + FP) else float('nan')
print('PPV: ', conf_ppv)
# calculate npv (nan when nothing was predicted negative)
conf_npv = (TN / float(TN + FN)) if (TN + FN) else float('nan')
print('NPV: ', conf_npv)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# Ground truth of the last fold, followed by the two prediction vectors being
# compared: model 1 = default RBF SVC labels, model 2 = probabilities cut at 0.11.
y_target = y_testN
y_model1 = pred_svmrbf
y_model2 = y_pred_class_rbfl

# 2x2 agreement/disagreement table used as input to McNemar's test.
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
tb_rbfn = np.array([[71, 2],
                    [0, 2]])

In [None]:
chi2, p = mcnemar(ary=tb_rbfn, exact=False, corrected=True)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
tb_rbf = np.array([[35, 3],
                   [0, 37]])

In [None]:
chi2, p = mcnemar(ary=tb_rbf, exact=False, corrected=True)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
chi2, p = mcnemar(ary=tb_rbf, exact=True)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
# predict sepsis if the predicted probability is greater than 0.88
from sklearn.preprocessing import binarize
threshold = 0.88
# threshold is passed by keyword: it is a keyword-only parameter in current
# scikit-learn, where the positional form raises a TypeError.
y_pred_class_rbf2 = binarize([y_pred_prob2], threshold=threshold)[0]

In [None]:
# Confusion matrix of the 0.88-threshold RBF SVM predictions on the held-out
# part of the last fold, stored directly as a labelled frame.
cm_rbf2 = pd.DataFrame(
    data=metrics.confusion_matrix(y_bal[test_index], y_pred_class_rbf2),
    index=['Actual Negative', 'Actual Positive'],
    columns=['Predicted Negative', 'Predicted Positive'],
)
cm_rbf2

In [None]:
# Confusion-matrix metrics for the RBF SVM at the 0.88 threshold.
# The four counts are read from cm_rbf2 (built in the previous cell) instead of
# being typed in by hand, so they always match the matrix actually computed.
# scikit-learn orders a binary confusion matrix as [[TN, FP], [FN, TP]].
TN, FP, FN, TP = (int(count) for count in np.asarray(cm_rbf2).ravel())
print('True Positives:', TP)
print('True Negatives:', TN)
print('False Positives:', FP)
print('False Negatives:', FN)

# calculate accuracy
conf_accuracy = (float (TP+TN) / float(TP + TN + FP + FN))
print('Accuracy: ', conf_accuracy)

# calculate mis-classification
conf_misclassification = 1- conf_accuracy
print('Mis-Classification: ', conf_misclassification)

# calculate the sensitivity
conf_sensitivity = (TP / float(TP + FN))
print('Sensitivity: ', conf_sensitivity)
# calculate the specificity
conf_specificity = (TN / float(TN + FP))
print('Specificity: ', conf_specificity)

# calculate ppv (nan when nothing was predicted positive)
conf_ppv = (TP / float(TP + FP)) if (TP + FP) else float('nan')
print('PPV: ', conf_ppv)
# calculate npv (nan when nothing was predicted negative)
conf_npv = (TN / float(TN + FN)) if (TN + FN) else float('nan')
print('NPV: ', conf_npv)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# Ground truth of the last fold, followed by the two prediction vectors being
# compared: model 1 = default RBF SVC labels, model 2 = probabilities cut at 0.88.
y_target = y_testN
y_model1 = pred_svmrbf
y_model2 = y_pred_class_rbf2

# 2x2 agreement/disagreement table used as input to McNemar's test.
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
tb_rbfn2 = np.array([[71, 0],
                     [0, 4]])

In [None]:
chi2, p = mcnemar(ary=tb_rbfn2, exact=False, corrected=True)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
chi2, p = mcnemar(ary=tb_rbfn2, exact=True)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
tb_rbf2 = np.array([[36, 2],
                    [2, 35]])

In [None]:
chi2, p = mcnemar(ary=tb_rbf2, exact=False, corrected=True)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
chi2, p = mcnemar(ary=tb_rbf2, exact=True)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
#support vector machine polynomial model
# Outer loop: stratified 10-fold split of X_bal / y_bal. Inside each fold a
# GridSearchCV over C and gamma picks the polynomial-kernel SVC, which is then
# scored on the held-out rows; one ROC curve is drawn per fold plus the mean.
parameters = {'kernel': ['poly'], 'C': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 'gamma': 
              [0.01, 0.02, 0.03, 0.04, 0.05, 0.10, 0.2, 0.3, 0.4, 0.5]}
seed = 7
model_svmpoly = svm.SVC()
grid = GridSearchCV(model_svmpoly, parameters)
skf = StratifiedKFold(n_splits=10, random_state=seed, shuffle=True)
mean_fpr = np.linspace(0, 1, 100)
# Accumulator for the fold ROC curves interpolated onto the mean_fpr grid.
mean_tpr = np.zeros_like(mean_fpr)
colors = cycle(['cyan', 'indigo', 'seagreen', 'yellow', 'blue', 'darkorange', 'red', 'brown', 'green', 'grey'])
lw = 2
i = 0
auc_svmpoly = {}
for (train_index, test_index), color in zip(skf.split(X_bal, y_bal), colors):
    X_trainN, X_testN = X_bal.iloc[train_index], X_bal.iloc[test_index]
    y_trainN, y_testN = y_bal[train_index], y_bal[test_index]
    grid.fit(X_trainN, y_trainN)
    # Hard labels are kept: the confusion-matrix and McNemar cells read pred_svmpoly.
    pred_svmpoly = grid.predict(X_testN)
    # A ROC curve needs a continuous score; 0/1 labels collapse it to a single
    # operating point, so the SVM decision values are used instead.
    score_svmpoly = grid.decision_function(X_testN)
    fpr, tpr, thresholds = roc_curve(y_testN, score_svmpoly)
    mean_tpr += np.interp(mean_fpr, fpr, tpr)
    mean_tpr[0] = 0.0
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, lw=lw, color=color, label='ROC fold %d (area = %0.2f)' % (i + 1, roc_auc))
    auc_svmpoly[i] = roc_auc
    i += 1
print(auc_svmpoly)
plt.plot([0, 1], [0, 1], linestyle='--', lw=lw, color='k')
mean_tpr /= skf.get_n_splits(X_bal, y_bal)
mean_tpr[-1] = 1.0
mean_auc_svmpoly = auc(mean_fpr, mean_tpr)
plt.plot(mean_fpr, mean_tpr, color='g', linestyle='--', label='Mean ROC (area = %0.2f)' % mean_auc_svmpoly, lw=lw)
# The printed figure is the mean ROC AUC scaled by 100, not an accuracy.
print('Average SVM Poly classifier AUC (x100) = %0.2f' % (mean_auc_svmpoly*100.0))
plt.xlim([-0.05, 1.05])
plt.ylim([-0.05, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve: Support Vector Machine - Poly')
plt.legend(loc="lower right")
plt.show()

In [None]:
# Confusion matrix of pred_svmpoly against the labels at test_index
# (both are left over from the final fold of the CV loop).
cm_poly = metrics.confusion_matrix(y_true=y_bal[test_index], y_pred=pred_svmpoly)
# Labelled copy for display: rows are actual classes, columns predicted ones.
cm_df_poly = pd.DataFrame(
    data=cm_poly,
    index=['Actual Negative', 'Actual Positive'],
    columns=['Predicted Negative', 'Predicted Positive'],
)
cm_df_poly

In [None]:
# Report confusion-matrix metrics for the polynomial SVM predictions.
# The four counts are unpacked from cm_poly instead of being typed in by hand,
# so the report always matches the matrix computed in the previous cell.
# Layout follows that cell's labels (rows = actual, columns = predicted):
# [[TN, FP], [FN, TP]].
TN, FP, FN, TP = (int(count) for count in cm_poly.ravel())
print('True Positives:', TP)
print('True Negatives:', TN)
print('False Positives:', FP)
print('False Negatives:', FN)

# calculate accuracy
conf_accuracy = (TP + TN) / float(TP + TN + FP + FN)
print('Accuracy: ', conf_accuracy)

# calculate mis-classification
conf_misclassification = 1 - conf_accuracy
print('Mis-Classification: ', conf_misclassification)

# calculate the sensitivity (detected share of actual positives)
conf_sensitivity = TP / float(TP + FN)
print('Sensitivity: ', conf_sensitivity)
# calculate the specificity (cleared share of actual negatives)
conf_specificity = TN / float(TN + FP)
print('Specificity: ', conf_specificity)

# calculate ppv (share of positive calls that are right)
conf_ppv = TP / float(TP + FP)
print('PPV: ', conf_ppv)
# calculate npv (share of negative calls that are right)
conf_npv = TN / float(TN + FN)
print('NPV: ', conf_npv)

In [None]:
# store the predicted probabilities for class 1 (septic)
# The grid search is repeated with probability=True because only then does SVC
# expose predict_proba. X_trainN / X_testN are whatever the CV loop left behind
# (its final fold).
parameters = {'kernel': ['poly'], 'C': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 'gamma': 
              [0.01, 0.02, 0.03, 0.04, 0.05, 0.10, 0.2, 0.3, 0.4, 0.5]}
# random_state pins the internal shuffling used for the probability estimates,
# so the thresholded results below are reproducible between runs.
model_svmpoly2 = svm.SVC(probability=True, random_state=seed)
grid = GridSearchCV(model_svmpoly2, parameters)
grid.fit(X_trainN, y_trainN)
y_pred_prob3 = grid.predict_proba(X_testN)[:, 1]

In [None]:
# predict sepsis if the predicted probability is greater than 0.11
from sklearn.preprocessing import binarize
threshold = 0.11
# threshold must be passed by keyword: it is keyword-only in current
# scikit-learn, and the keyword form also works on older releases.
# binarize expects 2-D input, hence the one-row wrapper and the trailing [0].
y_pred_class_poly1 = binarize([y_pred_prob3], threshold=threshold)[0]

In [None]:
# Confusion matrix of the 0.11-threshold polynomial SVM labels against the
# labels at test_index (left over from the CV loop).
cm_poly1 = metrics.confusion_matrix(y_true=y_bal[test_index], y_pred=y_pred_class_poly1)
# Labelled copy for display: rows are actual classes, columns predicted ones.
cm_df_poly1 = pd.DataFrame(
    data=cm_poly1,
    index=['Actual Negative', 'Actual Positive'],
    columns=['Predicted Negative', 'Predicted Positive'],
)
cm_df_poly1

In [None]:
# Report confusion-matrix metrics for the 0.11-threshold polynomial SVM.
# The four counts are unpacked from cm_poly1 instead of being typed in by hand,
# so the report always matches the matrix computed in the previous cell.
# Layout follows that cell's labels (rows = actual, columns = predicted):
# [[TN, FP], [FN, TP]].
TN, FP, FN, TP = (int(count) for count in cm_poly1.ravel())
print('True Positives:', TP)
print('True Negatives:', TN)
print('False Positives:', FP)
print('False Negatives:', FN)

# calculate accuracy
conf_accuracy = (TP + TN) / float(TP + TN + FP + FN)
print('Accuracy: ', conf_accuracy)

# calculate mis-classification
conf_misclassification = 1 - conf_accuracy
print('Mis-Classification: ', conf_misclassification)

# calculate the sensitivity (detected share of actual positives)
conf_sensitivity = TP / float(TP + FN)
print('Sensitivity: ', conf_sensitivity)
# calculate the specificity (cleared share of actual negatives)
conf_specificity = TN / float(TN + FP)
print('Specificity: ', conf_specificity)

# calculate ppv (share of positive calls that are right)
conf_ppv = TP / float(TP + FP)
print('PPV: ', conf_ppv)
# calculate npv (share of negative calls that are right)
conf_npv = TN / float(TN + FN)
print('NPV: ', conf_npv)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# Ground truth, then the two prediction vectors being compared:
# model 1 = pred_svmpoly, model 2 = y_pred_class_poly1.
y_target, y_model1, y_model2 = y_testN, pred_svmpoly, y_pred_class_poly1

# 2x2 agreement/disagreement table consumed by McNemar's test
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hard-coded 2x2 table for the polynomial SVM comparison
# (presumably transcribed from the mcnemar_table output - confirm).
tb_polyn = np.array([[70, 2], [2, 1]])

In [None]:
# Continuity-corrected chi-squared McNemar test on tb_polyn.
chi2, p = mcnemar(ary=tb_polyn, corrected=True, exact=False)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
# Hard-coded counts in confusion-matrix layout [[TN, FP], [FN, TP]]
# (they match the values reported for the 0.11-threshold polynomial SVM).
tb_poly = np.array([[35, 3], [0, 37]])

In [None]:
# Continuity-corrected chi-squared McNemar test on tb_poly
# (its off-diagonal cells are the FP and FN counts).
chi2, p = mcnemar(ary=tb_poly, corrected=True, exact=False)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
# Exact (binomial) McNemar test on tb_poly; chi2 is expected to be None here
# per the mlxtend docs - confirm.
chi2, p = mcnemar(ary=tb_poly, corrected=True, exact=True)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
# predict sepsis if the predicted probability is greater than 0.88
from sklearn.preprocessing import binarize
threshold = 0.88
# threshold must be passed by keyword: it is keyword-only in current
# scikit-learn, and the keyword form also works on older releases.
# binarize expects 2-D input, hence the one-row wrapper and the trailing [0].
y_pred_class_poly2 = binarize([y_pred_prob3], threshold=threshold)[0]

In [None]:
# Confusion matrix of the 0.88-threshold polynomial SVM labels against the
# labels at test_index (left over from the CV loop).
cm_poly2 = metrics.confusion_matrix(y_true=y_bal[test_index], y_pred=y_pred_class_poly2)
# Labelled copy for display: rows are actual classes, columns predicted ones.
cm_df_poly2 = pd.DataFrame(
    data=cm_poly2,
    index=['Actual Negative', 'Actual Positive'],
    columns=['Predicted Negative', 'Predicted Positive'],
)
cm_df_poly2

In [None]:
# Report confusion-matrix metrics for the 0.88-threshold polynomial SVM.
# The four counts are unpacked from cm_poly2 instead of being typed in by hand,
# so the report always matches the matrix computed in the previous cell.
# Layout follows that cell's labels (rows = actual, columns = predicted):
# [[TN, FP], [FN, TP]].
TN, FP, FN, TP = (int(count) for count in cm_poly2.ravel())
print('True Positives:', TP)
print('True Negatives:', TN)
print('False Positives:', FP)
print('False Negatives:', FN)

# calculate accuracy
conf_accuracy = (TP + TN) / float(TP + TN + FP + FN)
print('Accuracy: ', conf_accuracy)

# calculate mis-classification
conf_misclassification = 1 - conf_accuracy
print('Mis-Classification: ', conf_misclassification)

# calculate the sensitivity (detected share of actual positives)
conf_sensitivity = TP / float(TP + FN)
print('Sensitivity: ', conf_sensitivity)
# calculate the specificity (cleared share of actual negatives)
conf_specificity = TN / float(TN + FP)
print('Specificity: ', conf_specificity)

# calculate ppv (share of positive calls that are right)
conf_ppv = TP / float(TP + FP)
print('PPV: ', conf_ppv)
# calculate npv (share of negative calls that are right)
conf_npv = TN / float(TN + FN)
print('NPV: ', conf_npv)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# Ground truth, then the two prediction vectors being compared:
# model 1 = pred_svmpoly, model 2 = y_pred_class_poly2.
y_target, y_model1, y_model2 = y_testN, pred_svmpoly, y_pred_class_poly2

# 2x2 agreement/disagreement table consumed by McNemar's test
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hard-coded 2x2 table for the second polynomial SVM comparison
# (presumably transcribed from the mcnemar_table output - confirm).
tb_polyn2 = np.array([[72, 0], [0, 3]])

In [None]:
# Exact (binomial) McNemar test on tb_polyn2; chi2 is expected to be None here
# per the mlxtend docs - confirm.
chi2, p = mcnemar(ary=tb_polyn2, corrected=True, exact=True)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
# Hard-coded counts in confusion-matrix layout [[TN, FP], [FN, TP]]
# (they match the values reported for the 0.88-threshold polynomial SVM).
tb_poly2 = np.array([[37, 1], [2, 35]])

In [None]:
# Continuity-corrected chi-squared McNemar test on tb_poly2
# (its off-diagonal cells are the FP and FN counts).
chi2, p = mcnemar(ary=tb_poly2, corrected=True, exact=False)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
# Exact (binomial) McNemar test on tb_poly2; chi2 is expected to be None here
# per the mlxtend docs - confirm.
chi2, p = mcnemar(ary=tb_poly2, corrected=True, exact=True)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
#logistic regression model
# Stratified 10-fold evaluation of a default LogisticRegression on
# X_bal / y_bal; one ROC curve is drawn per fold plus the mean curve.
seed = 7
model_lr = LogisticRegression()
skf = StratifiedKFold(n_splits=10, random_state=seed, shuffle=True)
mean_fpr = np.linspace(0, 1, 100)
# Accumulator for the fold ROC curves interpolated onto the mean_fpr grid.
mean_tpr = np.zeros_like(mean_fpr)
colors = cycle(['cyan', 'indigo', 'seagreen', 'yellow', 'blue', 'darkorange', 'red', 'brown', 'green', 'grey'])
lw = 2
i = 0
auc_lr = {}
for (train_index, test_index), color in zip(skf.split(X_bal, y_bal), colors):
    X_trainN, X_testN = X_bal.iloc[train_index], X_bal.iloc[test_index]
    y_trainN, y_testN = y_bal[train_index], y_bal[test_index]
    model_lr.fit(X_trainN, y_trainN)
    # Hard labels are kept: the confusion-matrix and McNemar cells read pred_lr.
    pred_lr = model_lr.predict(X_testN)
    # A ROC curve needs a continuous score; 0/1 labels collapse it to a single
    # operating point, so the class-1 probability is used instead.
    score_lr = model_lr.predict_proba(X_testN)[:, 1]
    fpr, tpr, thresholds = roc_curve(y_testN, score_lr)
    mean_tpr += np.interp(mean_fpr, fpr, tpr)
    mean_tpr[0] = 0.0
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, lw=lw, color=color, label='ROC fold %d (area = %0.2f)' % (i + 1, roc_auc))
    auc_lr[i] = roc_auc
    i += 1
print(auc_lr)
plt.plot([0, 1], [0, 1], linestyle='--', lw=lw, color='k')
mean_tpr /= skf.get_n_splits(X_bal, y_bal)
mean_tpr[-1] = 1.0
mean_auc_lr = auc(mean_fpr, mean_tpr)
plt.plot(mean_fpr, mean_tpr, color='g', linestyle='--', label='Mean ROC (area = %0.2f)' % mean_auc_lr, lw=lw)
# The printed figure is the mean ROC AUC scaled by 100, not an accuracy.
print('Average LR classifier AUC (x100) = %0.2f' % (mean_auc_lr*100.0))
plt.xlim([-0.05, 1.05])
plt.ylim([-0.05, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve: Logistic regression')
plt.legend(loc="lower right")
plt.show()

In [None]:
# Confusion matrix of pred_lr against the labels at test_index
# (both are left over from the final fold of the CV loop).
cm_lr = metrics.confusion_matrix(y_true=y_bal[test_index], y_pred=pred_lr)
# Labelled copy for display: rows are actual classes, columns predicted ones.
cm_df_lr = pd.DataFrame(
    data=cm_lr,
    index=['Actual Negative', 'Actual Positive'],
    columns=['Predicted Negative', 'Predicted Positive'],
)
cm_df_lr

In [None]:
# Report confusion-matrix metrics for the logistic regression predictions.
# The four counts are unpacked from cm_lr instead of being typed in by hand,
# so the report always matches the matrix computed in the previous cell.
# Layout follows that cell's labels (rows = actual, columns = predicted):
# [[TN, FP], [FN, TP]].
TN, FP, FN, TP = (int(count) for count in cm_lr.ravel())
print('True Positives:', TP)
print('True Negatives:', TN)
print('False Positives:', FP)
print('False Negatives:', FN)

# calculate accuracy
conf_accuracy = (TP + TN) / float(TP + TN + FP + FN)
print('Accuracy: ', conf_accuracy)

# calculate mis-classification
conf_misclassification = 1 - conf_accuracy
print('Mis-Classification: ', conf_misclassification)

# calculate the sensitivity (detected share of actual positives)
conf_sensitivity = TP / float(TP + FN)
print('Sensitivity: ', conf_sensitivity)
# calculate the specificity (cleared share of actual negatives)
conf_specificity = TN / float(TN + FP)
print('Specificity: ', conf_specificity)

# calculate ppv (share of positive calls that are right)
conf_ppv = TP / float(TP + FP)
print('PPV: ', conf_ppv)
# calculate npv (share of negative calls that are right)
conf_npv = TN / float(TN + FN)
print('NPV: ', conf_npv)

In [None]:
# Class probabilities from the logistic regression as last fitted by the CV
# loop; column 1 is kept as the class 1 (septic) probability.
lr_class_probs = model_lr.predict_proba(X_testN)
y_pred_prob4 = lr_class_probs[:, 1]

In [None]:
# predict sepsis if the predicted probability is greater than 0.11
from sklearn.preprocessing import binarize
threshold = 0.11
# threshold must be passed by keyword: it is keyword-only in current
# scikit-learn, and the keyword form also works on older releases.
# binarize expects 2-D input, hence the one-row wrapper and the trailing [0].
y_pred_class_lr = binarize([y_pred_prob4], threshold=threshold)[0]

In [None]:
# Confusion matrix of the 0.11-threshold logistic regression labels against
# the labels at test_index (left over from the CV loop).
cm_lr1 = metrics.confusion_matrix(y_true=y_bal[test_index], y_pred=y_pred_class_lr)
# Labelled copy for display: rows are actual classes, columns predicted ones.
cm_df_lr1 = pd.DataFrame(
    data=cm_lr1,
    index=['Actual Negative', 'Actual Positive'],
    columns=['Predicted Negative', 'Predicted Positive'],
)
cm_df_lr1

In [None]:
# Report confusion-matrix metrics for the 0.11-threshold logistic regression.
# The four counts are unpacked from cm_lr1 instead of being typed in by hand,
# so the report always matches the matrix computed in the previous cell.
# Layout follows that cell's labels (rows = actual, columns = predicted):
# [[TN, FP], [FN, TP]].
TN, FP, FN, TP = (int(count) for count in cm_lr1.ravel())
print('True Positives:', TP)
print('True Negatives:', TN)
print('False Positives:', FP)
print('False Negatives:', FN)

# calculate accuracy
conf_accuracy = (TP + TN) / float(TP + TN + FP + FN)
print('Accuracy: ', conf_accuracy)

# calculate mis-classification
conf_misclassification = 1 - conf_accuracy
print('Mis-Classification: ', conf_misclassification)

# calculate the sensitivity (detected share of actual positives)
conf_sensitivity = TP / float(TP + FN)
print('Sensitivity: ', conf_sensitivity)
# calculate the specificity (cleared share of actual negatives)
conf_specificity = TN / float(TN + FP)
print('Specificity: ', conf_specificity)

# calculate ppv (share of positive calls that are right)
conf_ppv = TP / float(TP + FP)
print('PPV: ', conf_ppv)
# calculate npv (share of negative calls that are right)
conf_npv = TN / float(TN + FN)
print('NPV: ', conf_npv)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# Ground truth, then the two prediction vectors being compared:
# model 1 = pred_lr, model 2 = y_pred_class_lr.
y_target, y_model1, y_model2 = y_testN, pred_lr, y_pred_class_lr

# 2x2 agreement/disagreement table consumed by McNemar's test
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hard-coded 2x2 table for the logistic regression comparison
# (presumably transcribed from the mcnemar_table output - confirm).
tb_lrn = np.array([[66, 3], [2, 4]])

In [None]:
# Continuity-corrected chi-squared McNemar test on tb_lrn.
chi2, p = mcnemar(ary=tb_lrn, corrected=True, exact=False)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
# Exact (binomial) McNemar test on tb_lrn; chi2 is expected to be None here
# per the mlxtend docs - confirm.
chi2, p = mcnemar(ary=tb_lrn, corrected=True, exact=True)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
# Hard-coded counts in confusion-matrix layout [[TN, FP], [FN, TP]]
# (they match the values reported for the 0.11-threshold logistic regression).
tb_lr = np.array([[33, 5], [1, 36]])

In [None]:
# Continuity-corrected chi-squared McNemar test on tb_lr
# (its off-diagonal cells are the FP and FN counts).
chi2, p = mcnemar(ary=tb_lr, corrected=True, exact=False)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
# Exact (binomial) McNemar test on tb_lr; chi2 is expected to be None here
# per the mlxtend docs - confirm.
chi2, p = mcnemar(ary=tb_lr, corrected=True, exact=True)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
# predict sepsis if the predicted probability is greater than 0.89
from sklearn.preprocessing import binarize
threshold = 0.89
# threshold must be passed by keyword: it is keyword-only in current
# scikit-learn, and the keyword form also works on older releases.
# binarize expects 2-D input, hence the one-row wrapper and the trailing [0].
y_pred_class_lr2 = binarize([y_pred_prob4], threshold=threshold)[0]

In [None]:
# Confusion matrix of the 0.89-threshold logistic regression labels against
# the labels at test_index (left over from the CV loop).
cm_lr2 = metrics.confusion_matrix(y_true=y_bal[test_index], y_pred=y_pred_class_lr2)
# Labelled copy for display: rows are actual classes, columns predicted ones.
cm_df_lr2 = pd.DataFrame(
    data=cm_lr2,
    index=['Actual Negative', 'Actual Positive'],
    columns=['Predicted Negative', 'Predicted Positive'],
)
cm_df_lr2

In [None]:
# Report confusion-matrix metrics for the 0.89-threshold logistic regression.
# The four counts are unpacked from cm_lr2 instead of being typed in by hand,
# so the report always matches the matrix computed in the previous cell.
# Layout follows that cell's labels (rows = actual, columns = predicted):
# [[TN, FP], [FN, TP]].
TN, FP, FN, TP = (int(count) for count in cm_lr2.ravel())
print('True Positives:', TP)
print('True Negatives:', TN)
print('False Positives:', FP)
print('False Negatives:', FN)

# calculate accuracy
conf_accuracy = (TP + TN) / float(TP + TN + FP + FN)
print('Accuracy: ', conf_accuracy)

# calculate mis-classification
conf_misclassification = 1 - conf_accuracy
print('Mis-Classification: ', conf_misclassification)

# calculate the sensitivity (detected share of actual positives)
conf_sensitivity = TP / float(TP + FN)
print('Sensitivity: ', conf_sensitivity)
# calculate the specificity (cleared share of actual negatives)
conf_specificity = TN / float(TN + FP)
print('Specificity: ', conf_specificity)

# calculate ppv (share of positive calls that are right)
conf_ppv = TP / float(TP + FP)
print('PPV: ', conf_ppv)
# calculate npv (share of negative calls that are right)
conf_npv = TN / float(TN + FN)
print('NPV: ', conf_npv)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# Ground truth, then the two prediction vectors being compared:
# model 1 = pred_lr, model 2 = y_pred_class_lr2.
y_target, y_model1, y_model2 = y_testN, pred_lr, y_pred_class_lr2

# 2x2 agreement/disagreement table consumed by McNemar's test
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hard-coded 2x2 table for the second logistic regression comparison
# (presumably transcribed from the mcnemar_table output - confirm).
tb_lrn2 = np.array([[66, 1], [2, 6]])

In [None]:
# Continuity-corrected chi-squared McNemar test on tb_lrn2.
chi2, p = mcnemar(ary=tb_lrn2, corrected=True, exact=False)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
# Hard-coded counts in confusion-matrix layout [[TN, FP], [FN, TP]]
# (they match the values reported for the 0.89-threshold logistic regression).
tb_lr2 = np.array([[36, 2], [6, 31]])

In [None]:
# Continuity-corrected chi-squared McNemar test on tb_lr2
# (its off-diagonal cells are the FP and FN counts).
chi2, p = mcnemar(ary=tb_lr2, corrected=True, exact=False)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
# Exact (binomial) McNemar test for the 0.89-threshold logistic regression.
# This cell previously re-tested tb_lr (the 0.11-threshold table, which already
# has its own exact test); the table that belongs to this section is tb_lr2.
chi2, p = mcnemar(ary=tb_lr2, exact=True)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
#K-nearest_neighbor model
# Stratified 10-fold evaluation of a 3-nearest-neighbour classifier on
# X_bal / y_bal; one ROC curve is drawn per fold plus the mean curve.
seed = 7
model_neigh = KNeighborsClassifier(n_neighbors=3)
skf = StratifiedKFold(n_splits=10, random_state=seed, shuffle=True)
mean_fpr = np.linspace(0, 1, 100)
# Accumulator for the fold ROC curves interpolated onto the mean_fpr grid.
mean_tpr = np.zeros_like(mean_fpr)
colors = cycle(['cyan', 'indigo', 'seagreen', 'yellow', 'blue', 'darkorange', 'red', 'brown', 'green', 'grey'])
lw = 2
i = 0
auc_neigh = {}
# Convert the feature frame once instead of twice per fold.
X_bal_values = X_bal.to_numpy()
for (train_index, test_index), color in zip(skf.split(X_bal, y_bal), colors):
    X_trainN, X_testN = X_bal_values[train_index], X_bal_values[test_index]
    y_trainN, y_testN = y_bal[train_index], y_bal[test_index]
    model_neigh.fit(X_trainN, y_trainN)
    # Hard labels are kept: the confusion-matrix and McNemar cells read pred_neigh.
    pred_neigh = model_neigh.predict(X_testN)
    # A ROC curve needs a continuous score; 0/1 labels collapse it to a single
    # operating point, so the class-1 neighbour vote share is used instead.
    score_neigh = model_neigh.predict_proba(X_testN)[:, 1]
    fpr, tpr, thresholds = roc_curve(y_testN, score_neigh)
    mean_tpr += np.interp(mean_fpr, fpr, tpr)
    mean_tpr[0] = 0.0
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, lw=lw, color=color, label='ROC fold %d (area = %0.2f)' % (i + 1, roc_auc))
    auc_neigh[i] = roc_auc
    i += 1
print(auc_neigh)
plt.plot([0, 1], [0, 1], linestyle='--', lw=lw, color='k')
mean_tpr /= skf.get_n_splits(X_bal, y_bal)
mean_tpr[-1] = 1.0
mean_auc_neigh = auc(mean_fpr, mean_tpr)
plt.plot(mean_fpr, mean_tpr, color='g', linestyle='--', label='Mean ROC (area = %0.2f)' % mean_auc_neigh, lw=lw)
# The printed figure is the mean ROC AUC scaled by 100, not an accuracy.
print('Average KNN classifier AUC (x100) = %0.2f' % (mean_auc_neigh*100.0))
plt.xlim([-0.05, 1.05])
plt.ylim([-0.05, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve: K-nearest neighbor')
plt.legend(loc="lower right")
plt.show()

In [None]:
# Importing the metrics package from sklearn library
from sklearn import metrics
# Confusion matrix of pred_neigh against the labels at test_index
# (both are left over from the final fold of the CV loop).
cm_neigh = metrics.confusion_matrix(y_true=y_bal[test_index], y_pred=pred_neigh)
# Labelled copy for display: rows are actual classes, columns predicted ones.
cm_df_neigh = pd.DataFrame(
    data=cm_neigh,
    index=['Actual Negative', 'Actual Positive'],
    columns=['Predicted Negative', 'Predicted Positive'],
)
cm_df_neigh

In [None]:
# Report confusion-matrix metrics for the K-nearest-neighbour predictions.
# The four counts are unpacked from cm_neigh instead of being typed in by hand,
# so the report always matches the matrix computed in the previous cell.
# Layout follows that cell's labels (rows = actual, columns = predicted):
# [[TN, FP], [FN, TP]].
TN, FP, FN, TP = (int(count) for count in cm_neigh.ravel())
print('True Positives:', TP)
print('True Negatives:', TN)
print('False Positives:', FP)
print('False Negatives:', FN)

# calculate accuracy
conf_accuracy = (TP + TN) / float(TP + TN + FP + FN)
print('Accuracy: ', conf_accuracy)

# calculate mis-classification
conf_misclassification = 1 - conf_accuracy
print('Mis-Classification: ', conf_misclassification)

# calculate the sensitivity (detected share of actual positives)
conf_sensitivity = TP / float(TP + FN)
print('Sensitivity: ', conf_sensitivity)
# calculate the specificity (cleared share of actual negatives)
conf_specificity = TN / float(TN + FP)
print('Specificity: ', conf_specificity)

# calculate ppv (share of positive calls that are right)
conf_ppv = TP / float(TP + FP)
print('PPV: ', conf_ppv)
# calculate npv (share of negative calls that are right)
conf_npv = TN / float(TN + FN)
print('NPV: ', conf_npv)

In [None]:
# Class probabilities from the KNN model as last fitted by the CV loop;
# column 1 is kept as the class 1 (septic) probability.
neigh_class_probs = model_neigh.predict_proba(X_testN)
y_pred_prob5 = neigh_class_probs[:, 1]

In [None]:
# predict sepsis if the predicted probability is greater than 0.11
from sklearn.preprocessing import binarize
threshold = 0.11
# threshold must be passed by keyword: it is keyword-only in current
# scikit-learn, and the keyword form also works on older releases.
# binarize expects 2-D input, hence the one-row wrapper and the trailing [0].
y_pred_class_neigh = binarize([y_pred_prob5], threshold=threshold)[0]

In [None]:
# Confusion matrix of the 0.11-threshold KNN labels against the labels at
# test_index (left over from the CV loop).
cm_neigh1 = metrics.confusion_matrix(y_true=y_bal[test_index], y_pred=y_pred_class_neigh)
# Labelled copy for display: rows are actual classes, columns predicted ones.
cm_df_neigh1 = pd.DataFrame(
    data=cm_neigh1,
    index=['Actual Negative', 'Actual Positive'],
    columns=['Predicted Negative', 'Predicted Positive'],
)
cm_df_neigh1

In [None]:
# Report confusion-matrix metrics for the 0.11-threshold KNN labels.
# The four counts are unpacked from cm_neigh1 instead of being typed in by hand,
# so the report always matches the matrix computed in the previous cell.
# Layout follows that cell's labels (rows = actual, columns = predicted):
# [[TN, FP], [FN, TP]].
TN, FP, FN, TP = (int(count) for count in cm_neigh1.ravel())
print('True Positives:', TP)
print('True Negatives:', TN)
print('False Positives:', FP)
print('False Negatives:', FN)

# calculate accuracy
conf_accuracy = (TP + TN) / float(TP + TN + FP + FN)
print('Accuracy: ', conf_accuracy)

# calculate mis-classification
conf_misclassification = 1 - conf_accuracy
print('Mis-Classification: ', conf_misclassification)

# calculate the sensitivity (detected share of actual positives)
conf_sensitivity = TP / float(TP + FN)
print('Sensitivity: ', conf_sensitivity)
# calculate the specificity (cleared share of actual negatives)
conf_specificity = TN / float(TN + FP)
print('Specificity: ', conf_specificity)

# calculate ppv (share of positive calls that are right)
conf_ppv = TP / float(TP + FP)
print('PPV: ', conf_ppv)
# calculate npv (share of negative calls that are right)
conf_npv = TN / float(TN + FN)
print('NPV: ', conf_npv)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# Ground truth, then the two prediction vectors being compared:
# model 1 = pred_neigh, model 2 = y_pred_class_neigh.
y_target, y_model1, y_model2 = y_testN, pred_neigh, y_pred_class_neigh

# 2x2 agreement/disagreement table consumed by McNemar's test
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hard-coded 2x2 table for the KNN comparison
# (presumably transcribed from the mcnemar_table output - confirm).
tb_neighn = np.array([[67, 3], [1, 4]])

In [None]:
# Continuity-corrected chi-squared McNemar test on tb_neighn.
chi2, p = mcnemar(ary=tb_neighn, corrected=True, exact=False)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
# Hard-coded counts in confusion-matrix layout [[TN, FP], [FN, TP]]
# (they match the values reported for the 0.11-threshold KNN labels).
tb_neigh = np.array([[36, 2], [3, 34]])

In [None]:
# Continuity-corrected chi-squared McNemar test on tb_neigh
# (its off-diagonal cells are the FP and FN counts).
chi2, p = mcnemar(ary=tb_neigh, corrected=True, exact=False)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
# Exact (binomial) McNemar test on tb_neigh; chi2 is expected to be None here
# per the mlxtend docs - confirm.
chi2, p = mcnemar(ary=tb_neigh, corrected=True, exact=True)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
# predict sepsis if the predicted probability is greater than 0.89
from sklearn.preprocessing import binarize
threshold = 0.89
# threshold must be passed by keyword: it is keyword-only in current
# scikit-learn, and the keyword form also works on older releases.
# binarize expects 2-D input, hence the one-row wrapper and the trailing [0].
y_pred_class_neigh2 = binarize([y_pred_prob5], threshold=threshold)[0]

In [None]:
# Confusion matrix of the 0.89-threshold KNN labels against the labels at
# test_index (left over from the CV loop).
# NOTE(review): the names cm3 / cm_df3 are reused by later cells.
cm3 = metrics.confusion_matrix(y_true=y_bal[test_index], y_pred=y_pred_class_neigh2)
# Labelled copy for display: rows are actual classes, columns predicted ones.
cm_df3 = pd.DataFrame(
    data=cm3,
    index=['Actual Negative', 'Actual Positive'],
    columns=['Predicted Negative', 'Predicted Positive'],
)
cm_df3

In [None]:
# Report confusion-matrix metrics for the 0.89-threshold KNN labels.
# The four counts are unpacked from cm3 (the matrix computed in the previous
# cell) instead of being typed in by hand, so the report always matches it.
# Layout follows that cell's labels (rows = actual, columns = predicted):
# [[TN, FP], [FN, TP]].
TN, FP, FN, TP = (int(count) for count in cm3.ravel())
print('True Positives:', TP)
print('True Negatives:', TN)
print('False Positives:', FP)
print('False Negatives:', FN)

# calculate accuracy
conf_accuracy = (TP + TN) / float(TP + TN + FP + FN)
print('Accuracy: ', conf_accuracy)

# calculate mis-classification
conf_misclassification = 1 - conf_accuracy
print('Mis-Classification: ', conf_misclassification)

# calculate the sensitivity (detected share of actual positives)
conf_sensitivity = TP / float(TP + FN)
print('Sensitivity: ', conf_sensitivity)
# calculate the specificity (cleared share of actual negatives)
conf_specificity = TN / float(TN + FP)
print('Specificity: ', conf_specificity)

# calculate ppv (share of positive calls that are right)
conf_ppv = TP / float(TP + FP)
print('PPV: ', conf_ppv)
# calculate npv (share of negative calls that are right)
conf_npv = TN / float(TN + FN)
print('NPV: ', conf_npv)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# Ground truth, then the two prediction vectors being compared:
# model 1 = pred_neigh, model 2 = y_pred_class_neigh2.
y_target, y_model1, y_model2 = y_testN, pred_neigh, y_pred_class_neigh2

# 2x2 agreement/disagreement table consumed by McNemar's test
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hard-coded 2x2 table for the second KNN comparison
# (presumably transcribed from the mcnemar_table output - confirm).
tb_neighn2 = np.array([[62, 1], [6, 6]])

In [None]:
# Continuity-corrected chi-squared McNemar test on tb_neighn2.
chi2, p = mcnemar(ary=tb_neighn2, corrected=True, exact=False)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
# Exact (binomial) McNemar test on tb_neighn2; chi2 is expected to be None
# here per the mlxtend docs - confirm.
chi2, p = mcnemar(ary=tb_neighn2, corrected=True, exact=True)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
# Hard-coded counts in confusion-matrix layout [[TN, FP], [FN, TP]]
# (they match the values reported for the 0.89-threshold KNN labels).
tb_neigh2 = np.array([[38, 0], [12, 25]])

In [None]:
# Continuity-corrected chi-squared McNemar test on tb_neigh2
# (its off-diagonal cells are the FP and FN counts).
chi2, p = mcnemar(ary=tb_neigh2, corrected=True, exact=False)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
# Exact (binomial) McNemar test on tb_neigh2; chi2 is expected to be None here
# per the mlxtend docs - confirm.
chi2, p = mcnemar(ary=tb_neigh2, corrected=True, exact=True)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
#Gaussian Naive Bayes (GaussianNB) model
# Stratified 10-fold evaluation of a default GaussianNB on X_bal / y_bal;
# one ROC curve is drawn per fold plus the mean curve.
seed = 7
model_NB = GaussianNB()
skf = StratifiedKFold(n_splits=10, random_state=seed, shuffle=True)
mean_fpr = np.linspace(0, 1, 100)
# Accumulator for the fold ROC curves interpolated onto the mean_fpr grid.
mean_tpr = np.zeros_like(mean_fpr)
colors = cycle(['cyan', 'indigo', 'seagreen', 'yellow', 'blue', 'darkorange', 'red', 'brown', 'green', 'grey'])
lw = 2
i = 0
auc_NB = {}
for (train_index, test_index), color in zip(skf.split(X_bal, y_bal), colors):
    X_trainN, X_testN = X_bal.iloc[train_index], X_bal.iloc[test_index]
    y_trainN, y_testN = y_bal[train_index], y_bal[test_index]
    model_NB.fit(X_trainN, y_trainN)
    # Hard labels are kept: the confusion-matrix and McNemar cells read pred_NB.
    pred_NB = model_NB.predict(X_testN)
    # A ROC curve needs a continuous score; 0/1 labels collapse it to a single
    # operating point, so the class-1 probability is used instead.
    score_NB = model_NB.predict_proba(X_testN)[:, 1]
    fpr, tpr, thresholds = roc_curve(y_testN, score_NB)
    mean_tpr += np.interp(mean_fpr, fpr, tpr)
    mean_tpr[0] = 0.0
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, lw=lw, color=color, label='ROC fold %d (area = %0.2f)' % (i + 1, roc_auc))
    auc_NB[i] = roc_auc
    i += 1
print(auc_NB)
plt.plot([0, 1], [0, 1], linestyle='--', lw=lw, color='k')
mean_tpr /= skf.get_n_splits(X_bal, y_bal)
mean_tpr[-1] = 1.0
mean_auc_NB = auc(mean_fpr, mean_tpr)
plt.plot(mean_fpr, mean_tpr, color='g', linestyle='--', label='Mean ROC (area = %0.2f)' % mean_auc_NB, lw=lw)
# The printed figure is the mean ROC AUC scaled by 100, not an accuracy.
print('Average GaussianNB classifier AUC (x100) = %0.2f' % (mean_auc_NB*100.0))
plt.xlim([-0.05, 1.05])
plt.ylim([-0.05, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve: Gaussian Naive Bayes')
plt.legend(loc="lower right")
plt.show()

In [None]:
# Confusion matrix of pred_NB against the labels at test_index
# (both are left over from the final fold of the CV loop).
cm_NB = metrics.confusion_matrix(y_true=y_bal[test_index], y_pred=pred_NB)
# Labelled copy for display: rows are actual classes, columns predicted ones.
cm_df_NB = pd.DataFrame(
    data=cm_NB,
    index=['Actual Negative', 'Actual Positive'],
    columns=['Predicted Negative', 'Predicted Positive'],
)
cm_df_NB

In [None]:
# Report confusion-matrix metrics for the Gaussian Naive Bayes predictions.
# The four counts are unpacked from cm_NB instead of being typed in by hand,
# so the report always matches the matrix computed in the previous cell.
# Layout follows that cell's labels (rows = actual, columns = predicted):
# [[TN, FP], [FN, TP]].
TN, FP, FN, TP = (int(count) for count in cm_NB.ravel())
print('True Positives:', TP)
print('True Negatives:', TN)
print('False Positives:', FP)
print('False Negatives:', FN)

# calculate accuracy
conf_accuracy = (TP + TN) / float(TP + TN + FP + FN)
print('Accuracy: ', conf_accuracy)

# calculate mis-classification
conf_misclassification = 1 - conf_accuracy
print('Mis-Classification: ', conf_misclassification)

# calculate the sensitivity (detected share of actual positives)
conf_sensitivity = TP / float(TP + FN)
print('Sensitivity: ', conf_sensitivity)
# calculate the specificity (cleared share of actual negatives)
conf_specificity = TN / float(TN + FP)
print('Specificity: ', conf_specificity)

# calculate ppv (share of positive calls that are right)
conf_ppv = TP / float(TP + FP)
print('PPV: ', conf_ppv)
# calculate npv (share of negative calls that are right)
conf_npv = TN / float(TN + FN)
print('NPV: ', conf_npv)

In [None]:
# Class probabilities from the GaussianNB model as last fitted by the CV loop;
# column 1 is kept as the class 1 (septic) probability.
nb_class_probs = model_NB.predict_proba(X_testN)
y_pred_prob6 = nb_class_probs[:, 1]

In [None]:
# predict sepsis if the predicted probability is greater than 0.11
from sklearn.preprocessing import binarize
threshold = 0.11
# threshold must be passed by keyword: it is keyword-only in current
# scikit-learn, and the keyword form also works on older releases.
# binarize expects 2-D input, hence the one-row wrapper and the trailing [0].
y_pred_class_NB = binarize([y_pred_prob6], threshold=threshold)[0]

In [None]:
# Confusion matrix of the 0.11-threshold Naive Bayes labels against the labels
# at test_index (left over from the CV loop).
# NOTE(review): cm3 / cm_df3 are also assigned by other cells.
cm3 = metrics.confusion_matrix(y_true=y_bal[test_index], y_pred=y_pred_class_NB)
# Labelled copy for display: rows are actual classes, columns predicted ones.
cm_df3 = pd.DataFrame(
    data=cm3,
    index=['Actual Negative', 'Actual Positive'],
    columns=['Predicted Negative', 'Predicted Positive'],
)
cm_df3

In [None]:
# Report confusion-matrix metrics for the 0.11-threshold Naive Bayes labels.
# The four counts are unpacked from cm3 (the matrix computed in the previous
# cell) instead of being typed in by hand, so the report always matches it.
# Layout follows that cell's labels (rows = actual, columns = predicted):
# [[TN, FP], [FN, TP]].
TN, FP, FN, TP = (int(count) for count in cm3.ravel())
print('True Positives:', TP)
print('True Negatives:', TN)
print('False Positives:', FP)
print('False Negatives:', FN)

# calculate accuracy
conf_accuracy = (TP + TN) / float(TP + TN + FP + FN)
print('Accuracy: ', conf_accuracy)

# calculate mis-classification
conf_misclassification = 1 - conf_accuracy
print('Mis-Classification: ', conf_misclassification)

# calculate the sensitivity (detected share of actual positives)
conf_sensitivity = TP / float(TP + FN)
print('Sensitivity: ', conf_sensitivity)
# calculate the specificity (cleared share of actual negatives)
conf_specificity = TN / float(TN + FP)
print('Specificity: ', conf_specificity)

# calculate ppv (share of positive calls that are right)
conf_ppv = TP / float(TP + FP)
print('PPV: ', conf_ppv)
# calculate npv (share of negative calls that are right)
conf_npv = TN / float(TN + FN)
print('NPV: ', conf_npv)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# Ground truth, then the two prediction vectors being compared:
# model 1 = pred_NB, model 2 = y_pred_class_NB.
y_target, y_model1, y_model2 = y_testN, pred_NB, y_pred_class_NB

# 2x2 agreement/disagreement table consumed by McNemar's test
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hard-coded 2x2 table for the Naive Bayes comparison
# (presumably transcribed from the mcnemar_table output - confirm).
tb_NBn = np.array([[71, 0], [0, 4]])

In [None]:
# Exact (binomial) McNemar test on tb_NBn; chi2 is expected to be None here
# per the mlxtend docs - confirm.
chi2, p = mcnemar(ary=tb_NBn, corrected=True, exact=True)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
# Hard-coded counts in confusion-matrix layout [[TN, FP], [FN, TP]]
# (they match the values reported for the 0.11-threshold Naive Bayes labels).
tb_NB = np.array([[36, 2], [2, 35]])

In [None]:
# Continuity-corrected chi-squared McNemar test on tb_NB
# (its off-diagonal cells are the FP and FN counts).
chi2, p = mcnemar(ary=tb_NB, corrected=True, exact=False)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
# Exact (binomial) McNemar test on tb_NB; chi2 is expected to be None here
# per the mlxtend docs - confirm.
chi2, p = mcnemar(ary=tb_NB, corrected=True, exact=True)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
# predict sepsis if the predicted probability is greater than 0.89
from sklearn.preprocessing import binarize
threshold = 0.89
# threshold must be passed by keyword: it is keyword-only in current
# scikit-learn, and the keyword form also works on older releases.
# binarize expects 2-D input, hence the one-row wrapper and the trailing [0].
y_pred_class_NB2 = binarize([y_pred_prob6], threshold=threshold)[0]

In [None]:
# Confusion matrix of the 0.89-threshold Naive Bayes labels against the labels
# at test_index (left over from the CV loop).
# NOTE(review): cm3 / cm_df3 are also assigned by earlier cells.
cm3 = metrics.confusion_matrix(y_true=y_bal[test_index], y_pred=y_pred_class_NB2)
# Labelled copy for display: rows are actual classes, columns predicted ones.
cm_df3 = pd.DataFrame(
    data=cm3,
    index=['Actual Negative', 'Actual Positive'],
    columns=['Predicted Negative', 'Predicted Positive'],
)
cm_df3

In [None]:
# Report confusion-matrix metrics for the 0.89-threshold Naive Bayes labels.
# The four counts are unpacked from cm3 (the matrix computed in the previous
# cell) instead of being typed in by hand, so the report always matches it.
# Layout follows that cell's labels (rows = actual, columns = predicted):
# [[TN, FP], [FN, TP]].
TN, FP, FN, TP = (int(count) for count in cm3.ravel())
print('True Positives:', TP)
print('True Negatives:', TN)
print('False Positives:', FP)
print('False Negatives:', FN)

# calculate accuracy
conf_accuracy = (TP + TN) / float(TP + TN + FP + FN)
print('Accuracy: ', conf_accuracy)

# calculate mis-classification
conf_misclassification = 1 - conf_accuracy
print('Mis-Classification: ', conf_misclassification)

# calculate the sensitivity (detected share of actual positives)
conf_sensitivity = TP / float(TP + FN)
print('Sensitivity: ', conf_sensitivity)
# calculate the specificity (cleared share of actual negatives)
conf_specificity = TN / float(TN + FP)
print('Specificity: ', conf_specificity)

# calculate ppv (share of positive calls that are right)
conf_ppv = TP / float(TP + FP)
print('PPV: ', conf_ppv)
# calculate npv (share of negative calls that are right)
conf_npv = TN / float(TN + FN)
print('NPV: ', conf_npv)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# Ground truth, then the two prediction vectors being compared:
# model 1 = pred_NB, model 2 = y_pred_class_NB2.
y_target, y_model1, y_model2 = y_testN, pred_NB, y_pred_class_NB2

# 2x2 agreement/disagreement table consumed by McNemar's test
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hard-coded 2x2 table for the second Naive Bayes comparison
# (presumably transcribed from the mcnemar_table output - confirm).
tb_NBn2 = np.array([[71, 1], [0, 3]])

In [None]:
# Continuity-corrected chi-squared McNemar test on tb_NBn2.
chi2, p = mcnemar(ary=tb_NBn2, corrected=True, exact=False)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
# tb_NB2 is not assigned in any of the surrounding cells, so this test raised
# NameError. It is defined here following the sibling sections: the counts of
# the 0.89-threshold Naive Bayes report (TN=37, FP=1, FN=2, TP=35) laid out as
# [[TN, FP], [FN, TP]].
# NOTE(review): confirm these counts against the displayed confusion matrix.
tb_NB2 = np.array([[37, 1],
                   [2, 35]])
# Continuity-corrected chi-squared McNemar test on that table.
chi2, p = mcnemar(ary=tb_NB2, exact=False, corrected=True)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
# Exact (binomial) McNemar test on tb_NB2; chi2 is expected to be None here
# per the mlxtend docs - confirm.
chi2, p = mcnemar(ary=tb_NB2, corrected=True, exact=True)
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
#Decision Tree model: 10-fold stratified cross-validation with one ROC curve per fold
seed = 7
# Seed the tree as well as the splitter: DecisionTreeClassifier permutes the
# features randomly while searching for splits, so without random_state the
# fitted trees (and every number reported below) can change between runs.
model_DT = DecisionTreeClassifier(random_state=seed)
skf = StratifiedKFold(n_splits=10, random_state=seed, shuffle=True)
mean_fpr = np.linspace(0, 1, 100)
# Accumulator for the fold-averaged TPR, sampled on the common mean_fpr grid
mean_tpr = np.zeros_like(mean_fpr)
colors = cycle(['cyan', 'indigo', 'seagreen', 'yellow', 'blue', 'darkorange', 'red', 'brown', 'green', 'grey'])
lw = 2
i = 0
auc_DT = {}  # fold number (0-based) -> AUC of that fold
# NOTE: X_testN, y_testN, test_index and pred_DT keep the values of the LAST
# fold after the loop; the confusion-matrix cells below read them.
for (train_index, test_index), color in zip (skf.split(X_bal, y_bal), colors):
    X_trainN, X_testN = X_bal.iloc[train_index], X_bal.iloc[test_index]
    y_trainN, y_testN = y_bal[train_index], y_bal[test_index]
    model_DT.fit(X_trainN, y_trainN)
    pred_DT = model_DT.predict(X_testN)
    # Compute ROC curve and area under the curve.
    # NOTE(review): roc_curve is fed hard class labels (predict), not scores,
    # so each fold's curve has a single interior operating point.
    fpr, tpr, thresholds = roc_curve(y_bal[test_index], pred_DT)
    # Resample this fold's curve onto the common FPR grid before summing
    mean_tpr += np.interp(mean_fpr, fpr, tpr)
    mean_tpr[0] = 0.0
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, lw=lw, color=color, label='ROC fold %d (area = %0.2f)' % (i + 1, roc_auc))
    auc_DT[i] = roc_auc
    i += 1
print(auc_DT)
# Chance diagonal
plt.plot([0, 1], [0, 1], linestyle='--', lw=lw, color='k')
# Turn the summed TPRs into the mean curve and pin its end point to (1, 1)
mean_tpr /= skf.get_n_splits(X_bal, y_bal)
mean_tpr[-1] = 1.0
mean_auc_DT = auc(mean_fpr, mean_tpr)
plt.plot(mean_fpr, mean_tpr, color='g', linestyle='--', label='Mean ROC (area = %0.2f)' % mean_auc_DT, lw=lw)
# The value printed is the mean AUC scaled to percent, not a classification accuracy
print('Average DT classifier AUC (%%) = %0.2f' % (mean_auc_DT*100.0))
plt.xlim([-0.05, 1.05])
plt.ylim([-0.05, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve: Decision Tree')
plt.legend(loc="lower right")
plt.show()

In [None]:
# Confusion matrix of pred_DT against the true labels selected by test_index
# (both hold whatever the cross-validation loop left behind, i.e. its last fold)
cm_DT = metrics.confusion_matrix(y_bal[test_index], pred_DT)
# Label the raw counts: rows are the actual class, columns the predicted class
cm_df_DT = pd.DataFrame(
    cm_DT,
    index=['Actual Negative', 'Actual Positive'],
    columns=['Predicted Negative', 'Predicted Positive'],
)
# Last expression of the cell, so the notebook renders the table
cm_df_DT

In [None]:
# Report the metrics derived from the decision-tree confusion matrix cm_DT.
# The four counts are unpacked from cm_DT instead of being typed in by hand, so
# they always match the matrix computed in the previous cell on a re-run.
# scikit-learn orders a binary confusion matrix as [[TN, FP], [FN, TP]].
TN, FP, FN, TP = (int(count) for count in cm_DT.ravel())
print('True Positives:', TP)
print('True Negatives:', TN)
print('False Positives:', FP)
print('False Negatives:', FN)

# calculate accuracy
conf_accuracy = (TP + TN) / (TP + TN + FP + FN)
print('Accuracy: ', conf_accuracy)

# calculate mis-classification
conf_misclassification = 1 - conf_accuracy
print('Mis-Classification: ', conf_misclassification)

# calculate the sensitivity (true-positive rate)
conf_sensitivity = TP / (TP + FN)
print('Sensitivity: ', conf_sensitivity)
# calculate the specificity (true-negative rate)
conf_specificity = TN / (TN + FP)
print('Specificity: ', conf_specificity)

# calculate ppv; nan if nothing was predicted positive
conf_ppv = TP / (TP + FP) if (TP + FP) else float('nan')
print('PPV: ', conf_ppv)
# calculate npv; nan if nothing was predicted negative
conf_npv = TN / (TN + FN) if (TN + FN) else float('nan')
print('NPV: ', conf_npv)

In [None]:
# Keep the predicted probability of class 1 (septic) for every row of X_testN
# (column index 1 of predict_proba; assumes the classes are ordered 0, 1 — TODO confirm)
proba_DT = model_DT.predict_proba(X_testN)
y_pred_prob7 = proba_DT[:, 1]

In [None]:
# predict sepsis if the predicted probability is greater than 0.11
from sklearn.preprocessing import binarize
threshold = 0.11
# `threshold` is keyword-only in current scikit-learn; passing it positionally
# raises a TypeError there. binarize() works on 2-D input, hence the one-row
# wrapper and the trailing [0] to get the 1-D vector of 0./1. labels back.
y_pred_class_DT = binarize([y_pred_prob7], threshold=threshold)[0]

In [None]:
# Confusion matrix of the thresholded predictions y_pred_class_DT against
# the true labels selected by test_index
cm_DT2 = metrics.confusion_matrix(y_bal[test_index],y_pred_class_DT)
# Assigning columns names (rows are the actual class, columns the predicted class)
cm_df_DT2 = pd.DataFrame(cm_DT2, 
            columns = ['Predicted Negative', 'Predicted Positive'],
            index = ['Actual Negative', 'Actual Positive'])
# Show the matrix built in THIS cell; the original displayed cm_df_DT, i.e. the
# un-thresholded matrix from an earlier cell
cm_df_DT2

In [None]:
# Report the metrics derived from the thresholded confusion matrix cm_DT2.
# The four counts are unpacked from cm_DT2 instead of being typed in by hand, so
# they always match the matrix computed in the previous cell on a re-run.
# scikit-learn orders a binary confusion matrix as [[TN, FP], [FN, TP]].
TN, FP, FN, TP = (int(count) for count in cm_DT2.ravel())
print('True Positives:', TP)
print('True Negatives:', TN)
print('False Positives:', FP)
print('False Negatives:', FN)

# calculate accuracy
conf_accuracy = (TP + TN) / (TP + TN + FP + FN)
print('Accuracy: ', conf_accuracy)

# calculate mis-classification
conf_misclassification = 1 - conf_accuracy
print('Mis-Classification: ', conf_misclassification)

# calculate the sensitivity (true-positive rate)
conf_sensitivity = TP / (TP + FN)
print('Sensitivity: ', conf_sensitivity)
# calculate the specificity (true-negative rate)
conf_specificity = TN / (TN + FP)
print('Specificity: ', conf_specificity)

# calculate ppv; nan if nothing was predicted positive
conf_ppv = TP / (TP + FP) if (TP + FP) else float('nan')
print('PPV: ', conf_ppv)
# calculate npv; nan if nothing was predicted negative
conf_npv = TN / (TN + FN) if (TN + FN) else float('nan')
print('NPV: ', conf_npv)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# True labels and the two prediction vectors being compared
# (model 1: pred_DT, model 2: y_pred_class_DT)
y_target, y_model1, y_model2 = y_testN, pred_DT, y_pred_class_DT

# 2x2 table of where the two prediction vectors agree/disagree with the truth
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hand-entered 2x2 table (presumably copied from the mcnemar_table printout — verify)
tb_DTn = np.array([[72, 0], [0, 3]])

In [None]:
# Exact (binomial) form of McNemar's test on tb_DTn
# NOTE(review): mlxtend documents chi2 as None when exact=True, so the first line may print None — confirm.
mcnemar_exact = mcnemar(ary=tb_DTn, exact=True)
chi2, p = mcnemar_exact
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
# Hand-entered 2x2 table for the McNemar tests that follow
# NOTE(review): where these counts come from is not visible in this cell — confirm their provenance.
tb_DT = np.array([[36, 2], [2, 35]])

In [None]:
# Continuity-corrected chi-squared form of McNemar's test on tb_DT
# NOTE(review): b + c = 4 in tb_DT; mlxtend's docs recommend exact=True when b + c < 25 — confirm.
chi2, p = mcnemar(ary=tb_DT, corrected=True, exact=False)
for _caption, _value in (('chi-squared:', chi2), ('p-value:', p)):
    print(_caption, _value)

In [None]:
# Exact (binomial) form of McNemar's test on tb_DT
# NOTE(review): mlxtend documents chi2 as None when exact=True, so the first line may print None — confirm.
mcnemar_exact = mcnemar(ary=tb_DT, exact=True)
chi2, p = mcnemar_exact
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
# predict sepsis if the predicted probability is greater than 0.89
from sklearn.preprocessing import binarize
threshold = 0.89
# `threshold` is keyword-only in current scikit-learn; passing it positionally
# raises a TypeError there. binarize() works on 2-D input, hence the one-row
# wrapper and the trailing [0] to get the 1-D vector of 0./1. labels back.
y_pred_class_DT2 = binarize([y_pred_prob7], threshold=threshold)[0]

In [None]:
# Confusion matrix of the thresholded predictions y_pred_class_DT2 against
# the true labels selected by test_index
cm_DT3 = metrics.confusion_matrix(y_bal[test_index], y_pred_class_DT2)
# Label the raw counts: rows are the actual class, columns the predicted class
cm_df_DT3 = pd.DataFrame(
    cm_DT3,
    index=['Actual Negative', 'Actual Positive'],
    columns=['Predicted Negative', 'Predicted Positive'],
)
# Last expression of the cell, so the notebook renders the table
cm_df_DT3

In [None]:
# Report the metrics derived from the thresholded confusion matrix cm_DT3.
# The four counts are unpacked from cm_DT3 instead of being typed in by hand:
# the hand-typed values (35/36/2/1) summed to 74, one short of the 75 samples
# in the other tables for this fold, so at least one of them was mis-copied.
# scikit-learn orders a binary confusion matrix as [[TN, FP], [FN, TP]].
TN, FP, FN, TP = (int(count) for count in cm_DT3.ravel())
print('True Positives:', TP)
print('True Negatives:', TN)
print('False Positives:', FP)
print('False Negatives:', FN)

# calculate accuracy
conf_accuracy = (TP + TN) / (TP + TN + FP + FN)
print('Accuracy: ', conf_accuracy)

# calculate mis-classification
conf_misclassification = 1 - conf_accuracy
print('Mis-Classification: ', conf_misclassification)

# calculate the sensitivity (true-positive rate)
conf_sensitivity = TP / (TP + FN)
print('Sensitivity: ', conf_sensitivity)
# calculate the specificity (true-negative rate)
conf_specificity = TN / (TN + FP)
print('Specificity: ', conf_specificity)

# calculate ppv; nan if nothing was predicted positive
conf_ppv = TP / (TP + FP) if (TP + FP) else float('nan')
print('PPV: ', conf_ppv)
# calculate npv; nan if nothing was predicted negative
conf_npv = TN / (TN + FN) if (TN + FN) else float('nan')
print('NPV: ', conf_npv)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# True labels and the two prediction vectors being compared
# (model 1: pred_DT, model 2: y_pred_class_DT2)
y_target, y_model1, y_model2 = y_testN, pred_DT, y_pred_class_DT2

# 2x2 table of where the two prediction vectors agree/disagree with the truth
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hand-entered 2x2 table (presumably copied from the mcnemar_table printout — verify)
tb_DTn2 = np.array([[72, 0], [0, 3]])

In [None]:
# Exact (binomial) form of McNemar's test on tb_DTn2
# NOTE(review): mlxtend documents chi2 as None when exact=True, so the first line may print None — confirm.
mcnemar_exact = mcnemar(ary=tb_DTn2, exact=True)
chi2, p = mcnemar_exact
print('chi-squared:', chi2)
print('p-value:', p)

In [None]:
# Hand-entered 2x2 table for the McNemar test that follows
# NOTE(review): where these counts come from is not visible in this cell — confirm their provenance.
tb_DT2 = np.array([[36, 2], [2, 35]])

In [None]:
# Continuity-corrected chi-squared form of McNemar's test on tb_DT2
# NOTE(review): b + c = 4 in tb_DT2; mlxtend's docs recommend exact=True when b + c < 25 — confirm.
chi2, p = mcnemar(ary=tb_DT2, corrected=True, exact=False)
for _caption, _value in (('chi-squared:', chi2), ('p-value:', p)):
    print(_caption, _value)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# True labels and the two prediction vectors being compared
# (model 1: pred_svml, model 2: pred_svmrbf)
y_target, y_model1, y_model2 = y_testN, pred_svml, pred_svmrbf

# 2x2 table of where the two models agree/disagree with the truth
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hand-entered 2x2 table (presumably copied from the mcnemar_table printout — verify)
tb_model = np.array([[69, 2], [2, 2]])

In [None]:
# Continuity-corrected chi-squared form of McNemar's test on tb_model
# NOTE(review): b + c = 4 in tb_model; mlxtend's docs recommend exact=True when b + c < 25 — confirm.
chi2, p = mcnemar(ary=tb_model, corrected=True, exact=False)
for _caption, _value in (('chi-squared:', chi2), ('p-value:', p)):
    print(_caption, _value)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# True labels and the two prediction vectors being compared
# (model 1: pred_svml, model 2: pred_svmpoly)
y_target, y_model1, y_model2 = y_testN, pred_svml, pred_svmpoly

# 2x2 table of where the two models agree/disagree with the truth
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hand-entered 2x2 table (presumably copied from the mcnemar_table printout — verify)
tb_model1 = np.array([[70, 2], [1, 2]])

In [None]:
# Continuity-corrected chi-squared form of McNemar's test on tb_model1
# NOTE(review): b + c = 3 in tb_model1; mlxtend's docs recommend exact=True when b + c < 25 — confirm.
chi2, p = mcnemar(ary=tb_model1, corrected=True, exact=False)
for _caption, _value in (('chi-squared:', chi2), ('p-value:', p)):
    print(_caption, _value)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# True labels and the two prediction vectors being compared
# (model 1: pred_svml, model 2: pred_lr)
y_target, y_model1, y_model2 = y_testN, pred_svml, pred_lr

# 2x2 table of where the two models agree/disagree with the truth
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hand-entered 2x2 table (presumably copied from the mcnemar_table printout — verify)
tb_model2 = np.array([[68, 0], [3, 4]])

In [None]:
# Continuity-corrected chi-squared form of McNemar's test on tb_model2
# NOTE(review): b + c = 3 in tb_model2; mlxtend's docs recommend exact=True when b + c < 25 — confirm.
chi2, p = mcnemar(ary=tb_model2, corrected=True, exact=False)
for _caption, _value in (('chi-squared:', chi2), ('p-value:', p)):
    print(_caption, _value)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# True labels and the two prediction vectors being compared
# (model 1: pred_svml, model 2: pred_neigh)
y_target, y_model1, y_model2 = y_testN, pred_svml, pred_neigh

# 2x2 table of where the two models agree/disagree with the truth
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hand-entered 2x2 table (presumably copied from the mcnemar_table printout — verify)
tb_model3 = np.array([[65, 3], [6, 1]])

In [None]:
# Continuity-corrected chi-squared form of McNemar's test on tb_model3
# NOTE(review): b + c = 9 in tb_model3; mlxtend's docs recommend exact=True when b + c < 25 — confirm.
chi2, p = mcnemar(ary=tb_model3, corrected=True, exact=False)
for _caption, _value in (('chi-squared:', chi2), ('p-value:', p)):
    print(_caption, _value)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# True labels and the two prediction vectors being compared
# (model 1: pred_svml, model 2: pred_NB)
y_target, y_model1, y_model2 = y_testN, pred_svml, pred_NB

# 2x2 table of where the two models agree/disagree with the truth
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hand-entered 2x2 table (presumably copied from the mcnemar_table printout — verify)
tb_model4 = np.array([[69, 2], [2, 2]])
# Continuity-corrected chi-squared form of McNemar's test
# NOTE(review): b + c = 4 here; mlxtend's docs recommend exact=True when b + c < 25 — confirm.
chi2, p = mcnemar(ary=tb_model4, corrected=True, exact=False)
for _caption, _value in (('chi-squared:', chi2), ('p-value:', p)):
    print(_caption, _value)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# True labels and the two prediction vectors being compared
# (model 1: pred_svml, model 2: pred_DT)
y_target, y_model1, y_model2 = y_testN, pred_svml, pred_DT

# 2x2 table of where the two models agree/disagree with the truth
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hand-entered 2x2 table (presumably copied from the mcnemar_table printout — verify)
tb_model5 = np.array([[68, 4], [3, 0]])
# Continuity-corrected chi-squared form of McNemar's test
# NOTE(review): b + c = 7 here; mlxtend's docs recommend exact=True when b + c < 25 — confirm.
chi2, p = mcnemar(ary=tb_model5, corrected=True, exact=False)
for _caption, _value in (('chi-squared:', chi2), ('p-value:', p)):
    print(_caption, _value)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# True labels and the two prediction vectors being compared
# (model 1: pred_svmrbf, model 2: pred_svmpoly)
y_target, y_model1, y_model2 = y_testN, pred_svmrbf, pred_svmpoly

# 2x2 table of where the two models agree/disagree with the truth
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hand-entered 2x2 table (presumably copied from the mcnemar_table printout — verify)
tb_model6 = np.array([[71, 1], [0, 3]])
# Continuity-corrected chi-squared form of McNemar's test
# NOTE(review): b + c = 1 here; mlxtend's docs recommend exact=True when b + c < 25 — confirm.
chi2, p = mcnemar(ary=tb_model6, corrected=True, exact=False)
for _caption, _value in (('chi-squared:', chi2), ('p-value:', p)):
    print(_caption, _value)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# True labels and the two prediction vectors being compared
# (model 1: pred_svmrbf, model 2: pred_lr)
y_target, y_model1, y_model2 = y_testN, pred_svmrbf, pred_lr

# 2x2 table of where the two models agree/disagree with the truth
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hand-entered 2x2 table (presumably copied from the mcnemar_table printout — verify)
tb_model7 = np.array([[66, 2], [5, 2]])
# Continuity-corrected chi-squared form of McNemar's test
# NOTE(review): b + c = 7 here; mlxtend's docs recommend exact=True when b + c < 25 — confirm.
chi2, p = mcnemar(ary=tb_model7, corrected=True, exact=False)
for _caption, _value in (('chi-squared:', chi2), ('p-value:', p)):
    print(_caption, _value)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# True labels and the two prediction vectors being compared
# (model 1: pred_svmrbf, model 2: pred_neigh)
y_target, y_model1, y_model2 = y_testN, pred_svmrbf, pred_neigh

# 2x2 table of where the two models agree/disagree with the truth
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hand-entered 2x2 table (presumably copied from the mcnemar_table printout — verify)
tb_model8 = np.array([[66, 2], [5, 2]])
# Continuity-corrected chi-squared form of McNemar's test
# NOTE(review): b + c = 7 here; mlxtend's docs recommend exact=True when b + c < 25 — confirm.
chi2, p = mcnemar(ary=tb_model8, corrected=True, exact=False)
for _caption, _value in (('chi-squared:', chi2), ('p-value:', p)):
    print(_caption, _value)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# True labels and the two prediction vectors being compared
# (model 1: pred_svmrbf, model 2: pred_NB)
y_target, y_model1, y_model2 = y_testN, pred_svmrbf, pred_NB

# 2x2 table of where the two models agree/disagree with the truth
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hand-entered 2x2 table (presumably copied from the mcnemar_table printout — verify)
tb_model9 = np.array([[69, 2], [2, 2]])
# Continuity-corrected chi-squared form of McNemar's test
# NOTE(review): b + c = 4 here; mlxtend's docs recommend exact=True when b + c < 25 — confirm.
chi2, p = mcnemar(ary=tb_model9, corrected=True, exact=False)
for _caption, _value in (('chi-squared:', chi2), ('p-value:', p)):
    print(_caption, _value)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# True labels and the two prediction vectors being compared
# (model 1: pred_svmrbf, model 2: pred_DT)
y_target, y_model1, y_model2 = y_testN, pred_svmrbf, pred_DT

# 2x2 table of where the two models agree/disagree with the truth
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hand-entered 2x2 table (presumably copied from the mcnemar_table printout — verify)
tb_model10 = np.array([[69, 3], [2, 1]])
# Continuity-corrected chi-squared form of McNemar's test
# NOTE(review): b + c = 5 here; mlxtend's docs recommend exact=True when b + c < 25 — confirm.
chi2, p = mcnemar(ary=tb_model10, corrected=True, exact=False)
for _caption, _value in (('chi-squared:', chi2), ('p-value:', p)):
    print(_caption, _value)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# True labels and the two prediction vectors being compared
# (model 1: pred_svmpoly, model 2: pred_lr)
y_target, y_model1, y_model2 = y_testN, pred_svmpoly, pred_lr

# 2x2 table of where the two models agree/disagree with the truth
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hand-entered 2x2 table (presumably copied from the mcnemar_table printout — verify)
tb_model11 = np.array([[67, 1], [5, 2]])
# Continuity-corrected chi-squared form of McNemar's test
# NOTE(review): b + c = 6 here; mlxtend's docs recommend exact=True when b + c < 25 — confirm.
chi2, p = mcnemar(ary=tb_model11, corrected=True, exact=False)
for _caption, _value in (('chi-squared:', chi2), ('p-value:', p)):
    print(_caption, _value)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# True labels and the two prediction vectors being compared
# (model 1: pred_svmpoly, model 2: pred_neigh)
y_target, y_model1, y_model2 = y_testN, pred_svmpoly, pred_neigh

# 2x2 table of where the two models agree/disagree with the truth
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hand-entered 2x2 table (presumably copied from the mcnemar_table printout — verify)
tb_model12 = np.array([[67, 1], [5, 2]])
# Continuity-corrected chi-squared form of McNemar's test
# NOTE(review): b + c = 6 here; mlxtend's docs recommend exact=True when b + c < 25 — confirm.
chi2, p = mcnemar(ary=tb_model12, corrected=True, exact=False)
for _caption, _value in (('chi-squared:', chi2), ('p-value:', p)):
    print(_caption, _value)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# True labels and the two prediction vectors being compared
# (model 1: pred_svmpoly, model 2: pred_NB)
y_target, y_model1, y_model2 = y_testN, pred_svmpoly, pred_NB

# 2x2 table of where the two models agree/disagree with the truth
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hand-entered 2x2 table (presumably copied from the mcnemar_table printout — verify)
tb_model13 = np.array([[70, 1], [2, 2]])
# Continuity-corrected chi-squared form of McNemar's test
# NOTE(review): b + c = 3 here; mlxtend's docs recommend exact=True when b + c < 25 — confirm.
chi2, p = mcnemar(ary=tb_model13, corrected=True, exact=False)
for _caption, _value in (('chi-squared:', chi2), ('p-value:', p)):
    print(_caption, _value)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# True labels and the two prediction vectors being compared
# (model 1: pred_svmpoly, model 2: pred_DT)
y_target, y_model1, y_model2 = y_testN, pred_svmpoly, pred_DT

# 2x2 table of where the two models agree/disagree with the truth
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hand-entered 2x2 table (presumably copied from the mcnemar_table printout — verify)
tb_model14 = np.array([[69, 3], [3, 0]])
# Continuity-corrected chi-squared form of McNemar's test
# NOTE(review): b + c = 6 here; mlxtend's docs recommend exact=True when b + c < 25 — confirm.
chi2, p = mcnemar(ary=tb_model14, corrected=True, exact=False)
for _caption, _value in (('chi-squared:', chi2), ('p-value:', p)):
    print(_caption, _value)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# True labels and the two prediction vectors being compared
# (model 1: pred_lr, model 2: pred_neigh)
y_target, y_model1, y_model2 = y_testN, pred_lr, pred_neigh

# 2x2 table of where the two models agree/disagree with the truth
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hand-entered 2x2 table (presumably copied from the mcnemar_table printout — verify)
tb_model15 = np.array([[62, 6], [6, 1]])
# Continuity-corrected chi-squared form of McNemar's test
# NOTE(review): b + c = 12 here; mlxtend's docs recommend exact=True when b + c < 25 — confirm.
chi2, p = mcnemar(ary=tb_model15, corrected=True, exact=False)
for _caption, _value in (('chi-squared:', chi2), ('p-value:', p)):
    print(_caption, _value)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# True labels and the two prediction vectors being compared
# (model 1: pred_lr, model 2: pred_NB)
y_target, y_model1, y_model2 = y_testN, pred_lr, pred_NB

# 2x2 table of where the two models agree/disagree with the truth
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hand-entered 2x2 table (presumably copied from the mcnemar_table printout — verify)
tb_model16 = np.array([[67, 4], [1, 3]])
# Continuity-corrected chi-squared form of McNemar's test
# NOTE(review): b + c = 5 here; mlxtend's docs recommend exact=True when b + c < 25 — confirm.
chi2, p = mcnemar(ary=tb_model16, corrected=True, exact=False)
for _caption, _value in (('chi-squared:', chi2), ('p-value:', p)):
    print(_caption, _value)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# True labels and the two prediction vectors being compared
# (model 1: pred_lr, model 2: pred_DT)
y_target, y_model1, y_model2 = y_testN, pred_lr, pred_DT

# 2x2 table of where the two models agree/disagree with the truth
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hand-entered 2x2 table (presumably copied from the mcnemar_table printout — verify)
tb_model17 = np.array([[65, 7], [3, 0]])
# Continuity-corrected chi-squared form of McNemar's test
# NOTE(review): b + c = 10 here; mlxtend's docs recommend exact=True when b + c < 25 — confirm.
chi2, p = mcnemar(ary=tb_model17, corrected=True, exact=False)
for _caption, _value in (('chi-squared:', chi2), ('p-value:', p)):
    print(_caption, _value)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# True labels and the two prediction vectors being compared
# (model 1: pred_neigh, model 2: pred_NB)
y_target, y_model1, y_model2 = y_testN, pred_neigh, pred_NB

# 2x2 table of where the two models agree/disagree with the truth
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hand-entered 2x2 table (presumably copied from the mcnemar_table printout — verify)
tb_model18 = np.array([[65, 6], [3, 1]])
# Continuity-corrected chi-squared form of McNemar's test
# NOTE(review): b + c = 9 here; mlxtend's docs recommend exact=True when b + c < 25 — confirm.
chi2, p = mcnemar(ary=tb_model18, corrected=True, exact=False)
for _caption, _value in (('chi-squared:', chi2), ('p-value:', p)):
    print(_caption, _value)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# True labels and the two prediction vectors being compared
# (model 1: pred_neigh, model 2: pred_DT)
y_target, y_model1, y_model2 = y_testN, pred_neigh, pred_DT

# 2x2 table of where the two models agree/disagree with the truth
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hand-entered 2x2 table (presumably copied from the mcnemar_table printout — verify)
tb_model19 = np.array([[65, 7], [3, 0]])
# Continuity-corrected chi-squared form of McNemar's test
# NOTE(review): b + c = 10 here; mlxtend's docs recommend exact=True when b + c < 25 — confirm.
chi2, p = mcnemar(ary=tb_model19, corrected=True, exact=False)
for _caption, _value in (('chi-squared:', chi2), ('p-value:', p)):
    print(_caption, _value)

In [None]:
import numpy as np
from mlxtend.evaluate import mcnemar_table

# True labels and the two prediction vectors being compared
# (model 1: pred_NB, model 2: pred_DT)
y_target, y_model1, y_model2 = y_testN, pred_NB, pred_DT

# 2x2 table of where the two models agree/disagree with the truth
tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
print(tb)

In [None]:
# Hand-entered 2x2 table (presumably copied from the mcnemar_table printout — verify)
tb_model20 = np.array([[68, 4], [3, 0]])
# Continuity-corrected chi-squared form of McNemar's test
# NOTE(review): b + c = 7 here; mlxtend's docs recommend exact=True when b + c < 25 — confirm.
chi2, p = mcnemar(ary=tb_model20, corrected=True, exact=False)
for _caption, _value in (('chi-squared:', chi2), ('p-value:', p)):
    print(_caption, _value)

In [None]:
# Per-fold AUCs of each classifier as ordered lists (keys 0 .. 9 of the auc_* mappings).
# .get() is kept so that a missing key yields None instead of raising.
fold_ids = range(10)
SVM_RBF = [auc_svmrbf.get(fold) for fold in fold_ids]
SVM_POLY = [auc_svmpoly.get(fold) for fold in fold_ids]
LR = [auc_lr.get(fold) for fold in fold_ids]
KNN = [auc_neigh.get(fold) for fold in fold_ids]
NB = [auc_NB.get(fold) for fold in fold_ids]
DT = [auc_DT.get(fold) for fold in fold_ids]

In [None]:
# Single-value samples fed to the Wilcoxon cells below; each is its own list object
PHY1, PHY2 = [0.11], [0.89]
SEN_L1, SPEC_L1 = [0.97], [0.97]

In [None]:
# Wilcoxon signed-rank test between PHY1 and SEN_L1.
# The label printed here used to read "SVML - SVMRBF" (copied from another
# cell), which did not match the samples actually compared.
# NOTE(review): both samples hold a single value, so the test has one pair only
# and its p-value carries no information — confirm this comparison is intended.
t_statistic, p_value = stats.wilcoxon(PHY1, SEN_L1)
print("\n PHY1 - SEN_L1 ")
print(t_statistic)
print(p_value)

In [None]:
# Wilcoxon signed-rank test between PHY2 and SPEC_L1.
# The label printed here used to read "SVML - SVMRBF" (copied from another
# cell), which did not match the samples actually compared.
# NOTE(review): both samples hold a single value, so the test has one pair only
# and its p-value carries no information — confirm this comparison is intended.
t_statistic, p_value = stats.wilcoxon(PHY2, SPEC_L1)
print("\n PHY2 - SPEC_L1 ")
print(t_statistic)
print(p_value)

In [None]:
#model comparison: per-fold AUCs of every classifier as ordered lists
np.random.seed(10)
# Keys 0 .. 9 of each auc_* mapping; .get() is kept so a missing key yields
# None instead of raising.
fold_ids = range(10)
SVM_L = [auc_svml.get(fold) for fold in fold_ids]
SVM_RBF = [auc_svmrbf.get(fold) for fold in fold_ids]
SVM_POLY = [auc_svmpoly.get(fold) for fold in fold_ids]
LR = [auc_lr.get(fold) for fold in fold_ids]
KNN = [auc_neigh.get(fold) for fold in fold_ids]
NB = [auc_NB.get(fold) for fold in fold_ids]
DT = [auc_DT.get(fold) for fold in fold_ids]

In [None]:
# Wilcoxon signed-rank test on the paired per-fold AUC lists SVM_L and SVM_RBF
t_statistic, p_value = stats.wilcoxon(SVM_L, SVM_RBF)
print("\n SVML - SVMRBF ", t_statistic, p_value, sep="\n")

In [None]:
# Wilcoxon signed-rank test on the paired per-fold AUC lists SVM_L and SVM_POLY
t_statistic, p_value = stats.wilcoxon(SVM_L, SVM_POLY)
print("\n SVML - SVM_POLY ", t_statistic, p_value, sep="\n")

In [None]:
# Wilcoxon signed-rank test on the paired per-fold AUC lists SVM_L and LR
t_statistic, p_value = stats.wilcoxon(SVM_L, LR)
print("\n SVML - LR ", t_statistic, p_value, sep="\n")

In [None]:
# Wilcoxon signed-rank test on the paired per-fold AUC lists SVM_L and KNN
t_statistic, p_value = stats.wilcoxon(SVM_L, KNN)
print("\n SVML - KNN ", t_statistic, p_value, sep="\n")

In [None]:
# Wilcoxon signed-rank test on the paired per-fold AUC lists SVM_L and NB
t_statistic, p_value = stats.wilcoxon(SVM_L, NB)
print("\n SVML - NB ", t_statistic, p_value, sep="\n")

In [None]:
# Wilcoxon signed-rank test on the paired per-fold AUC lists SVM_L and DT
t_statistic, p_value = stats.wilcoxon(SVM_L, DT)
print("\n SVML - DT ", t_statistic, p_value, sep="\n")

In [None]:
# Wilcoxon signed-rank test on the paired per-fold AUC lists SVM_RBF and SVM_POLY
t_statistic, p_value = stats.wilcoxon(SVM_RBF, SVM_POLY)
print("\n SVM_RBF - SVM_POLY ", t_statistic, p_value, sep="\n")

In [None]:
# Wilcoxon signed-rank test on the paired per-fold AUC lists SVM_RBF and LR
t_statistic, p_value = stats.wilcoxon(SVM_RBF, LR)
print("\n SVM_RBF - LR ", t_statistic, p_value, sep="\n")

In [None]:
# Wilcoxon signed-rank test on the paired per-fold AUC lists SVM_RBF and NB
t_statistic, p_value = stats.wilcoxon(SVM_RBF, NB)
print("\n SVM_RBF - NB", t_statistic, p_value, sep="\n")

In [None]:
# Wilcoxon signed-rank test on the paired per-fold AUC lists SVM_RBF and DT
t_statistic, p_value = stats.wilcoxon(SVM_RBF, DT)
print("\n SVM_RBF - DT ", t_statistic, p_value, sep="\n")

In [None]:
# Wilcoxon signed-rank test on the paired per-fold AUC lists SVM_POLY and LR
t_statistic, p_value = stats.wilcoxon(SVM_POLY, LR)
print("\n SVM_POLY - LR ", t_statistic, p_value, sep="\n")

In [None]:
# Wilcoxon signed-rank test on the paired per-fold AUC lists SVM_POLY and NB
t_statistic, p_value = stats.wilcoxon(SVM_POLY, NB)
print("\n SVM_POLY - NB ", t_statistic, p_value, sep="\n")

In [None]:
# Wilcoxon signed-rank test on the paired per-fold AUC lists SVM_POLY and DT
t_statistic, p_value = stats.wilcoxon(SVM_POLY, DT)
print("\n SVM_POLY - DT ", t_statistic, p_value, sep="\n")

In [None]:
# Wilcoxon signed-rank test on the paired per-fold AUC lists LR and NB
t_statistic, p_value = stats.wilcoxon(LR, NB)
print("\n LR - NB ", t_statistic, p_value, sep="\n")

In [None]:
# Wilcoxon signed-rank test on the paired per-fold AUC lists LR and DT
t_statistic, p_value = stats.wilcoxon(LR, DT)
print("\n LR - DT ", t_statistic, p_value, sep="\n")

In [None]:
# Wilcoxon signed-rank test on the paired per-fold AUC lists NB and DT
t_statistic, p_value = stats.wilcoxon(NB, DT)
print("\n NB - DT ", t_statistic, p_value, sep="\n")