# Importing the Libraries

In [63]:
import numpy as np
import pickle as pkl
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score

# Importing and splitting the proceesed dataset

In [34]:
dataset = pd.read_csv('Healthcare_processed_dataset.csv') #the preprocessing file is in the week8 file and it would be redundant to add it again here
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [35]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20)

# Initializing and training models 

In [None]:
class_RandomForest = RandomForestClassifier(n_estimators = 100, criterion = 'entropy', random_state = 0).fit(X_train, y_train)
class_GradientBoost = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0).fit(X_train, y_train)
class_DecisionTree = DecisionTreeClassifier(criterion = 'entropy', random_state = 0).fit(X_train, y_train)
class_KNeighbors = KNeighborsClassifier().fit(X_train, y_train)
class_KSVM = SVC(kernel = 'rbf', random_state = 0).fit(X_train, y_train)
class_SVM = SVC(kernel = 'linear', random_state = 0).fit(X_train, y_train)
class_NB = GaussianNB().fit(X_train, y_train)
class_LR = LogisticRegression(random_state = 0).fit(X_train, y_train)

# Obtaining test predictions

In [37]:
y_pred_RandomForest = class_RandomForest.predict(X_test)
y_pred_GradientBoost = class_GradientBoost.predict(X_test)
y_pred_DecisionTree = class_DecisionTree.predict(X_test)
y_pred_KNeighbors = class_KNeighbors.predict(X_test)
y_pred_KSVM = class_KSVM.predict(X_test)
y_pred_SVM = class_SVM.predict(X_test)
y_pred_NB = class_NB.predict(X_test)
y_pred_LR = class_LR.predict(X_test)

# Creating confusion matricies 

In [38]:
cm_RandomForest = confusion_matrix(y_test, y_pred_RandomForest)
cm_GradientBoost = confusion_matrix(y_test, y_pred_GradientBoost)
cm_DecisionTree = confusion_matrix(y_test, y_pred_DecisionTree)
cm_KNeighbors = confusion_matrix(y_test, y_pred_KNeighbors)
cm_KSVM = confusion_matrix(y_test, y_pred_KSVM)
cm_SVM = confusion_matrix(y_test, y_pred_SVM)
cm_NB = confusion_matrix(y_test, y_pred_NB)
cm_LR = confusion_matrix(y_test, y_pred_LR)

# Comparing models

In [55]:
print(cm_RandomForest)
print(accuracy_score(y_test, y_pred_RandomForest)*100)

[[379  48]
 [ 74 184]]
82.18978102189782


In [56]:
print(cm_GradientBoost)
print(accuracy_score(y_test, y_pred_GradientBoost)*100)

[[370  57]
 [ 82 176]]
79.7080291970803


In [57]:
print(cm_DecisionTree)
print(accuracy_score(y_test, y_pred_DecisionTree)*100)

[[324 103]
 [ 85 173]]
72.55474452554745


In [58]:
print(cm_KNeighbors)
print(accuracy_score(y_test, y_pred_KNeighbors)*100)

[[380  47]
 [ 93 165]]
79.56204379562044


In [59]:
print(cm_KSVM)
print(accuracy_score(y_test, y_pred_KSVM)*100)

[[385  42]
 [ 84 174]]
81.60583941605839


In [60]:
print(cm_SVM)
print(accuracy_score(y_test, y_pred_SVM)*100)

[[370  57]
 [ 77 181]]
80.43795620437956


In [61]:
print(cm_NB)
print(accuracy_score(y_test, y_pred_NB)*100)

[[ 16 411]
 [  5 253]]
39.270072992700726


In [62]:
print(cm_LR)
print(accuracy_score(y_test, y_pred_LR)*100)

[[367  60]
 [ 78 180]]
79.85401459854015


# Saving the best model

In [64]:
pkl.dump(class_RandomForest, open('model.pkl', 'wb'))