## Ensemble

In [1]:
# Libraries
import pickle
from pickle import load

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
)
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier

# Directory prefix prepended to the parquet file names when the train/test
# splits are loaded. It is a relative path, so it resolves against the
# kernel's current working directory.
root_path = "../../Data/GoogleDrive/"

In [2]:
# Read the train/test feature and target splits from parquet, in the same
# order as the names on the left-hand side.
X_train, X_test, y_train, y_test = (
    pd.read_parquet(root_path + file_name)
    for file_name in (
        "X_train.parquet",
        "X_test.parquet",
        "y_train.parquet",
        "y_test.parquet",
    )
)

In [3]:
# Load the previously pickled estimators from disk.
# NOTE: unpickling can execute arbitrary code, so only load files from a
# trusted source.

# Root directory
root = '../../Data/GoogleDrive/'

# File names
log_name = 'logit_model_fixed.pkl'
knn_name = 'knn_model_fixed.pkl'
rf_name = 'rf_model_fixed.pkl'
xgb_name = 'xgboost_model_fixed.pkl'


def _unpickle(path):
    """Return the object stored in the pickle file at `path`."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


logit_model = _unpickle(root + log_name)
knn_model = _unpickle(root + knn_name)
rf_model = _unpickle(root + rf_name)
xgb_model = _unpickle(root + xgb_name)

In [5]:
# Model
# Soft voting predicts the class with the highest summed predicted
# probability across the four estimators.
# NOTE: VotingClassifier.fit() fits fresh clones of the estimators passed in,
# so the loaded models are re-trained on X_train here; their previously
# fitted state is not reused.
ensemble = VotingClassifier(
    estimators=[
        ('logit', logit_model),
        ('knn', knn_model),
        ('rf', rf_model),
        ('xgb', xgb_model),
    ],
    voting='soft',
)

# Flatten the target to 1-D. np.asarray accepts both the DataFrame read from
# parquet and an already-flattened array, so this cell can be re-run safely
# (the previous `.values.ravel()` raised AttributeError on a second run).
y_train = np.asarray(y_train).ravel()

ensemble.fit(X_train, y_train)

# Predictions
# Predict once and derive the accuracy from these predictions; this is the
# same quantity `ensemble.score(X_test, y_test)` reports, without running the
# ensemble over the test set additional times.
y_pred = ensemble.predict(X_test)

print('Ensemble model accuracy: ', accuracy_score(y_test, y_pred))

In [None]:
# Create confusion matrix (rows: actual class, columns: predicted class).
# NOTE(review): the row/column labels assume the target is encoded as
# 0 = no heart disease, 1 = heart disease — confirm against the data
# preparation step.
ensemble_cf = pd.DataFrame(
    confusion_matrix(y_test, y_pred),
    columns=['predicted_no_heart_disease', 'predicted_heart_disease'],
    index=['no_heart_disease', 'has_heart_disease'],
)

# Show the labelled matrix as the cell output.
ensemble_cf

In [None]:
# Evaluate the ensemble on the held-out test set.

# Accuracy
ens_accuracy = accuracy_score(y_test, y_pred)

# Precision
ens_precision = precision_score(y_test, y_pred)

# Recall
ens_recall = recall_score(y_test, y_pred)

# F1
ens_f1 = f1_score(y_test, y_pred)

# AUC ROC
# ROC AUC measures how well the model *ranks* samples, so it needs continuous
# scores rather than hard 0/1 predictions (hard labels reduce the curve to a
# single threshold). Column 1 of predict_proba is the probability of
# ensemble.classes_[1], the class with the greater label, which is what
# roc_auc_score expects in the binary case.
ens_roc = roc_auc_score(y_test, ensemble.predict_proba(X_test)[:, 1])

# Display all metrics together as the cell output.
pd.Series(
    {
        'accuracy': ens_accuracy,
        'precision': ens_precision,
        'recall': ens_recall,
        'f1': ens_f1,
        'roc_auc': ens_roc,
    },
    name='ensemble',
)