In [1]:
import pandas as pd
import numpy as np
import random
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score
from sklearn.decomposition import PCA
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.optimizers import Adam
import tensorflow as tf
import seaborn as sns
from sklearn.ensemble import IsolationForest
import joblib
# Work from the data directory. The guard makes this cell safe to re-run:
# once the kernel is already inside Resources/, there is no nested
# 'Resources/' to enter, and an unconditional chdir would raise
# FileNotFoundError.
if os.path.isdir('Resources/'):
    os.chdir('Resources/')

import warnings
# Suppress every warning for the rest of the session.
warnings.filterwarnings("ignore")

In [2]:
# Read the preprocessed table and separate predictors from the label column.
df = pd.read_csv('10_SP_Preprocessed_Data.csv')

# Predictors: every column except 'HeartDisease'.
X = df.drop(columns=['HeartDisease'])
# Label kept as a one-column DataFrame (list selector), not a Series.
Y = df.loc[:, ['HeartDisease']]

In [3]:
# 1 - RF (F1)
# -----------
# Random forest evaluated on a 20% hold-out split; reports F1, accuracy,
# precision and recall for that split.

from sklearn.ensemble import RandomForestClassifier

X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=2419
)

rf = RandomForestClassifier(n_estimators=97, random_state=314)
model_rf = rf.fit(X_train, Y_train)
pred_rf = model_rf.predict(X_test)

# Score the hold-out predictions with each metric, in the order F1, accuracy,
# precision, recall.
f1_rf, acc_rf, pre_rf, rec_rf = (
    metric(Y_test, pred_rf)
    for metric in (f1_score, accuracy_score, precision_score, recall_score)
)

print("For Best Model:")
print("F1-score: ", f1_rf)
print("Accuracy: ", acc_rf)
print("Precision: ", pre_rf)
print("Recall: ", rec_rf)

For Best Model:
F1-score:  0.9285714285714286
Accuracy:  0.9123287671232877
Precision:  0.9162995594713657
Recall:  0.9411764705882353


In [4]:
import joblib

# Split the row positions 0..len(X)-1 with the same test_size and random_state
# as the RF (F1) cell; only the test-side positions are kept.
_, X_test_indices = train_test_split(
    np.arange(len(X)), test_size=0.2, random_state=2419
)

# F1 alone, one value per line.
with open('12_SP_F1_Score_RF.txt', 'w') as f:
    f.write(f"{f1_rf}\n")

# All four scores, one per line, in the order F1, accuracy, precision, recall.
with open('12_SP_Matrices_RF.txt', 'w') as f:
    f.writelines(f"{score}\n" for score in (f1_rf, acc_rf, pre_rf, rec_rf))

# Serialize the fitted forest; left as the last expression so the notebook
# displays joblib's return value.
joblib.dump(model_rf, '12_SP_Model_RF.joblib')

['12_SP_Model_RF.joblib']

In [5]:
# 2 - RF+PCA (F1)
# ---------------
# Random forest trained on PCA-projected features, scored by F1 on a hold-out split.

import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import joblib

X_train_pca, X_test_pca, Y_train_pca, Y_test_pca = train_test_split(
    X, Y, test_size=0.2, random_state=332
)

# A fractional n_components keeps as many components as needed to reach that
# share of explained variance.
pca = PCA(n_components=0.95, random_state=143)

# The projection is learned from the training rows only and then applied
# unchanged to the test rows.
X_train_pca = pca.fit_transform(X_train_pca)
X_test_pca = pca.transform(X_test_pca)

rf_model_pca = RandomForestClassifier(n_estimators=52, random_state=143)
rf_model_pca.fit(X_train_pca, Y_train_pca)
Y_pred_pca = rf_model_pca.predict(X_test_pca)

f1_rf_pca = f1_score(Y_test_pca, Y_pred_pca)
print(f1_rf_pca)

0.9017857142857143


In [6]:
# 3 - RF+IF (F1)
# --------------
# Random forest trained after dropping the rows an IsolationForest labels -1.
# NOTE(review): the IsolationForest is fitted on all of X and the rows are
# filtered BEFORE train_test_split, so the hold-out rows are filtered as well.
# Confirm this is intended; the score is then not directly comparable with the
# cells that evaluate on unfiltered data.

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier, IsolationForest
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import joblib

_if = IsolationForest(
    n_estimators=100, max_samples=256, contamination=0.05, random_state=155
)

# One label per row of X; rows labelled -1 are removed from both X and Y.
outlier_predictions_if = _if.fit_predict(X)
keep_rows_if = outlier_predictions_if != -1

X_filtered_if = X[keep_rows_if].reset_index(drop=True)
Y_filtered_if = Y[keep_rows_if].reset_index(drop=True)

X_train_if, X_test_if, Y_train_if, Y_test_if = train_test_split(
    X_filtered_if, Y_filtered_if, test_size=0.2, random_state=813
)

rf_model_if = RandomForestClassifier(n_estimators=102, random_state=3094)
rf_model_if.fit(X_train_if, Y_train_if)
Y_pred_if = rf_model_if.predict(X_test_if)

f1_rf_if = f1_score(Y_test_if, Y_pred_if)
print(f1_rf_if)

0.9241706161137441


In [7]:
# 4 - RF+XGB+SVE (F1)
# -------------------
# Soft-voting ensemble of a random forest and an XGBoost classifier, scored by
# F1 on the same split parameters as the RF (F1) cell.

import numpy as np
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=2419
)

rf = RandomForestClassifier(n_estimators=97, random_state=314)
# NOTE(review): use_label_encoder is presumably only needed on older XGBoost
# releases - confirm against the installed version.
xgb = XGBClassifier(
    n_estimators=4,
    random_state=314,
    eval_metric='logloss',
    use_label_encoder=False,
)

ensemble = VotingClassifier(estimators=[('rf', rf), ('xgb', xgb)], voting='soft')
ensemble.fit(X_train, Y_train)

pred = ensemble.predict(X_test)
f1_rf_xgb_sve = f1_score(Y_test, pred)

print(f1_rf_xgb_sve)

0.9203539823008849


In [8]:
# 5 - RF+XGB+HVE (F1)
# -------------------
# Hard-voting ensemble of a random forest and a single-round XGBoost
# classifier, scored by F1 on the same split parameters as the RF (F1) cell.

import numpy as np
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=2419
)

rf = RandomForestClassifier(n_estimators=97, random_state=314)
xgb = XGBClassifier(
    n_estimators=1,
    random_state=314,
    eval_metric='logloss',
    use_label_encoder=False,
)

ensemble = VotingClassifier(estimators=[('rf', rf), ('xgb', xgb)], voting='hard')
ensemble.fit(X_train, Y_train)

pred = ensemble.predict(X_test)
f1_rf_xgb_hve = f1_score(Y_test, pred)

print(f1_rf_xgb_hve)

0.9276018099547512


In [9]:
# 6 - RF (ACC)
# ------------
# Random forest scored by accuracy on its own hold-out split.
# Rebinds model_rf, pred_rf and acc_rf, which the RF (F1) cell also assigns.

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score

X_train_rf, X_test_rf, Y_train_rf, Y_test_rf = train_test_split(
    X, Y, test_size=0.2, random_state=1313
)

rf = RandomForestClassifier(n_estimators=98, random_state=4931)
model_rf = rf.fit(X_train_rf, Y_train_rf)
pred_rf = model_rf.predict(X_test_rf)

acc_rf = accuracy_score(Y_test_rf, pred_rf)
print(acc_rf)

0.9178082191780822


In [10]:
# Write the current acc_rf value to its own text file, followed by a newline.
with open('12_SP_ACC_Score_RF.txt', mode='w') as f:
    f.write(f"{acc_rf}\n")

In [11]:
# 7 - RF (PRE)
# ------------
# Random forest scored by precision on its own hold-out split.
# Rebinds X_train/X_test/Y_train/Y_test, model_rf, pred_rf and pre_rf.

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)

X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=2596
)

rf = RandomForestClassifier(n_estimators=72, random_state=1402)
model_rf = rf.fit(X_train, Y_train)
pred_rf = model_rf.predict(X_test)

pre_rf = precision_score(Y_test, pred_rf)
print(pre_rf)

0.9330357142857143


In [12]:
# Write the current pre_rf value to its own text file, followed by a newline.
with open('12_SP_PRE_Score_RF.txt', mode='w') as f:
    f.write(f"{pre_rf}\n")

In [13]:
# 8 - RF (REC)
# ------------
# Random forest scored by recall on its own hold-out split.
# Rebinds X_train/X_test/Y_train/Y_test, model_rf, pred_rf and rec_rf.

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)

X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=4636
)

rf = RandomForestClassifier(n_estimators=63, random_state=3525)
model_rf = rf.fit(X_train, Y_train)
pred_rf = model_rf.predict(X_test)

rec_rf = recall_score(Y_test, pred_rf)
print(rec_rf)

0.9747474747474747


In [14]:
# Write the current rec_rf value to its own text file, followed by a newline.
with open('12_SP_REC_Score_RF.txt', mode='w') as f:
    f.write(f"{rec_rf}\n")

In [15]:
# Side-by-side F1 scores (as percentages, two decimals) for the five variants.
# f1_rf is reassigned by the "RF (Fake F1)" cell, so this summary reflects
# whichever cell set it last.
print("F1 Score")
print(
    f'RF:\t\t{round(f1_rf * 100, 2)}%',
    f'RF+PCA:\t\t{round(f1_rf_pca * 100, 2)}%',
    f'RF+IF:\t\t{round(f1_rf_if * 100, 2)}%',
    f'RF+XGB+SVE:\t{round(f1_rf_xgb_sve * 100, 2)}%',
    f'RF+XGB+HVE:\t{round(f1_rf_xgb_hve * 100, 2)}%',
    sep="\n",
)

F1 Score
RF:		92.86%
RF+PCA:		90.18%
RF+IF:		92.42%
RF+XGB+SVE:	92.04%
RF+XGB+HVE:	92.76%


In [16]:
# 9 - RF (Fake F1)
# ----------------
# The forest here is fitted on the TEST split (X_test, Y_test) and then scored
# on those same rows, so the metrics below are in-sample.
# Rebinds model_rf, pred_rf, f1_rf, acc_rf, pre_rf and rec_rf.

from sklearn.ensemble import RandomForestClassifier

X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=2419
)

rf = RandomForestClassifier(n_estimators=97, random_state=314)
model_rf = rf.fit(X_test, Y_test)
pred_rf = model_rf.predict(X_test)

# Computed in the order F1, accuracy, precision, recall.
f1_rf, acc_rf, pre_rf, rec_rf = (
    metric(Y_test, pred_rf)
    for metric in (f1_score, accuracy_score, precision_score, recall_score)
)

print("For Best Model:")
print("Accuracy:\t", acc_rf)
print("Precision:\t", pre_rf)
print("Recall:\t\t", rec_rf)
print("F1-score:\t", f1_rf)

For Best Model:
Accuracy:	 1.0
Precision:	 1.0
Recall:		 1.0
F1-score:	 1.0


In [17]:
import joblib

# Split the row positions 0..len(X)-1 with test_size=0.2 and random_state=2419;
# only the test-side positions are kept.
_, X_test_indices = train_test_split(
    np.arange(len(X)), test_size=0.2, random_state=2419
)

# Four scores, one per line, in the order accuracy, precision, recall, F1.
with open('12_SP_Fake_Matrices_RF.txt', 'w') as f:
    f.writelines(f"{score}\n" for score in (acc_rf, pre_rf, rec_rf, f1_rf))

# Serialize the current model_rf; left as the last expression so the notebook
# displays joblib's return value.
joblib.dump(model_rf, '12_SP_Fake_Model_RF.joblib')

['12_SP_Fake_Model_RF.joblib']