<a href="https://www.kaggle.com/code/nsff591/heart-failure-prediction-random-forest-0-85?scriptVersionId=97815720" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# Heart Failure Prediction

## Importing libraries

In [None]:
import numpy as np # making use of arrays
import matplotlib.pyplot as plt # plotting the data
import pandas as pd # importing the data

from sklearn.model_selection import train_test_split # splitting the data
from sklearn.preprocessing import StandardScaler # feature scaling

from sklearn.svm import SVC # SVM model
from sklearn.linear_model import LogisticRegression # logistic regression model
from sklearn.ensemble import RandomForestClassifier # Random forest model
from sklearn.naive_bayes import GaussianNB # naive bayes model
from xgboost import XGBClassifier # XGBoost model
import xgboost as xgb

from sklearn.metrics import confusion_matrix, accuracy_score, classification_report # analysing the results
from sklearn.model_selection import cross_val_score # k-fold cross validation

print("Importing Complete!")

## Importing the Dataset

In [None]:
# Load the heart failure (hf) clinical records into a DataFrame.
hf_data = pd.read_csv('../input/heart-failure-clinical-data/heart_failure_clinical_records_dataset.csv')

# Every column except the last is a feature; the last column is the target.
x = hf_data.iloc[:, :-1].to_numpy()
y = hf_data.iloc[:, -1].to_numpy()

# Report (rows, columns) and preview the first five records.
print(hf_data.shape)
hf_data.head(5)

## Splitting the data into training and test sets

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

# Render the first training row with every float shown without decimals.
unscaled_example = np.array2string(
    x_train[0],
    formatter={'float_kind': lambda value: "%.0f" % value},
)
print("Unscaled training data example:" + unscaled_example)

## Feature Scaling

In [None]:
# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both splits so no test information leaks in.
sc = StandardScaler()
sc.fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

# Render the first scaled training row with three decimals per value.
scaled_example = np.array2string(
    x_train[0],
    formatter={'float_kind': lambda value: "%.3f" % value},
)
print("Scaled training data example:" + scaled_example)

## Training the Naive Bayes model on the training set

In [None]:
# Fit a Gaussian naive Bayes classifier on the scaled training data.
classifier = GaussianNB()
classifier.fit(X=x_train, y=y_train)

## Predicting the test set results

In [None]:
# Predict the label of every held-out row with the naive Bayes model.
y_pred = classifier.predict(X=x_test)

print("Showing first 10 predictions compared to the test data: ")
# Left column: predicted label; right column: true label.
print(np.column_stack((y_pred, y_test))[:10])

## Making the Confusion matrix

In [None]:
# Confusion matrix (rows: true class, columns: predicted class) followed by
# the overall accuracy of the naive Bayes predictions.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)
print(accuracy_score(y_true=y_test, y_pred=y_pred))

## Reviewing the model

In [None]:
# Per-class precision/recall/F1 for the naive Bayes model.
# classification_report pairs target_names with the labels in sorted order,
# so the names must follow label order: 0 = patient survived ("Alive"),
# 1 = death event ("Dead"). The labels are passed explicitly so the mapping
# cannot drift if a class is missing from the test split.
nb_report = classification_report(
    y_test,
    y_pred,
    labels=[0, 1],
    target_names=['Alive', 'Dead'],
)
print(nb_report)

## Training other models: Support Vector Machine (RBF)

In [None]:
# Support vector classifier with an RBF kernel; the solver is capped at 100
# iterations and probability estimates are enabled.
classifier_svm = SVC(
    kernel='rbf',
    random_state=0,
    max_iter=100,
    probability=True,
)
classifier_svm.fit(X=x_train, y=y_train)

## Making the Confusion matrix

In [None]:
# Score the SVM on the held-out rows: confusion matrix
# (rows: true class, columns: predicted class), then overall accuracy.
y_pred_svm = classifier_svm.predict(X=x_test)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred_svm)
print(cm)
print(accuracy_score(y_true=y_test, y_pred=y_pred_svm))

## Reviewing the model

In [None]:
# Per-class precision/recall/F1 for the SVM model.
# classification_report pairs target_names with the labels in sorted order,
# so the names must follow label order: 0 = patient survived ("Alive"),
# 1 = death event ("Dead"). The labels are passed explicitly so the mapping
# cannot drift if a class is missing from the test split.
svm_report = classification_report(
    y_test,
    y_pred_svm,
    labels=[0, 1],
    target_names=['Alive', 'Dead'],
)
print(svm_report)

## Training other models: Logistic Regression

In [None]:
# Logistic regression with a raised iteration limit and a fixed seed.
classifier_LR = LogisticRegression(
    max_iter=500,
    random_state=0,
)
classifier_LR.fit(X=x_train, y=y_train)

## Making the Confusion matrix

In [None]:
# Score logistic regression on the held-out rows: confusion matrix
# (rows: true class, columns: predicted class), then overall accuracy.
y_pred_LR = classifier_LR.predict(X=x_test)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred_LR)
print(cm)
print(accuracy_score(y_true=y_test, y_pred=y_pred_LR))

## Reviewing the model

In [None]:
# Per-class precision/recall/F1 for the logistic regression model.
# classification_report pairs target_names with the labels in sorted order,
# so the names must follow label order: 0 = patient survived ("Alive"),
# 1 = death event ("Dead"). The labels are passed explicitly so the mapping
# cannot drift if a class is missing from the test split.
lr_report = classification_report(
    y_test,
    y_pred_LR,
    labels=[0, 1],
    target_names=['Alive', 'Dead'],
)
print(lr_report)

## Training other models: Random Forest

In [None]:
# Random forest of 100 trees split on the entropy criterion, seeded for
# reproducibility.
classifier_rf = RandomForestClassifier(
    n_estimators=100,
    criterion='entropy',
    random_state=0,
)
classifier_rf.fit(X=x_train, y=y_train)

## Making the Confusion matrix

In [None]:
# Score the random forest on the held-out rows: confusion matrix
# (rows: true class, columns: predicted class), then overall accuracy.
y_pred_rf = classifier_rf.predict(X=x_test)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred_rf)
print(cm)
print(accuracy_score(y_true=y_test, y_pred=y_pred_rf))

## Reviewing the model

In [None]:
# Per-class precision/recall/F1 for the random forest model.
# classification_report pairs target_names with the labels in sorted order,
# so the names must follow label order: 0 = patient survived ("Alive"),
# 1 = death event ("Dead"). The labels are passed explicitly so the mapping
# cannot drift if a class is missing from the test split.
rf_report = classification_report(
    y_test,
    y_pred_rf,
    labels=[0, 1],
    target_names=['Alive', 'Dead'],
)
print(rf_report)

## Training other models: XGBoost

In [None]:
# Gradient-boosted trees evaluated with log loss; the label encoder is
# disabled explicitly.
classifier_xgb = XGBClassifier(
    use_label_encoder=False,
    eval_metric='logloss',
)
classifier_xgb.fit(X=x_train, y=y_train)

## Making the Confusion matrix

In [None]:
# Score XGBoost on the held-out rows: confusion matrix
# (rows: true class, columns: predicted class), then overall accuracy.
y_pred_xgb = classifier_xgb.predict(X=x_test)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred_xgb)
print(cm)
print(accuracy_score(y_true=y_test, y_pred=y_pred_xgb))

## Reviewing the model

In [None]:
# Per-class precision/recall/F1 for the XGBoost model.
# classification_report pairs target_names with the labels in sorted order,
# so the names must follow label order: 0 = patient survived ("Alive"),
# 1 = death event ("Dead"). The labels are passed explicitly so the mapping
# cannot drift if a class is missing from the test split.
xgb_report = classification_report(
    y_test,
    y_pred_xgb,
    labels=[0, 1],
    target_names=['Alive', 'Dead'],
)
print(xgb_report)

## Applying k-fold Cross validation

In [None]:
# 10-fold cross-validation of the XGBoost model on the training split;
# report the mean and spread of the per-fold scores as percentages.
accuracies = cross_val_score(
    estimator=classifier_xgb,
    X=x_train,
    y=y_train,
    cv=10,
)
print(f"Accuracy: {accuracies.mean() * 100:.2f} %")
print(f"Standard Deviation: {accuracies.std() * 100:.2f} %")

## Boosting Trees

In [None]:
# Draw the tree at index 0 of the fitted XGBoost model, then render the figure.
xgb.plot_tree(classifier_xgb,num_trees=0)
plt.show()


## Feature importance

In [None]:
# Create the sized figure BEFORE plotting and hand its axes to xgboost.
# Calling plt.figure(...) after plot_importance would open a second, empty
# figure and leave the importance chart at the default size.
_importance_fig, importance_ax = plt.subplots(figsize=(10, 10), dpi=150)
xgb.plot_importance(classifier_xgb, ax=importance_ax)
plt.show()

In [None]:
# Hard-coded column positions in hf_data, listed from most to least
# important (presumably read off the importance plot -- re-check them if
# the model or the data changes).
top_feature_names = [hf_data.columns[position] for position in (11, 4, 2, 7)]
print("Most important features: " + " > ".join(top_feature_names))