# Heart Failure 🩺 EDA + Predictions

#### If you like my work, it would be great if you could upvote this notebook!
#### If not, leaving a comment on what I need to work on and improve would be really helpful!

## Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.simplefilter("ignore")

## Loading up the data

In [None]:
# Read the heart-failure clinical records CSV from the Kaggle input directory
# into a DataFrame, then preview its first rows.
df = pd.read_csv("../input/heart-failure-clinical-data/heart_failure_clinical_records_dataset.csv")
df.head()

In [None]:
# Summarise column names, dtypes and non-null counts.
df.info()

In [None]:
# Count the missing (NaN) values in every column of the dataset.
df.isna().sum()

In [None]:
# Annotated heatmap of the pairwise correlations between the columns

fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(
    data=df.corr(),
    ax=ax,
    cmap="viridis",
    annot=True,
    fmt='.1g',
    cbar=False,
);

In [None]:
# Bar chart of how many patients fall into each DEATH_EVENT class.
plt.style.use("classic")
fig, ax = plt.subplots(figsize=(8,6))
# Pass the column as `x=`: in recent seaborn releases the first positional
# argument is `data`, so a positional Series is no longer read as the x variable.
sns.countplot(x=df["DEATH_EVENT"], palette=["seagreen","firebrick"], ax=ax);

In [None]:
# Distribution of patient age: density histogram with a KDE curve on top.
plt.style.use("classic")
fig, ax = plt.subplots(figsize=(8,6))
# `sns.distplot` is deprecated; `histplot` with kde=True and stat="density"
# is the documented replacement (cut=3 mirrors distplot's KDE extent).
sns.histplot(x=df["age"], kde=True, stat="density", kde_kws={"cut": 3}, color="r", ax=ax);
plt.show()

The ages of the patients are mostly concentrated between `55 and 75 years`, with the maximum age being 95 and the minimum being 40 years.

In [None]:
# Pie chart of patients with and without high blood pressure.
# Matplotlib 3.6 renamed the "seaborn" style sheet to "seaborn-v0_8";
# pick whichever name the installed version provides.
seaborn_style = "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn"
plt.style.use(seaborn_style)
fig, ax = plt.subplots(figsize=(8,6))
colors = ["lightcoral", "maroon"]
# value_counts() orders by frequency, which would silently swap the labels if the
# class balance changed; sort_index() ties each slice to its category value.
# NOTE(review): assumes the lower code means "normal" and the higher means "high" — confirm.
bp_counts = df["high_blood_pressure"].value_counts().sort_index()
ax.pie(bp_counts, labels=["Normal Blood Pressure", "High Blood Pressure"],
       explode = (0.1,0), shadow=True, colors=colors);

In [None]:
# Bar chart of how many patients fall into each `diabetes` category.
plt.style.use("classic")
fig, ax = plt.subplots(figsize=(8,6))
# Pass the column as `x=`: in recent seaborn releases the first positional
# argument is `data`, so a positional Series is no longer read as the x variable.
sns.countplot(x=df["diabetes"], palette=["skyblue", "deepskyblue"], ax=ax);

In [None]:
# Bar chart of how many patients fall into each `anaemia` category.
plt.style.use("classic")
fig, ax = plt.subplots(figsize=(8,6))
# Pass the column as `x=`: in recent seaborn releases the first positional
# argument is `data`, so a positional Series is no longer read as the x variable.
sns.countplot(x=df["anaemia"], palette=["olivedrab","darkolivegreen"], ax=ax);

In [None]:
# Bar chart of how many patients fall into each `sex` category.
plt.style.use("classic")
fig, ax = plt.subplots(figsize=(8,6))
# Pass the column as `x=`: in recent seaborn releases the first positional
# argument is `data`, so a positional Series is no longer read as the x variable.
sns.countplot(x=df["sex"], palette=["pink", "skyblue"], ax=ax);

The majority of the patients with the condition are `Male`.

In [None]:
# Pie chart of smokers versus non-smokers.
# Matplotlib 3.6 renamed the "seaborn" style sheet to "seaborn-v0_8";
# pick whichever name the installed version provides.
seaborn_style = "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn"
plt.style.use(seaborn_style)
fig, ax = plt.subplots(figsize=(8,6))
colors = ["mediumorchid", "darkorchid"]
# value_counts() orders by frequency, which would silently swap the labels if the
# class balance changed; sort_index() ties each slice to its category value.
# NOTE(review): assumes the lower code means "non-smoker" and the higher means "smoker" — confirm.
smoking_counts = df["smoking"].value_counts().sort_index()
ax.pie(smoking_counts, labels=["Non-Smoker", "Smoker"],
       explode = (0.05,0), shadow=True, colors=colors);

In [None]:
# Preview the first rows again before building the model inputs.
df.head()

## Splitting the data into training and test datasets
Here, we are trying to predict the `Death` of the patient using the given data.
Hence, the `DEATH_EVENT` column will be the `y` label and the rest of the data will be the `X`, or input, data.

In [None]:
# X data: every column except the DEATH_EVENT target.
# drop() returns a new frame, so `df` itself is left unchanged.
X = df.drop("DEATH_EVENT", axis=1)

In [None]:
# Preview the feature matrix.
X.head()

In [None]:
# y data: the DEATH_EVENT column, used as the prediction target.
y = df["DEATH_EVENT"]

In [None]:
# Preview the target values.
y.head()

In [None]:
# Hold out 10% of the rows as a test set; the fixed seed keeps the split repeatable

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=4,
    test_size=0.1,
)

In [None]:
# Number of rows in the training and test feature sets.
len(X_train), len(X_test)

## Training the Model


## Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Random forest with default hyperparameters; the fixed seed makes the fit repeatable.
rfc = RandomForestClassifier(random_state=4)

In [None]:
# Fit the random forest on the training split.
rfc.fit(X_train, y_train)

In [None]:
# Predicted labels for the test split (compare with the true labels shown next).
rfc.predict(X_test)

In [None]:
# True test labels as a plain array, for side-by-side comparison with the predictions.
np.array(y_test)

In [None]:
# Score the random forest on the test split; the stored value is a fraction,
# and it is multiplied by 100 only for display.
RandomForestClassifierScore = rfc.score(X_test,y_test)
print("Accuracy obtained by Random Forest Classifier model:", RandomForestClassifierScore*100)

## Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
# Logistic regression with the library's default settings.
# NOTE(review): warnings are silenced globally at the top of the notebook, so any
# convergence warning raised when this model is fitted would go unseen — confirm.
lr = LogisticRegression()

In [None]:
# Fit the logistic regression on the training split.
lr.fit(X_train, y_train)

In [None]:
# Predicted labels for the test split (compare with the true labels shown next).
lr.predict(X_test)

In [None]:
# True test labels as a plain array, for side-by-side comparison with the predictions.
np.array(y_test)

In [None]:
# Score the logistic regression on the test split; the stored value is a fraction,
# and it is multiplied by 100 only for display.
LogisticRegressionScore = lr.score(X_test,y_test)
print("Accuracy obtained by Logistic Regression model:", LogisticRegressionScore*100)

## Decision Tree Classifier

In [None]:
from sklearn.tree import DecisionTreeClassifier
# Seed the tree (same seed as the random forest and the train/test split) so that
# Restart & Run All reproduces the same fitted model and accuracy.
tree = DecisionTreeClassifier(random_state=4)
tree.fit(X_train,y_train)

In [None]:
# Score the decision tree on the test split; the stored value is a fraction,
# and it is multiplied by 100 only for display.
DecisionTreeClassifierScore = tree.score(X_test,y_test)
print("Accuracy obtained by Decision Tree Classifier model:",DecisionTreeClassifierScore*100)

## Gradient Boosting Classifier

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
# Seed the booster (same seed as the random forest and the train/test split) so that
# Restart & Run All reproduces the same fitted model and accuracy.
gb = GradientBoostingClassifier(random_state=4)
gb.fit(X_train, y_train)

In [None]:
# Score the gradient boosting model on the test split; the stored value is a
# fraction, and it is multiplied by 100 only for display.
GradientBoostingClassifierScore = gb.score(X_test,y_test)
print("Accuracy obtained by Gradient Boosting Classifier model:",GradientBoostingClassifierScore*100)

## CatBoost Classifier

In [None]:
from catboost import CatBoostClassifier
# CatBoost limited to 10 boosting iterations, fitted on the training split.
# The trailing semicolon suppresses the cell's return-value display.
cat = CatBoostClassifier(iterations=10)
cat.fit(X_train, y_train);

In [None]:
# Score the CatBoost model on the test split; the stored value is multiplied
# by 100 only for display.
CatBoostClassifierScore = cat.score(X_test,y_test)
print("Accuracy obtained by CatBoost Classifier model:",CatBoostClassifierScore*100)

## K Neighbors Classifier

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier voting over the 4 closest training samples
knn = KNeighborsClassifier(n_neighbors=4)
knn.fit(X_train, y_train)

In [None]:
# Score the K-neighbours model on the test split. This must call `knn.score`;
# using `cat.score` would report the CatBoost accuracy under the KNN label.
# The stored value is a fraction; it is multiplied by 100 only for display.
KNeighborsClassifierScore = knn.score(X_test,y_test)
print("Accuracy obtained by K Neighbors Classifier model:",KNeighborsClassifierScore*100)

In [None]:
# Bar chart comparing the test accuracy of every model trained above.
plt.style.use("classic")
# Dedicated names for the chart data: binding these lists to `x`/`y` would
# overwrite the target Series `y`, breaking any re-run of the train/test split cell.
model_names = ["LogisticRegression", 
               "Decision Tree Classifier", 
               "RandomForestClassifier", 
               "KNeighborsClassifier", 
               "CatBoost Classifier", 
               "Gradient Boosting Classifier"]

# Scores are listed in the same order as `model_names`.
model_scores = [LogisticRegressionScore, 
                DecisionTreeClassifierScore, 
                RandomForestClassifierScore, 
                KNeighborsClassifierScore, 
                CatBoostClassifierScore, 
                GradientBoostingClassifierScore]

fig, ax = plt.subplots(figsize=(8,6))
sns.barplot(x=model_names, y=model_scores, palette="crest", ax=ax);
ax.set_xlabel("Models")
ax.set_ylabel("Model Accuracy")
plt.xticks(rotation=40)
ax.set_title("Model Comparison - Model Accuracy");

## Hyperparameter Tuning

In [None]:
from sklearn.model_selection import GridSearchCV

# Search space for LogisticRegression, split into sub-grids so that every
# penalty is only paired with solvers that implement it. A single cross-product
# grid produces many combinations whose fits fail, and "elastic" is not a valid
# penalty name (the estimator expects "elasticnet").
c_values = np.logspace(-4, 4, 20)
max_iters = [100, 1000, 2500, 5000]

param_grid = [
    {
      # L1 is only implemented by the liblinear and saga solvers.
      "penalty": ["l1"],
      "C" : c_values,
      "solver" : ["saga", "liblinear"],
      "max_iter" : max_iters
    },
    {
      # L2 is supported by every solver.
      "penalty": ["l2"],
      "C" : c_values,
      "solver" : ["sag", "saga", "lbfgs", "liblinear", "newton-cg"],
      "max_iter" : max_iters
    },
    {
      # Elastic-net needs the saga solver and an explicit l1_ratio.
      "penalty": ["elasticnet"],
      "C" : c_values,
      "solver" : ["saga"],
      "l1_ratio" : [0.25, 0.5, 0.75],
      "max_iter" : max_iters
    },
    {
      # Unpenalised fit: C has no effect, and liblinear does not support it.
      # NOTE(review): scikit-learn >= 1.2 spells this `None` and later releases
      # reject the string "none" — confirm against the installed version.
      "penalty": ["none"],
      "solver" : ["sag", "saga", "lbfgs", "newton-cg"],
      "max_iter" : max_iters
    }
]

In [None]:
# Grid search over `param_grid` for the logistic regression estimator, using
# 5-fold cross-validation; n_jobs=-1 runs the fits in parallel on all cores.
clf = GridSearchCV(lr, param_grid=param_grid, cv=5, verbose=True, n_jobs=-1)

In [None]:
# Run the search on the training split. fit() returns the search object itself,
# so `best_clf` and `clf` refer to the same fitted GridSearchCV.
best_clf = clf.fit(X_train,y_train)

In [None]:
# Estimator configured with the best parameter combination found by the search.
best_clf.best_estimator_

In [None]:
# Parameter combination that achieved the best cross-validated score.
best_clf.best_params_

In [None]:
# Score the tuned model on the held-out test split. The value is printed as a
# fraction with three decimals, not as a percentage like the earlier cells.
print(f"Accuracy of the Logistic Regression Model after doing GridSearchCV: {best_clf.score(X_test,y_test):.3f}")