#**Features Explanation:**
age: Age of the patient

Anaemia: Decrease of red blood cells or hemoglobin (0 = no, 1 = yes)

High blood pressure: If a patient has hypertension (0 = no, 1 = yes)

Creatinine phosphokinase (CPK): Level of the CPK enzyme in the blood (range from 23mcg/L to 7861mcg/L)

Diabetes: If the patient has diabetes (0 = no, 1 = yes)

Ejection fraction: Percentage of blood leaving the heart at each contraction (range from 14% to 80%)

sex: The person's sex (1 = male, 0 = female)

Platelets: Platelets in the blood (range from 25 kiloplatelets/mL to 850 kiloplatelets/mL)

Serum creatinine: Level of creatinine in the blood (range from 0.50mg/dL to 9.40mg/dL)

Serum sodium: Level of sodium in the blood (range from 114mEq/L to 148mEq/L)

Smoking: If the patient smokes (0 = no, 1 = yes)

Time: Follow-up period (range from 4 days to 285 days)

(target) death event: If the patient died during the follow-up period (0 = no, 1 = yes)


**Boolean features**

  - Sex: Sex of the patient (Male = 1, Female = 0)
  - Diabetes: 0 = No, 1 = Yes
  - Anaemia: 0 = No, 1 = Yes
  - High_blood_pressure: 0 = No, 1 = Yes
  - Smoking: 0 = No, 1 = Yes
  - DEATH_EVENT: 0 = No, 1 = Yes

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Load the heart-failure clinical records straight from the course repository.
clinical = pd.read_csv('https://raw.githubusercontent.com/laiz6660666/BA765-Session02/master/heart_failure_clinical_records_dataset.csv')
# A bare `clinical.head()` in the middle of a cell is evaluated and then
# thrown away (only the last expression of a cell is rendered, and nothing is
# rendered in a plain script), so print the preview explicitly.
print(clinical.head())
# Column names, dtypes and non-null counts.
clinical.info()

#**Machine Learning Models**

#Feature Importance

In [None]:
from sklearn.datasets import load_iris
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import StackingClassifier
from sklearn.model_selection import train_test_split

# Split the clinical records into predictors and the DEATH_EVENT target.
# `clinical` is a DataFrame, not a dataset loader, so it cannot be called with
# `return_X_y=True` the way `load_iris` can; select the columns directly.
X, y = clinical.drop('DEATH_EVENT', axis=1), clinical['DEATH_EVENT']

# Base learners for the stack: a random forest and a linear SVM that is fed
# standardized features through its own pipeline.
estimators = [
    ('rf', RandomForestClassifier(n_estimators=100, random_state=775)),
    ('svr', make_pipeline(StandardScaler(),
                          LinearSVC(random_state=775)))
]
# A logistic regression combines the base learners' outputs.
clf = StackingClassifier(
    estimators=estimators, final_estimator=LogisticRegression()
)
# Stratify on y so both splits keep the same share of death events.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=775
)
# Fit on the training split and report the score on the held-out split.
clf.fit(X_train, y_train).score(X_test, y_test)

In [None]:
# Settings for the forest that ranks the features.
params = dict(random_state=775, n_jobs=4, n_estimators=100, max_depth=8)

# Target is DEATH_EVENT; every other column is a predictor.
y = clinical['DEATH_EVENT']
x = clinical.drop('DEATH_EVENT', axis=1)

# Train the forest on the full table (no hold-out: it is used only for ranking).
clf = RandomForestClassifier(**params).fit(x, y)

# Pair each importance with its column name, most important first.
imp = pd.Series(clf.feature_importances_, index=x.columns)
imp = imp.sort_values(ascending=False)

# Horizontal bar chart of the ranked importances.
plt.figure(figsize=(10, 12))
plt.title("Feature importance")
ax = sns.barplot(x=imp.values, y=imp.index, orient='h', palette="Blues_d")

#**Random Forest Classifier**


In [None]:
# evaluate random forest algorithm for classification
from numpy import mean
from numpy import std
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.ensemble import RandomForestClassifier
# define dataset: use the heart-failure records rather than synthetic
# make_classification data, so the reported accuracy describes this problem
X, y = clinical.drop('DEATH_EVENT', axis=1), clinical['DEATH_EVENT']
# define the model (seeded so repeated runs produce the same scores)
model = RandomForestClassifier(random_state=775)
# evaluate the model with stratified 10-fold cross-validation, repeated 3 times
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=775)
n_scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1, error_score='raise')
# report performance: mean and standard deviation of the per-fold accuracies
print('Accuracy: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))

#**Logistic Regression**

In [None]:
# Columns excluded from the predictors by the following cell: the target
# itself and creatinine_phosphokinase.
drop = ['DEATH_EVENT','creatinine_phosphokinase']

In [None]:
# Predictors: every column except those listed in `drop`; target: DEATH_EVENT.
# NOTE(review): X is reassigned from `Features` further down, so when the cells
# run top to bottom this selection is superseded — confirm which is intended.
X, y = clinical.drop(drop,axis=1), clinical['DEATH_EVENT']

In [None]:
# Candidate feature subset that includes the follow-up `time` column.
# NOTE(review): the next cell reassigns `Features`, so in top-to-bottom order
# this list is never used; the saved prediction table further down does show a
# `time` column, though — confirm which list produced the recorded results.
Features = ['time','ejection_fraction','serum_creatinine']

In [None]:
# Alternative subset without `time`. As the later assignment, this is the list
# in effect when the cells run top to bottom.
Features = ['ejection_fraction','serum_creatinine']

In [None]:
# Restrict the predictors to the columns named in `Features`; the target
# remains DEATH_EVENT.
X, y = clinical[Features], clinical['DEATH_EVENT']

In [None]:
# All columns except the target, bound to lowercase `x`.
# NOTE(review): the train/test split below reads uppercase `X`, so this `x` is
# not consumed by the visible code — possibly a typo for `X`; confirm.
x, y = clinical.drop('DEATH_EVENT',axis=1), clinical['DEATH_EVENT']

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the rows for testing; the seed keeps the split reproducible.
Xtrain, Xtest, ytrain, ytest = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=775,
)

In [None]:
from sklearn.linear_model import LogisticRegression  # 1. choose model class
# Logistic regression using the liblinear solver; all other settings are left
# at their defaults.
model = LogisticRegression(solver='liblinear')     # 2. instantiate model

In [None]:
# 3. fit the model on the training split
model.fit(Xtrain, ytrain)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                   warm_start=False)

In [None]:
# 4. predict DEATH_EVENT labels for the held-out rows
y_model = model.predict(Xtest)

In [None]:
# Put the true labels next to the test features, then move the original row
# labels into an `index` column so the rows are numbered 0..n-1.
test = Xtest.join(ytest)
test = test.reset_index()
# Attach the predictions positionally (both sides now share a 0..n-1 index)
# and show the first few rows.
test.join(pd.Series(y_model).rename('predicted')).head()

Unnamed: 0,index,time,ejection_fraction,serum_creatinine,DEATH_EVENT,predicted
0,159,121,45,1.1,0,0
1,26,24,40,1.0,1,1
2,50,43,25,1.0,1,1
3,233,209,38,1.4,0,0
4,256,220,38,1.7,0,0


In [None]:
from sklearn.metrics import accuracy_score
# Share of held-out rows whose predicted label matches the true DEATH_EVENT.
accuracy_score(ytest, y_model)

0.8111111111111111

#**K-Nearest Neighbors Classifier**

In [None]:
# K Neighbors Classifier

from sklearn.neighbors import KNeighborsClassifier
# Six-neighbour classifier trained on the same split as the models above.
# NOTE(review): the features are passed in unscaled; neighbour distances depend
# on feature scale, so standardizing first may be worth trying.
kn_clf = KNeighborsClassifier(n_neighbors=6).fit(Xtrain, ytrain)
# Score the held-out predictions against the true labels.
kn_pred = kn_clf.predict(Xtest)
kn_acc = accuracy_score(ytest, kn_pred)


In [None]:
# Display the K-neighbors test accuracy.
kn_acc

0.7111111111111111

#**Gaussian NB**

In [None]:
from sklearn.naive_bayes import GaussianNB # 1. choose model class
# Steps 2-3: build the Gaussian naive Bayes model and fit it on the training
# split. This rebinds `model`, replacing the logistic regression.
model = GaussianNB().fit(Xtrain, ytrain)
# Step 4: label the held-out rows. This rebinds `y_model` as well.
y_model = model.predict(Xtest)

In [None]:
from sklearn.metrics import accuracy_score
# Accuracy of the Gaussian NB predictions; `y_model` now holds the GaussianNB
# output, not the logistic-regression one.
accuracy_score(ytest, y_model)

0.7