In [1]:
import numpy as np 
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier

# Single seed reused by the train/validation split and by the XGBoost model
# below, so that a Restart & Run All reproduces the same split and fit.
RANDOM_STATE = 42

In [2]:
# Load the heart-failure table from the Kaggle input mount into a DataFrame.
df = pd.read_csv(filepath_or_buffer="/kaggle/input/heart-failure-prediction/heart.csv")

# Preview the first five rows; as the last expression of the cell it is
# rendered as a table rather than printed.
df.head(n=5)

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


In [3]:
# Columns that hold category labels (e.g. 'M'/'F', 'ATA', 'Normal', 'Up')
# rather than numbers; each one is expanded into indicator columns.
cat_variables = ['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope']

# One-hot encode the categorical columns.  The source columns are replaced by
# the new indicator columns, each named "<source column>_<level>".
# NOTE: this rebinds `df`, so the cell is not idempotent -- running it a second
# time without reloading the CSV fails because the source columns are gone.
df = pd.get_dummies(
    data=df,
    columns=cat_variables,
    prefix=cat_variables,
)

In [4]:
# Every column except the label is a model input.
# The label must be excluded with an equality test: the previous
# `x not in 'HeartDisease'` was a *substring* check against the string, which
# would silently drop any column whose name is a fragment of 'HeartDisease'
# (for example 'Heart' or 'Disease').
features = [x for x in df.columns if x != 'HeartDisease']

# Hold out 20% of the rows for validation; the split is seeded so it is
# reproducible across runs.
X_train, X_val, y_train, y_val = train_test_split(
    df[features],
    df['HeartDisease'],
    train_size=0.8,
    random_state=RANDOM_STATE,
)

In [5]:
# Carve the training split once more, by row position: the first 80% is used
# to fit the booster, the remaining 20% is the evaluation set monitored during
# training.  Rows are already shuffled by train_test_split above.
n = int(len(X_train) * 0.8)

X_train_fit = X_train.iloc[:n]
X_train_eval = X_train.iloc[n:]
y_train_fit = y_train.iloc[:n]
y_train_eval = y_train.iloc[n:]

In [6]:
# Gradient-boosted classifier with up to 500 trees.
# `early_stopping_rounds` is configured on the estimator itself: passing it to
# `fit()` was deprecated in xgboost 1.6 and removed in 2.0, where it raises a
# TypeError.  Training stops once the evaluation-set metric has not improved
# for 10 consecutive boosting rounds.
xgb_model = XGBClassifier(
    n_estimators=500,
    learning_rate=0.1,
    verbosity=1,
    random_state=RANDOM_STATE,
    early_stopping_rounds=10,
)

# Fit on the 80% slice of the training data; the remaining slice is the
# evaluation set whose logloss is reported each round and drives early stopping.
xgb_model.fit(
    X_train_fit,
    y_train_fit,
    eval_set=[(X_train_eval, y_train_eval)],
)

[0]	validation_0-logloss:0.65141
[1]	validation_0-logloss:0.61289
[2]	validation_0-logloss:0.57971
[3]	validation_0-logloss:0.55172
[4]	validation_0-logloss:0.53175
[5]	validation_0-logloss:0.51401
[6]	validation_0-logloss:0.49884
[7]	validation_0-logloss:0.48808
[8]	validation_0-logloss:0.47906
[9]	validation_0-logloss:0.47016
[10]	validation_0-logloss:0.46099
[11]	validation_0-logloss:0.45347
[12]	validation_0-logloss:0.44867
[13]	validation_0-logloss:0.44370
[14]	validation_0-logloss:0.44137
[15]	validation_0-logloss:0.43769
[16]	validation_0-logloss:0.43645
[17]	validation_0-logloss:0.43211
[18]	validation_0-logloss:0.43149
[19]	validation_0-logloss:0.42899
[20]	validation_0-logloss:0.42878
[21]	validation_0-logloss:0.42753
[22]	validation_0-logloss:0.42596
[23]	validation_0-logloss:0.42669
[24]	validation_0-logloss:0.42381
[25]	validation_0-logloss:0.42547
[26]	validation_0-logloss:0.42418
[27]	validation_0-logloss:0.42022
[28]	validation_0-logloss:0.41854
[29]	validation_0-loglos



[40]	validation_0-logloss:0.41901
[41]	validation_0-logloss:0.41970
[42]	validation_0-logloss:0.41873


In [7]:
# The original cell began with a bare `xgb_model.best_iteration`; only the
# last expression of a cell is displayed, so that line was evaluated and
# discarded.  It is dropped here -- put it in its own cell (or print it) to
# inspect the boosting round selected by early stopping.

# accuracy_score is documented as (y_true, y_pred).  Accuracy happens to be
# symmetric, so the reported numbers are unchanged, but the reversed order
# would give wrong results for any asymmetric metric swapped in later.
# Note that X_train includes the rows of X_train_eval used for early stopping,
# and the "test" figure is computed on the validation split X_val.
train_accuracy = accuracy_score(y_train, xgb_model.predict(X_train))
val_accuracy = accuracy_score(y_val, xgb_model.predict(X_val))

print(f"Metrics train:\n\tAccuracy score: {train_accuracy:.4f}\nMetrics test:\n\tAccuracy score: {val_accuracy:.4f}")

Metrics train:
	Accuracy score: 0.9469
Metrics test:
	Accuracy score: 0.8750
