# XGBoost

In [48]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from xgboost import XGBClassifier


In [49]:
df = pd.read_csv('../../dataset/heart-failure-tratado.csv')
df.head(3)

Unnamed: 0,ExerciseAngina,Oldpeak,ST_Slope,MaxHR,ChestPainType,HeartDisease
0,0,0.0,2,172,1,0
1,0,1.0,1,156,2,1
2,0,0.0,2,98,1,0


In [50]:
# Features
x = df.drop(columns=['HeartDisease'])

# Target
y = df['HeartDisease']

In [51]:
# Split do dataset em conjuntos de treinamento e teste.

# # 70% treino, 30% teste
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=45)

# 90% treino, 10% teste
# x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=45)


In [None]:
modelo = XGBClassifier()
modelo.fit(x_train, y_train)

y_pred = modelo.predict(x_test)

# Se y_pred veio de 'binary:logistic' ou valores contínuos
y_pred_class = [1 if p >= 0.5 else 0 for p in y_pred]

accuracy = accuracy_score(y_test, y_pred_class)
precision = precision_score(y_test, y_pred_class)
recall = recall_score(y_test, y_pred_class)
f1 = f1_score(y_test, y_pred_class)

print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1-score: {f1}')

Accuracy: 0.8079710144927537
Precision: 0.8211920529801324
Recall: 0.8266666666666667
F1-score: 0.8239202657807309


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


# Tuning dos hiperparâmetros

In [61]:
# Modelo base
xgb = XGBClassifier()

# Grade de parâmetros
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.1, 0.2],
    'subsample': [0.8, 1.0]
}

# Busca com validação cruzada
grid_search = GridSearchCV(estimator=xgb, param_grid=param_grid,
                           cv=10, scoring='accuracy'
                           )

grid_search.fit(x_train, y_train)

# Melhor combinação
print("Melhores parâmetros:", grid_search.best_params_)

# Se y_pred veio de 'binary:logistic' ou valores contínuos
y_pred_class = [1 if p >= 0.5 else 0 for p in y_pred]

accuracy = accuracy_score(y_test, y_pred_class)
precision = precision_score(y_test, y_pred_class)
recall = recall_score(y_test, y_pred_class)
f1 = f1_score(y_test, y_pred_class)

print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1-score: {f1}')

Melhores parâmetros: {'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 200, 'subsample': 0.8}
Accuracy: 0.8079710144927537
Precision: 0.8211920529801324
Recall: 0.8266666666666667
F1-score: 0.8239202657807309
