# Árvore de decisão

In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier


In [9]:
df = pd.read_csv('../../dataset/heart-failure-tratado.csv')
df.head(3)

Unnamed: 0,ExerciseAngina,Oldpeak,ST_Slope,MaxHR,ChestPainType,HeartDisease
0,0,0.0,2,172,1,0
1,0,1.0,1,156,2,1
2,0,0.0,2,98,1,0


In [10]:
# Features
x = df.drop(columns=['HeartDisease'])

# Target
y = df['HeartDisease']

In [11]:
# Split do dataset em conjuntos de treinamento e teste.

# # 70% treino, 30% teste
# x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=45)

# 90% treino, 10% teste
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=45)


In [12]:
modelos_tree = DecisionTreeClassifier()
modelos_tree.fit(x_train, y_train)

y_pred = modelos_tree.predict(x_test)

In [13]:
# Calculando métricas de desempenho (acurácia, precisão, revocação e F1-Score)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='micro')
recall = recall_score(y_test, y_pred, average='micro')
f1 = f1_score(y_test, y_pred, average='micro')

print("Acurácia:", accuracy)
print("Precisão:", precision)
print("Recall:", recall)
print("F1-Score:", f1)

Acurácia: 0.782608695652174
Precisão: 0.782608695652174
Recall: 0.782608695652174
F1-Score: 0.782608695652174


# Tuning dos hiperparâmetros

In [16]:
# Definindo o modelo base
modelo = DecisionTreeClassifier(random_state=42)

# Definindo o grid de hiperparâmetros
param_grid = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 5, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Criando o GridSearchCV
grid_search = GridSearchCV(estimator=modelo,
                           param_grid=param_grid,
                           cv=5,
                           scoring='accuracy')

# Ajustando o modelo aos dados
grid_search.fit(x_train, y_train)

# Exibindo os melhores parâmetros
print("Melhores hiperparâmetros encontrados:")
print(grid_search.best_params_)

# Usando o melhor modelo para prever
melhor_modelo = grid_search.best_estimator_
y_pred = melhor_modelo.predict(x_test)


# Calculando métricas de desempenho (acurácia, precisão, revocação e F1-Score)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='micro')
recall = recall_score(y_test, y_pred, average='micro')
f1 = f1_score(y_test, y_pred, average='micro')

print("Acurácia:", accuracy)
print("Precisão:", precision)
print("Recall:", recall)
print("F1-Score:", f1)

Melhores hiperparâmetros encontrados:
{'criterion': 'gini', 'max_depth': 5, 'min_samples_leaf': 4, 'min_samples_split': 2}
Acurácia: 0.8260869565217391
Precisão: 0.8260869565217391
Recall: 0.8260869565217391
F1-Score: 0.8260869565217391
