In [24]:
import os
import numpy as np
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_auc_score
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier

In [25]:
# Load the pickled predictors (features) and target produced by the
# pre-processing step. Paths are built with os.path.join so they resolve on
# any OS, and `with` guarantees each file is closed even if unpickling fails.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
dados_dir = os.path.join(os.getcwd(), 'Tratamento e Pre-Processamento', 'Dados')

with open(os.path.join(dados_dir, 'heart_previsores.pkl'), 'rb') as previsores_file:
    previsores = pickle.load(previsores_file)

with open(os.path.join(dados_dir, 'heart_alvo.pkl'), 'rb') as alvo_file:
    alvo = pickle.load(alvo_file)

In [26]:
# Hold out 30% of the samples as a test set; the fixed seed keeps the split
# reproducible between runs.
(previsores_treino, previsores_teste,
 alvo_treino, alvo_teste) = train_test_split(
    previsores, alvo, test_size=0.3, random_state=0
)


In [27]:
# Base estimator handed to the grid search; only the seed is fixed here, the
# remaining hyper-parameters are supplied by the search grid.
gbm = GradientBoostingClassifier(random_state=0)

In [28]:
# Hyper-parameter grid explored by the search: 4 x 3 x 5 = 60 combinations.
param_grid = {
    'n_estimators': [20, 50, 150, 250],
    'learning_rate': [0.05, 0.1, 0.5],
    'max_depth': [1, 2, 3, 4, 5],
}

In [29]:
# Exhaustive search over param_grid, ranking candidates by ROC AUC with
# 4-fold cross-validation.
grid_search = GridSearchCV(
    estimator=gbm,
    param_grid=param_grid,
    scoring='roc_auc',
    cv=4,
)

In [30]:
# Run the search on the training split only, so the test split stays unseen.
grid_search.fit(X=previsores_treino, y=alvo_treino)

GridSearchCV(cv=4, estimator=GradientBoostingClassifier(random_state=0),
             param_grid={'learning_rate': [0.05, 0.1, 0.5],
                         'max_depth': [1, 2, 3, 4, 5],
                         'n_estimators': [20, 50, 150, 250]},
             scoring='roc_auc')

In [31]:
# Show the parameter combination that scored best in the grid search
# (scoring was set to ROC AUC when the search was built).
grid_search.best_params_

{'learning_rate': 0.05, 'max_depth': 1, 'n_estimators': 150}

In [32]:
# Tabulate every grid-search candidate, best mean CV score first, with a
# fresh 0..n-1 index so the row number is the rank.
resultado = (
    pd.DataFrame(grid_search.cv_results_)
    .sort_values(by='mean_test_score', ascending=False)
    .reset_index(drop=True)
)


In [33]:
# Display the five best-ranked candidates with their parameters and the
# mean / standard deviation of the cross-validated score.
colunas_resumo = [
    'param_max_depth',
    'param_learning_rate',
    'param_n_estimators',
    'mean_test_score',
    'std_test_score',
]
resultado[colunas_resumo].head()

Unnamed: 0,param_max_depth,param_learning_rate,param_n_estimators,mean_test_score,std_test_score
0,1,0.05,150,0.921389,0.027952
1,2,0.05,50,0.920428,0.026282
2,1,0.05,250,0.919132,0.024735
3,1,0.1,50,0.918464,0.028898
4,1,0.1,150,0.91842,0.025333


In [34]:
# Final model with hand-copied hyper-parameters.
# NOTE(review): the grid search reported max_depth=1 as best for
# learning_rate=0.05 / n_estimators=150, but max_depth=2 is used here --
# confirm this is intentional (otherwise consider passing
# **grid_search.best_params_ instead of retyping the values).
gbm = GradientBoostingClassifier(
    learning_rate=0.05,
    n_estimators=150,
    max_depth=2,
    random_state=0,
)

In [35]:
# Train the final model on the training split.
gbm.fit(X=previsores_treino, y=alvo_treino)

GradientBoostingClassifier(learning_rate=0.05, max_depth=2, n_estimators=150,
                           random_state=0)

In [36]:
# Predicted class labels for the held-out test split.
previsoes_gbm = gbm.predict(X=previsores_teste)


In [37]:
# Test-set accuracy, reported as a percentage.
acuracia_teste = accuracy_score(alvo_teste, previsoes_gbm)
print('Acurácia: %.2f%%' % (acuracia_teste * 100))

Acurácia: 87.68%


In [38]:
# Confusion matrix on the test split (rows: true class, columns: predicted).
confusion_matrix(y_true=alvo_teste, y_pred=previsoes_gbm)

array([[105,  16],
       [ 18, 137]], dtype=int64)

In [39]:
# Per-class precision / recall / F1 on the test split.
relatorio_teste = classification_report(alvo_teste, previsoes_gbm)
print(relatorio_teste)

              precision    recall  f1-score   support

           0       0.85      0.87      0.86       121
           1       0.90      0.88      0.89       155

    accuracy                           0.88       276
   macro avg       0.87      0.88      0.88       276
weighted avg       0.88      0.88      0.88       276



In [40]:
# Predictions on the training split, used below to compare train vs. test
# performance.
previsoes_treino = gbm.predict(X=previsores_treino)

In [41]:
# Training-set accuracy (as a fraction, not a percentage).
accuracy_score(y_true=alvo_treino, y_pred=previsoes_treino)

0.9141965678627145

In [42]:
# Confusion matrix on the training split (rows: true class, columns: predicted).
confusion_matrix(y_true=alvo_treino, y_pred=previsoes_treino)

array([[254,  35],
       [ 20, 332]], dtype=int64)

In [43]:
# Cross-validation

In [44]:
# 30 shuffled folds with a fixed seed, so the fold assignment is reproducible.
kfold = KFold(
    n_splits=30,
    shuffle=True,
    random_state=5,
)

In [45]:
# Cross-validate a fresh model with the same hyper-parameters as the final
# model, over the full dataset (train and test splits together). Scoring is
# left at cross_val_score's default.
# NOTE(review): max_depth=2 here also differs from the max_depth=1 reported by
# the grid search -- confirm this is intentional.
# NOTE: `resultado` is rebound here; it no longer holds the grid-search table.
modelo = GradientBoostingClassifier(
    learning_rate=0.05,
    n_estimators=150,
    max_depth=2,
    random_state=0,
)
resultado = cross_val_score(estimator=modelo, X=previsores, y=alvo, cv=kfold)

In [46]:
# Mean score across all cross-validation folds, reported as a percentage.
acuracia_media = resultado.mean()
print('Acurácia Média: %.2f%%' % (acuracia_media * 100))

Acurácia Média: 87.02%
