In [1]:
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, recall_score, f1_score, precision_score
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import joblib
from pathlib import Path
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline

### Carregando os dados limpos e normalizados

In [2]:
# Load the preprocessed dataset and split it into the feature matrix (X)
# and the label column (y).
# NOTE(review): the file name suggests these features were already scaled
# before being written; the scaler loaded below is later applied on top of
# them inside the pipeline — confirm that this double scaling is intended.
DATA_DIR = Path('../data/processed')

data = pd.read_csv(DATA_DIR / 'cardiovascular_processed_scaled.csv', sep=';')
X = data.drop(columns='target')
y = data['target']

# Scaler object persisted by an earlier step. joblib.load unpickles the file,
# so it must only ever be pointed at artifacts produced by this project.
scaler = joblib.load(DATA_DIR / 'scaler.pkl')

# Seed shared by every stochastic estimator so that a fresh
# "Restart Kernel -> Run All" reproduces the same cross-validation scores.
SEED = 42

# Hyperparameter search space, keyed by the display name of each candidate.
# Every entry holds:
#   'model'  -> the unfitted estimator to tune
#   'params' -> the grid of hyperparameter values to try for that estimator
param_grid = {
    'Logistic Regression': {
        # max_iter is raised above the default of 100 because the 'saga'
        # solver can stop before converging at the larger C values;
        # random_state fixes the data shuffling done by 'saga'/'liblinear'.
        'model': LogisticRegression(max_iter=1000, random_state=SEED),
        'params': {
            'C': [0.01, 0.1, 1, 10, 100],
            'solver': ['liblinear', 'saga']
        }
    },
    'Random Forest': {
        # Bootstrap sampling and feature sub-sampling are random; seed them.
        'model': RandomForestClassifier(random_state=SEED),
        'params': {
            'n_estimators': [50, 100, 200],
            'max_depth': [None, 10, 20, 30]
        }
    },
    'SVC': {
        # Left unseeded on purpose: with the default probability=False the
        # estimator does not use random_state.
        'model': SVC(),
        'params': {
            'C': [0.1, 1, 10],
            'kernel': ['linear', 'rbf']
        }
    },
    'XGBoost': {
        # Seed made explicit so the run does not depend on library defaults.
        'model': XGBClassifier(random_state=SEED),
        'params': {
            'n_estimators': [50, 100, 200],
            'learning_rate': [0.01, 0.1, 0.2],
            'max_depth': [3, 5, 7]
        }
    }
}

In [None]:
# Tune every candidate model with a 5-fold grid search and keep the fitted
# search object of each one, keyed by model name, for later inspection.
results = {}

# Metrics recorded for every candidate. The dictionary keys are the names
# under which the scores appear in cv_results_ ('mean_test_<key>', ...).
scorings = {
    'acuracia': 'accuracy',
    'recall': 'recall',
    'f1_score': 'f1',
    'precision': 'precision'
}

for model_name, config in param_grid.items():
    print(f'\nTreinando {model_name}...')

    # NOTE(review): GridSearchCV clones the pipeline before fitting, so the
    # scaler step is re-fit on each training split rather than reusing the
    # state loaded from disk — confirm this is the intended behaviour.
    pipe = Pipeline(steps=[('scaler', scaler), ('model', config['model'])])

    # The model__ prefix is REQUIRED: it routes each hyperparameter to the
    # pipeline step named 'model'.
    params = {'model__' + name: values for name, values in config['params'].items()}

    # refit='recall' makes recall the metric that picks the best candidate,
    # so best_score_ below is the mean cross-validated recall.
    grid = GridSearchCV(
        pipe,
        params,
        scoring=scorings,
        refit='recall',
        cv=5,
        n_jobs=-1,
        verbose=1
    )

    # fit() returns the search object itself, so this stores the fitted grid.
    results[model_name] = grid.fit(X, y)

    print(f'Best Recall: {grid.best_score_:.4f}')


Treinando Logistic Regression...
Fitting 5 folds for each of 10 candidates, totalling 50 fits




Best Recall: 0.9753

Treinando Random Forest...
Fitting 5 folds for each of 12 candidates, totalling 60 fits
Best Recall: 0.9829

Treinando SVC...
Fitting 5 folds for each of 6 candidates, totalling 30 fits
Best Recall: 0.9753

Treinando XGBoost...
Fitting 5 folds for each of 27 candidates, totalling 135 fits


In [None]:
# Report, for every tuned model, the winning hyperparameters and the mean
# cross-validation value of each metric at the winning candidate.

# (display label, scorer name) pairs; labels are padded so the colons align.
metric_rows = (
    ('Accuracy ', 'acuracia'),
    ('Recall   ', 'recall'),
    ('Precision', 'precision'),
    ('F1-score ', 'f1_score'),
)

for model_name, grid in results.items():
    print(f'\n===== {model_name} =====')

    # Row of cv_results_ that belongs to the selected (best) candidate.
    idx = grid.best_index_

    print('Melhores parâmetros:')
    for param, value in grid.best_params_.items():
        print(f'  {param}: {value}')

    print('\nScores médios (CV):')
    for label, scorer in metric_rows:
        mean_score = grid.cv_results_['mean_test_' + scorer][idx]
        print(f'  {label}: {mean_score:.4f}')



Logistic Regression


KeyError: 'scores'