# Prática: Aplicação e Combinação de Classificadores Avançados

In [16]:
!pip install catboost



In [17]:
import warnings
warnings.simplefilter("ignore")

In [18]:

import numpy as np
import pandas as pd
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from sklearn.base import clone
from sklearn.ensemble import StackingClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.model_selection import GridSearchCV, StratifiedKFold, cross_val_score
from xgboost import XGBClassifier

## Carregamento de Dados (substitua pelo seu dataset)

In [19]:

# Replace this cell with the loading code for your own dataset, e.g.:
# df = pd.read_csv('seu_dataset.csv')
# X = df.drop('target', axis=1).values
# y = df['target'].values

from sklearn.datasets import load_breast_cancer

# Example data: scikit-learn's bundled breast-cancer dataset.
# `data` is kept around so its other attributes stay accessible.
data = load_breast_cancer()
X, y = data.data, data.target

## Inicializando os classificadores

In [20]:
# Baseline gradient-boosting models. No hyperparameters are tuned here; the
# only arguments passed are each library's log-verbosity setting.
cat = CatBoostClassifier(verbose=0)
lgb = LGBMClassifier(verbosity=-1)
xgb = XGBClassifier(verbosity=0)

## Avaliação com Validação Cruzada Manual (10 folds)

In [21]:
# Shared splitter: 10 stratified folds, shuffled with a fixed seed so every
# model evaluated with it is scored on exactly the same partitions.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)


def avaliar_modelo(modelo, X, y, nome="Modelo", cv=None, average="binary"):
    """Cross-validate ``modelo`` and print the mean and std of its per-fold F1.

    Parameters
    ----------
    modelo : estimator
        Object with ``fit``/``predict`` that ``sklearn.base.clone`` can copy.
        It is never fitted itself: every fold trains a fresh clone, so the
        caller's instance (and any fitted state it holds) is left untouched.
    X, y : array-like
        Features and labels; both must support indexing with the integer
        index arrays yielded by the splitter.
    nome : str, default "Modelo"
        Label used in the printed summary line.
    cv : splitter, optional
        Any object exposing ``split(X, y)``. When omitted, the module-level
        ``skf`` is used.
    average : str, default "binary"
        Forwarded to ``f1_score``; pass e.g. ``"macro"`` for macro-averaged F1.

    Returns
    -------
    numpy.ndarray
        The F1 score obtained on each fold, in split order.
    """
    splitter = skf if cv is None else cv
    f1_scores = []
    for train_idx, test_idx in splitter.split(X, y):
        # Fresh, unfitted copy per fold: no state leaks between folds or back
        # into the estimator owned by the caller.
        fold_model = clone(modelo)
        fold_model.fit(X[train_idx], y[train_idx])
        preds = fold_model.predict(X[test_idx])
        f1_scores.append(f1_score(y[test_idx], preds, average=average))
    f1_scores = np.asarray(f1_scores)
    print(f"{nome}: F1-score médio = {f1_scores.mean():.4f}, desvio padrão = {f1_scores.std():.4f}")
    return f1_scores

## Avaliação dos Modelos Individuais

In [None]:
# Score each baseline booster with the same cross-validation routine.
for estimador, rotulo in ((xgb, "XGBoost"), (lgb, "LightGBM"), (cat, "CatBoost")):
    avaliar_modelo(estimador, X, y, rotulo)

XGBoost: F1-score médio = 0.9734, desvio padrão = 0.0256
LightGBM: F1-score médio = 0.9700, desvio padrão = 0.0212


## VotingClassifier (Hard e Soft)

In [None]:

# Both ensembles combine the same three base models; they differ only in the
# `voting` argument ('hard' vs 'soft').
estimadores_base = [('xgb', xgb), ('lgb', lgb), ('cat', cat)]
voting_hard = VotingClassifier(estimators=list(estimadores_base), voting='hard')
voting_soft = VotingClassifier(estimators=list(estimadores_base), voting='soft')

for ensemble, rotulo in (
    (voting_hard, "VotingClassifier (Hard)"),
    (voting_soft, "VotingClassifier (Soft)"),
):
    avaliar_modelo(ensemble, X, y, rotulo)


## StackingClassifier

In [None]:

# Stack the three boosters under a logistic-regression final estimator; the
# stacker is configured with cv=5 for its internal cross-validation.
meta_estimador = LogisticRegression()
stacking = StackingClassifier(
    estimators=[('xgb', xgb), ('lgb', lgb), ('cat', cat)],
    final_estimator=meta_estimador,
    cv=5,
)
avaliar_modelo(stacking, X, y, "StackingClassifier")


## Ajuste de Hiperparâmetros com GridSearchCV (Exemplo para XGBoost)

In [None]:

# Small illustrative grid: 2 x 2 x 2 = 8 candidate configurations.
param_grid_xgb = {
    'n_estimators': [50, 100],
    'max_depth': [3, 5],
    'learning_rate': [0.01, 0.1]
}

# `use_label_encoder` is deprecated in XGBoost and is no longer passed; this
# also matches how XGBClassifier is constructed elsewhere in the notebook.
grid_xgb = GridSearchCV(
    XGBClassifier(eval_metric='logloss', verbosity=0),
    param_grid_xgb,
    scoring='f1',  # binary F1 of the positive class
    cv=3,
    verbose=1,
)
grid_xgb.fit(X, y)
print("Melhores parâmetros para XGBoost:", grid_xgb.best_params_)

# With GridSearchCV's default refit=True, best_estimator_ has been refit on
# the whole of X, y using the winning configuration.
xgb_best = grid_xgb.best_estimator_


## Validação Cruzada Manual (10-Fold) para Modelos Otimizados

In [None]:

# Candidates for the final comparison. Only XGBoost carries tuned
# hyperparameters (xgb_best); the other boosters use library defaults with
# logging silenced, consistent with the baseline models defined earlier.
# NOTE(review): xgb_best was selected by a grid search fitted on the same X, y
# that are cross-validated below, so its score is likely optimistic; a nested
# cross-validation would be needed for an unbiased estimate.
modelos = {
    "XGBoost Otimizado": xgb_best,
    "LightGBM": LGBMClassifier(verbosity=-1),
    "CatBoost": CatBoostClassifier(verbose=0),
    "Voting Soft": VotingClassifier(estimators=[
        ('xgb', xgb_best), ('lgb', LGBMClassifier(verbosity=-1)), ('cat', CatBoostClassifier(verbose=0))
    ], voting='soft'),
    "Stacking": StackingClassifier(estimators=[
        ('xgb', xgb_best), ('lgb', LGBMClassifier(verbosity=-1)), ('cat', CatBoostClassifier(verbose=0))
    ], final_estimator=LogisticRegression(), cv=5)
}

for nome, modelo in modelos.items():
    f1_scores = []
    for train_idx, test_idx in skf.split(X, y):
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]
        # Train a fresh copy per fold: fitting `modelo` directly would overwrite
        # xgb_best (the estimator taken from the grid search) with a model
        # trained only on the last fold's training split.
        modelo_fold = clone(modelo)
        modelo_fold.fit(X_train, y_train)
        preds = modelo_fold.predict(X_test)
        # Macro-averaged F1: unweighted mean of the per-class F1 scores.
        f1_scores.append(f1_score(y_test, preds, average='macro'))
    print(f"{nome}: F1-score médio = {np.mean(f1_scores):.4f}, desvio padrão = {np.std(f1_scores):.4f}")


In [None]:
from sklearn.model_selection import GridSearchCV
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

def _ajustar_grid(estimador, param_grid, nome):
    """Grid-search ``estimador`` on the notebook's X, y and report the winner.

    Every search uses the same protocol (macro-F1 scoring, cv=10, verbose=1),
    so the three libraries are tuned under identical conditions.

    Parameters
    ----------
    estimador : estimator
        Unfitted model to tune.
    param_grid : dict
        Maps parameter names to the list of values to try.
    nome : str
        Library name inserted into the printed summary line.

    Returns
    -------
    GridSearchCV
        The fitted search object.
    """
    grid = GridSearchCV(estimador, param_grid, scoring='f1_macro', cv=10, verbose=1)
    grid.fit(X, y)
    print(f"Melhores parâmetros para {nome}:", grid.best_params_)
    return grid


# Tuning for XGBoost
param_grid_xgb = {
    'n_estimators': [50, 100],
    'max_depth': [3, 5],
    'learning_rate': [0.01, 0.1]
}
grid_xgb = _ajustar_grid(XGBClassifier(verbosity=0), param_grid_xgb, "XGBoost")
xgb_best = grid_xgb.best_estimator_

# Tuning for LightGBM
param_grid_lgb = {
    'n_estimators': [50, 100],
    'max_depth': [3, 5, -1],
    'learning_rate': [0.01, 0.1]
}
grid_lgb = _ajustar_grid(LGBMClassifier(verbosity=-1), param_grid_lgb, "LightGBM")
lgb_best = grid_lgb.best_estimator_

# Tuning for CatBoost
param_grid_cat = {
    'iterations': [100, 200],
    'depth': [3, 5],
    'learning_rate': [0.01, 0.1]
}
grid_cat = _ajustar_grid(CatBoostClassifier(verbose=0), param_grid_cat, "CatBoost")
cat_best = grid_cat.best_estimator_