<a href="https://colab.research.google.com/github/JoseBatista2010/ML2023-Jose-Batista/blob/master/bagging_and_boost.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, recall_score, f1_score, precision_score, classification_report

# Load the breast-cancer dataset bundled with scikit-learn
data = load_breast_cancer()
X, y = data.data, data.target

# Hold out 20% of the samples for testing (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features: the scaler is fitted on the training split only,
# then that same fitted transformation is applied to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Hyperparameter search spaces for the Random Forest and AdaBoost models
param_grid_rf = dict(
    n_estimators=[50, 100, 200],
    max_depth=[3, 5, 7],
)

param_grid_ada = dict(
    n_estimators=[50, 100, 200],
    learning_rate=[0.1, 0.05, 0.01, 0.5],
)

# Grid search with 5-fold cross-validation for the Random Forest
rf_model = RandomForestClassifier(random_state=42)
rf_grid_search = GridSearchCV(rf_model, param_grid_rf, cv=5)
rf_grid_search.fit(X_train_scaled, y_train)

# Grid search with 5-fold cross-validation for AdaBoost.
# The base learner is passed positionally on purpose: its keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old spelling
# was removed in 1.4 (TypeError), whereas the first positional slot is the base
# learner under both spellings.
# NOTE(review): the base tree has no depth limit, so it can probably fit the
# training data perfectly, in which case boosting would stop after the first
# round and the n_estimators / learning_rate grid would have little effect.
# Consider a shallow tree (e.g. max_depth=1) -- confirm the intended setup.
ada_model = AdaBoostClassifier(DecisionTreeClassifier(), random_state=42)
ada_grid_search = GridSearchCV(ada_model, param_grid_ada, cv=5)
ada_grid_search.fit(X_train_scaled, y_train)

# Take the best model found by each search.
# No explicit re-fit is needed: both searches are built without a `refit`
# argument, and GridSearchCV's default (refit=True) already retrains the best
# parameter combination on the whole training set passed to `fit`, exposing
# the result as `best_estimator_`. Fitting again would only repeat that work.
best_rf_model = rf_grid_search.best_estimator_
best_ada_model = ada_grid_search.best_estimator_

# Evaluate both tuned models on the held-out test set.
# average=None makes recall / F1 / precision return one value per class.

# --- Random Forest ---
y_pred_rf = best_rf_model.predict(X_test_scaled)
accuracy_rf = accuracy_score(y_test, y_pred_rf)
recall_rf = recall_score(y_test, y_pred_rf, average=None)
f1_score_rf = f1_score(y_test, y_pred_rf, average=None)
precision_rf = precision_score(y_test, y_pred_rf, average=None)
classification_report_rf = classification_report(y_test, y_pred_rf)

# --- AdaBoost ---
y_pred_ada = best_ada_model.predict(X_test_scaled)
accuracy_ada = accuracy_score(y_test, y_pred_ada)
recall_ada = recall_score(y_test, y_pred_ada, average=None)
f1_score_ada = f1_score(y_test, y_pred_ada, average=None)
precision_ada = precision_score(y_test, y_pred_ada, average=None)
classification_report_ada = classification_report(y_test, y_pred_ada)

# Feature importances exposed by each fitted ensemble
feature_importances_rf = best_rf_model.feature_importances_
feature_importances_ada = best_ada_model.feature_importances_

def _print_metrics(title, accuracy, recall, f1, precision, report):
    """Print one model's evaluation summary under the given title line."""
    print(title)
    print("Acurácia:", accuracy)
    print("Recall:", recall)
    print("F1-score:", f1)
    print("Precision:", precision)
    print("Classification Report:\n", report)


# Report the test-set metrics of each tuned model, Random Forest first
_print_metrics(
    "Random Forest:",
    accuracy_rf,
    recall_rf,
    f1_score_rf,
    precision_rf,
    classification_report_rf,
)

_print_metrics(
    "Ada Boosting:",
    accuracy_ada,
    recall_ada,
    f1_score_ada,
    precision_ada,
    classification_report_ada,
)

# Header for the Random Forest feature-importance listing
print("Importância das Features - Random Forest:")
