In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix

# Fetch scikit-learn's bundled breast-cancer dataset, then pull the feature
# matrix (X) and the label vector (y) out of the returned container in one step.
data = load_breast_cancer()
X, y = data.data, data.target

In [None]:
# Hold out 20% of the samples for the final evaluation; the fixed seed keeps
# the partition identical from run to run.
# NOTE(review): no `stratify=` argument is passed, so the class proportions of
# the two partitions are left to the shuffle — confirm that is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)



In [None]:

# Define the Gradient Boosting classifier that the grid search will tune.
# A fixed random_state is supplied so that the grid search, the follow-up
# cross-validation and the test-set metrics come out the same on every
# Restart & Run All; the value matches the seed used for the train/test split.
gb = GradientBoostingClassifier(random_state=42)

In [None]:
# Search space for the grid search: three candidate values for each of five
# hyperparameters, i.e. 3**5 = 243 combinations in total.
param_grid = dict([
    ('n_estimators', [50, 100, 150]),
    ('learning_rate', [0.01, 0.1, 0.2]),
    ('max_depth', [3, 5, 7]),
    ('min_samples_split', [2, 5, 10]),
    ('min_samples_leaf', [1, 2, 4]),
])

In [None]:
# Wrap the base classifier in an exhaustive grid search: every combination in
# param_grid is evaluated with 5-fold cross-validation, and n_jobs=-1 asks
# scikit-learn to run the fits in parallel on all available processors.
grid_search_gb = GridSearchCV(
    estimator=gb,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
)


In [64]:
# Run the search on the training partition only, so the held-out test rows
# play no part in hyperparameter selection.
grid_search_gb.fit(X=X_train, y=y_train)

In [65]:
# Report the hyperparameter combination the grid search selected.
print(
    "Best Parameters:",
    grid_search_gb.best_params_,
)

Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'min_samples_leaf': 4, 'min_samples_split': 5, 'n_estimators': 100}


In [66]:
# Keep a handle on the model chosen by the search and show its configuration.
# (With GridSearchCV's default refit=True this is the estimator refit on the
# whole training partition using the best parameters.)
best_gb_classifier_grid = grid_search_gb.best_estimator_
print(
    "Best Estimator:",
    best_gb_classifier_grid,
)

Best Estimator: GradientBoostingClassifier(min_samples_leaf=4, min_samples_split=5)


In [67]:
# Re-score the selected configuration with 5-fold cross-validation on the
# training partition and report the per-fold scores plus their mean.
# NOTE(review): these are the same training rows the grid search used to pick
# the hyperparameters, so the scores are not an independent estimate of
# generalisation — the held-out test set is the unbiased check.
cv_scores_gb_grid = cross_val_score(
    estimator=best_gb_classifier_grid,
    X=X_train,
    y=y_train,
    cv=5,
)
print("Cross-validation scores:", cv_scores_gb_grid)
print("Average cross-validation score:", cv_scores_gb_grid.mean())


Cross-validation scores: [0.95604396 0.95604396 0.98901099 0.97802198 0.95604396]
Average cross-validation score: 0.9670329670329672


In [68]:
# Evaluate the tuned model on the held-out test set.
y_pred_gb_grid = best_gb_classifier_grid.predict(X_test)
test_accuracy_gb_grid = accuracy_score(y_test, y_pred_gb_grid)
print("Test Accuracy:", test_accuracy_gb_grid)

# Confusion matrix with the label order pinned to [0, 1]. Without `labels`,
# scikit-learn builds the matrix only from the labels that actually occur in
# y_test / y_pred_gb_grid, so a single-class edge case would shrink it below
# 2x2 and break the four-way unpacking on the next line.
conf_matrix_gb_grid = confusion_matrix(y_test, y_pred_gb_grid, labels=[0, 1])
tn, fp, fn, tp = conf_matrix_gb_grid.ravel()

# Sensitivity is the recall of the positive class (label 1) and specificity is
# the recall of the negative class (label 0).
# NOTE(review): in scikit-learn's breast-cancer dataset label 1 appears to be
# "benign" (check data.target_names) — confirm that label 1 is the intended
# positive class, otherwise the two figures below are swapped relative to the
# usual "detect malignant" reading.
sensitivity_gb_grid = tp / (tp + fn)
specificity_gb_grid = tn / (tn + fp)

print("Sensitivity (Recall):", sensitivity_gb_grid)
print("Specificity:", specificity_gb_grid)


Test Accuracy: 0.956140350877193
Sensitivity (Recall): 0.971830985915493
Specificity: 0.9302325581395349
