In [28]:
from sklearn import datasets
from sklearn.model_selection import cross_val_score, GridSearchCV, train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn import datasets

In [29]:
#Load dataset
cancer = datasets.load_breast_cancer()

In [30]:
print("Features: ", cancer.feature_names)
# label type of cancer('malignant' 'benign')
print("Labels: ", cancer.target_names)

Features:  ['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']
Labels:  ['malignant' 'benign']


In [31]:
X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, test_size=0.2, random_state=42)

In [32]:
# Gradient Boosting Classifier
gb_classifier = GradientBoostingClassifier()

In [33]:
# Define the parameter grid for Gradient Boosting
para_grid_gb = {'n_estimators': [50, 100, 150],
                 'learning_rate': [0.01, 0.1, 1.0],
                 'max_depth': [3, 5, None]}

In [34]:
gridsearch_gb = GridSearchCV(gb_classifier, para_grid_gb, cv=5)

# Perform grid search for Gradient Boosting
gridsearch_gb.fit(X_train, y_train)

# Get the best Gradient Boosting model
best_gb_classifier = gridsearch_gb.best_estimator_

In [35]:
# k-fold cross-validation with best Gradient Boosting model
cv_scores = cross_val_score(best_gb_classifier, X_train, y_train, cv=5)
print("Gradient Boosting - Cross-validation scores: ", cv_scores)
print("Gradient Boosting - Average cross-validation score: {:.2f}".format(cv_scores.mean()))


Gradient Boosting - Cross-validation scores:  [0.97802198 0.94505495 0.97802198 0.97802198 0.92307692]
Gradient Boosting - Average cross-validation score: 0.96


In [36]:
print("Best parameters:", gridsearch_gb.best_params_)

Best parameters: {'learning_rate': 1.0, 'max_depth': 3, 'n_estimators': 50}


In [37]:
# Predict on the test set using the best Gradient Boosting model
y_pred_gb_best = best_gb_classifier.predict(X_test)

In [38]:
tn, fp, fn, tp = confusion_matrix(y_test, y_pred_gb_best).ravel()

In [39]:
Sensitivity = tp / (tp + fn)
Specificity = tn / (tn + fp)

In [40]:
# Evaluating model's performance on the test set
test_accuracy_gb_best = accuracy_score(y_test, y_pred_gb_best)
print("Best Gradient Boosting model test set accuracy: {:.2f}".format(test_accuracy_gb_best))
print("\nSensitivity :", Sensitivity)
print("Specificity:", Specificity)
print("\nClassification Report for Gradient Boosting:")
print(classification_report(y_test, y_pred_gb_best))

Best Gradient Boosting model test set accuracy: 0.96

Sensitivity : 0.971830985915493
Specificity: 0.9302325581395349

Classification Report for Gradient Boosting:
              precision    recall  f1-score   support

           0       0.95      0.93      0.94        43
           1       0.96      0.97      0.97        71

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114

