#### Train and Evaluate a Gradient Boosting Model on the Breast Cancer Dataset, Tune Key Parameters,
#### and Compare Its Performance with a Random Forest Model

In [7]:
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report

# Fetch the scikit-learn breast cancer dataset and pull out the feature
# matrix and the label vector.
cancer = load_breast_cancer()
X, y = cancer.data, cancer.target

# Reserve 20% of the samples as a held-out test set; the fixed seed keeps
# the split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Show what the dataset object contains, then the feature and class names.
print(cancer.keys())
print("\n--- Features ---\n", cancer.feature_names)
print("\n--- Target ---\n", cancer.target_names)

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])

--- Features ---
 ['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']

--- Target ---
 ['malignant' 'benign']


In [8]:
# Fit a gradient boosting classifier with default hyperparameters
# (fit() returns the estimator itself, so construction and training chain).
model_gb = GradientBoostingClassifier(random_state=42).fit(X_train, y_train)

# Predict labels for the held-out test samples
y_pred_gb = model_gb.predict(X_test)

# Score the predictions: overall accuracy plus a per-class report
cls_report = classification_report(y_test, y_pred_gb)
accuracy = accuracy_score(y_test, y_pred_gb)
print(f"Gradient Boosting Accuracy: {accuracy:.2f}")
print("\n--- Classification Report ---\n", cls_report)

Gradient Boosting Accuracy: 0.96

--- Classification Report ---
               precision    recall  f1-score   support

           0       0.95      0.93      0.94        43
           1       0.96      0.97      0.97        71

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114



In [9]:
# List the names of every hyperparameter the fitted estimator exposes
gb_params = model_gb.get_params()
gb_params.keys()

dict_keys(['ccp_alpha', 'criterion', 'init', 'learning_rate', 'loss', 'max_depth', 'max_features', 'max_leaf_nodes', 'min_impurity_decrease', 'min_samples_leaf', 'min_samples_split', 'min_weight_fraction_leaf', 'n_estimators', 'n_iter_no_change', 'random_state', 'subsample', 'tol', 'validation_fraction', 'verbose', 'warm_start'])

In [10]:
# Search space for the gradient boosting grid search:
# 4 learning rates x 3 ensemble sizes x 3 tree depths = 36 combinations.
tuned_param = dict(
    learning_rate=[0.01, 0.05, 0.1, 1],
    n_estimators=[50, 100, 200],
    max_depth=[3, 5, 7],
)

In [11]:
# Exhaustively evaluate every combination in `tuned_param` with 5-fold
# cross-validation, scoring by accuracy and using all available CPU cores.
grid_search = GridSearchCV(
    GradientBoostingClassifier(random_state=42),
    tuned_param,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Report the winning combination and its mean cross-validated accuracy
param, acc_scr = grid_search.best_params_, grid_search.best_score_
print(f"Best Paramters: {param}")
print(f"Best Cross Validation Accuracy: {acc_scr:.2f}")

Best Paramters: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 200}
Best Cross Validation Accuracy: 0.96


### Compare Gradient Boosting With Random Forest


In [13]:
# Baseline for comparison: a random forest with default hyperparameters,
# trained on the same training split.
model_rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Predict the held-out test labels and measure accuracy
y_pred_rf = model_rf.predict(X_test)
accuracy_rf = accuracy_score(y_test, y_pred_rf)
print(f"Random Forest Accuracy: {accuracy_rf:.2f}")

Random Forest Accuracy: 0.96


In [16]:
# Side-by-side summary of the scores computed above, at five decimals so
# differences hidden by two-decimal rounding are visible.
print(f"Random Forest Accuracy: {accuracy_rf:.5f}")
print(f"Gradient Boosting Accuracy: {accuracy:.5f}")
# NOTE: this is the mean cross-validation score on the training folds, not a
# test-set score, so it is not directly comparable to the two lines above.
print(f"Best Cross Validation Accuracy: {acc_scr:.5f}")

# Evaluate the tuned model on the same held-out test set so the comparison
# with the random forest is like-for-like. GridSearchCV refits the best
# parameter combination on the full training set by default (refit=True),
# which is what best_estimator_ exposes.
y_pred_gb_tuned = grid_search.best_estimator_.predict(X_test)
accuracy_gb_tuned = accuracy_score(y_test, y_pred_gb_tuned)
print(f"Tuned Gradient Boosting Test Accuracy: {accuracy_gb_tuned:.5f}")

Random Forest Accuracy: 0.96491
Gradient Boosting Accuracy: 0.95614
Best Cross Validation Accuracy: 0.96484
