# Gradient Boosting Classifier on Breast Cancer Dataset
This notebook demonstrates training and evaluating a Gradient Boosting Classifier using the Breast Cancer Wisconsin dataset from scikit-learn.


In [None]:

# 1. Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt

# Single seed reused by the train/test split and by the classifier below, so
# that a fresh-kernel "Run All" reproduces the same split and the same model.
RANDOM_STATE = 42


In [None]:

# 2. Load the dataset
# `data` is the scikit-learn bunch; the feature matrix is wrapped in a
# DataFrame (columns labelled with the feature names) and the target in a Series.
data = load_breast_cancer()
feature_columns = data.feature_names
X = pd.DataFrame(data.data, columns=feature_columns)
y = pd.Series(data.target)

# Quick sanity summary: feature-matrix dimensions and per-class sample counts.
class_counts = y.value_counts()
print("Dataset loaded successfully.")
print(f"Features shape: {X.shape}")
print(f"Target distribution: {class_counts}")


In [None]:

# 3. Split into training and testing sets
# Hold out 20% of the samples for evaluation. Stratifying on the target keeps
# the class proportions of the full dataset in both splits, so the test metrics
# do not depend on how a particular seed happens to distribute the classes.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=RANDOM_STATE,  # fixed seed -> identical split on every run
    stratify=y,
)


In [None]:

# 4. Preprocess with scaling
# The scaler's statistics are learned from the training split only and then
# applied to both splits, so nothing about the test set leaks into preprocessing.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:

# 5. Train Gradient Boosting Classifier
# Default hyperparameters; only the seed is pinned so training is repeatable.
# `fit` returns the estimator itself, so construction and training are chained.
model = GradientBoostingClassifier(random_state=RANDOM_STATE).fit(
    X_train_scaled, y_train
)

# Predicted class labels for the held-out (scaled) test samples.
y_pred = model.predict(X_test_scaled)


In [None]:

# 6. Evaluate performance
# Class names come from the dataset bunch; label i corresponds to class_names[i].
# Passing the label list explicitly keeps rows/columns in a fixed order even if
# a class were missing from the test split or from the predictions.
class_names = [str(name) for name in data.target_names]
class_labels = list(range(len(class_names)))

acc = accuracy_score(y_test, y_pred)
print("Accuracy:", acc)

print("\nClassification Report:")
print(
    classification_report(
        y_test, y_pred, labels=class_labels, target_names=class_names
    )
)

# Rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

# Visualization
fig, ax = plt.subplots(figsize=(5, 5))
im = ax.imshow(cm, cmap='Blues')
fig.colorbar(im, ax=ax)
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")

# Name the classes on both axes instead of leaving bare pixel coordinates.
ax.set_xticks(class_labels)
ax.set_xticklabels(class_names)
ax.set_yticks(class_labels)
ax.set_yticklabels(class_names)

# Write the count into every cell; switch to white text on dark cells so the
# number stays legible against the colormap.
contrast_threshold = cm.max() / 2
for (row, col), count in np.ndenumerate(cm):
    ax.text(
        col,
        row,
        str(count),
        ha="center",
        va="center",
        color="white" if count > contrast_threshold else "black",
    )

plt.show()


## References and Sources
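- Breast Cancer Wisconsin (Diagnostic) dataset, loaded with `sklearn.datasets.load_breast_cancer`: https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_breast_cancer.html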
- Gradient Boosting explanation: Friedman, J. H. (2001). Greedy Function Approximation: A Gradient Boosting Machine. *The Annals of Statistics*, 29(5), 1189–1232.
