In [1]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier # Import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier # AdaBoost often uses Decision Stumps as base learners
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [2]:
# Load the breast cancer dataset that ships with scikit-learn.
# `data` is kept as a whole because its metadata (target_names) is needed
# again when the results are reported.

print("Loading the Breast Cancer dataset...")
data = load_breast_cancer()
X, y = data.data, data.target  # 2-D feature matrix and per-sample class labels
print(f"Dataset loaded. Number of samples: {X.shape[0]}, Number of features: {X.shape[1]}")
print(f"Target classes: {data.target_names}")

Loading the Breast Cancer dataset...
Dataset loaded. Number of samples: 569, Number of features: 30
Target classes: ['malignant' 'benign']


In [3]:
# Hold out 30% of the samples for testing. The fixed random_state makes the
# shuffle, and therefore the split, repeatable from run to run.
# NOTE(review): the split is not stratified (no `stratify=y`), so the class
# ratio in the two subsets may differ slightly from the full dataset.

print("\nSplitting data into training and testing sets...")
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
print(f"Training samples: {X_train.shape[0]}, Testing samples: {X_test.shape[0]}")



Splitting data into training and testing sets...
Training samples: 398, Testing samples: 171


In [5]:
# Weak learner: a depth-1 decision tree (a "decision stump").
print("\nDefining the base estimator (Decision Stump)...")
base_estimator = DecisionTreeClassifier(random_state=42, max_depth=1)

# Boosted ensemble built on top of the stump defined above.
# NOTE: the `estimator` keyword requires scikit-learn >= 1.2; older releases
# call this argument `base_estimator`.

print("\nCreating the AdaBoost Classifier...")
adaboost_model = AdaBoostClassifier(
    estimator=base_estimator,
    random_state=42,
    n_estimators=100,   # maximum number of boosting rounds
    learning_rate=1.0,  # weight applied to each weak learner's contribution
)


Defining the base estimator (Decision Stump)...

Creating the AdaBoost Classifier...


In [6]:
# Fit the boosted ensemble on the training split only; the test split stays
# untouched until evaluation.
print("Training the AdaBoost Classifier...")
adaboost_model.fit(X=X_train, y=y_train)
print("AdaBoost model training complete.")


Training the AdaBoost Classifier...
AdaBoost model training complete.


In [7]:
# Predict a class label for every held-out test sample.
print("\nMaking predictions with the AdaBoost Classifier on the test set...")
y_pred_adaboost = adaboost_model.predict(X=X_test)


Making predictions with the AdaBoost Classifier on the test set...


In [8]:
# Show the predicted class labels for the test set (0 = malignant, 1 = benign,
# following the order of data.target_names).
# NOTE(review): this displays the entire array; a slice such as
# y_pred_adaboost[:10] would keep the notebook output short.
y_pred_adaboost

array([1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0,
       1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1,
       1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1])

In [9]:
# Evaluate the AdaBoost Model Performance

# Accuracy Score: fraction of test samples whose predicted label equals y_test.
accuracy_adaboost = accuracy_score(y_test, y_pred_adaboost)
print(f"\nAdaBoost Model Accuracy: {accuracy_adaboost:.4f}")

# Confusion Matrix: rows are the true classes, columns the predicted classes.
# labels=[0, 1] pins the row/column order and guarantees a 2x2 result, so the
# positional indexing below cannot shift or raise IndexError if one class is
# absent from both y_test and the predictions.
cm_adaboost = confusion_matrix(y_test, y_pred_adaboost, labels=[0, 1])
print("\nAdaBoost Model Confusion Matrix:")
print(cm_adaboost)

# Extracting Type 1 and Type 2 Errors
# Convention used here: class 0 = malignant (negative), class 1 = benign
# (positive), matching the order of data.target_names.
# Under this convention:
#   - a false positive (row 0, col 1) is a malignant sample predicted as benign,
#     i.e. a missed cancer;
#   - a false negative (row 1, col 0) is a benign sample predicted as malignant.
type_1_error_adaboost = cm_adaboost[0, 1]
print(f"\nAdaBoost Model Type 1 Error (False Positives): {type_1_error_adaboost}")
type_2_error_adaboost = cm_adaboost[1, 0]
print(f"AdaBoost Model Type 2 Error (False Negatives): {type_2_error_adaboost}")


AdaBoost Model Accuracy: 0.9708

AdaBoost Model Confusion Matrix:
[[ 62   1]
 [  4 104]]

AdaBoost Model Type 1 Error (False Positives): 1
AdaBoost Model Type 2 Error (False Negatives): 4


In [10]:
# Per-class precision, recall and F1-score, with rows labelled by the
# dataset's class names instead of the raw 0/1 labels.
print("\nAdaBoost Model Classification Report (Precision, Recall, F1-Score):")
report_adaboost = classification_report(
    y_test,
    y_pred_adaboost,
    target_names=data.target_names,
)
print(report_adaboost)



AdaBoost Model Classification Report (Precision, Recall, F1-Score):
              precision    recall  f1-score   support

   malignant       0.94      0.98      0.96        63
      benign       0.99      0.96      0.98       108

    accuracy                           0.97       171
   macro avg       0.96      0.97      0.97       171
weighted avg       0.97      0.97      0.97       171

