The Breast Cancer dataset was selected for this project because it is a well-established, relatively balanced binary classification dataset, making it ideal for evaluating classification models.

In [88]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd
from sklearn.model_selection import cross_val_score, StratifiedKFold

np.random.seed(42)  # For reproducibility


#Loading Dataset

In [89]:
# Loading the Breast Cancer dataset
X, y = load_breast_cancer(return_X_y=True)

# Convert labels {0,1} → {-1, +1} for AdaBoost math
y_signed = np.where(y == 0, -1, 1)

# Train/test split 80 / 20
X_train, X_test, y_train_signed, y_test_signed = train_test_split(
    X, y_signed, test_size=0.2, stratify=y_signed, random_state=42
)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Displaying data analysis results in the terminal
print("\n=== Part 1: Data Analysis and Preparation ===")
print(f"Train shape: {X_train_scaled.shape}, Test shape: {X_test_scaled.shape}\n")

# Analyzing the data distribution (Descriptive Statistics)
train_df = pd.DataFrame(X_train_scaled)
test_df = pd.DataFrame(X_test_scaled)

print("\n=== Training Set Analysis (Scaled Features) ===")
print(train_df.describe())

print("\n=== Test Set Analysis (Scaled Features) ===")
print(test_df.describe())

# Class Distribution in Train and Test Sets
unique_train, counts_train = np.unique(y_train_signed, return_counts=True)
unique_test, counts_test = np.unique(y_test_signed, return_counts=True)

print("\n=== Class Distribution in Training Set ===")
for label, count in zip(unique_train, counts_train):
    print(f"Class {label}: {count} samples")

print("\n=== Class Distribution in Test Set ===")
for label, count in zip(unique_test, counts_test):
    print(f"Class {label}: {count} samples")


=== Part 1: Data Analysis and Preparation ===
Train shape: (455, 30), Test shape: (114, 30)


=== Training Set Analysis (Scaled Features) ===
                 0             1             2             3             4   \
count  4.550000e+02  4.550000e+02  4.550000e+02  4.550000e+02  4.550000e+02   
mean  -4.337434e-15  2.240942e-15 -7.437274e-16  1.503071e-16  5.223660e-15   
std    1.001101e+00  1.001101e+00  1.001101e+00  1.001101e+00  1.001101e+00   
min   -2.027220e+00 -2.167362e+00 -1.980187e+00 -1.465734e+00 -2.503730e+00   
25%   -6.958063e-01 -7.379620e-01 -6.979907e-01 -6.758983e-01 -7.210063e-01   
50%   -2.280663e-01 -9.711800e-02 -2.312101e-01 -3.097848e-01 -3.774691e-02   
75%    4.785500e-01  5.596334e-01  5.033684e-01  3.526440e-01  6.458845e-01   
max    4.017353e+00  4.552410e+00  4.018733e+00  5.370416e+00  3.610271e+00   

                 5             6             7             8             9   \
count  4.550000e+02  4.550000e+02  4.550000e+02  4.550000e+02  4.5

#MLP Implementation

In [90]:
class MLP:
    """One‑hidden‑layer neural network with tanh hidden units and softmax output.
    Supports sample weights for AdaBoost training."""
    def __init__(self, n_in, n_hidden, n_out, lr=0.01, epochs=200):
        self.W1 = np.random.randn(n_in, n_hidden) * np.sqrt(2.0 / n_in)
        self.b1 = np.zeros((1, n_hidden))
        self.W2 = np.random.randn(n_hidden, n_out) * np.sqrt(2.0 / n_hidden)
        self.b2 = np.zeros((1, n_out))
        self.lr = lr
        self.epochs = epochs

    @staticmethod
    def _softmax(z):
        expz = np.exp(z - np.max(z, axis=1, keepdims=True))
        return expz / np.sum(expz, axis=1, keepdims=True)

    def _forward(self, X):
        z1 = X @ self.W1 + self.b1
        a1 = np.tanh(z1)
        z2 = a1 @ self.W2 + self.b2
        a2 = self._softmax(z2)
        return z1, a1, z2, a2

    def _backward(self, X, y_onehot, sample_weights, cache):
        z1, a1, _, a2 = cache
        m = X.shape[0]

        # Weighted cross‑entropy gradient
        delta2 = (a2 - y_onehot) * sample_weights[:, None] / m
        dW2 = a1.T @ delta2
        db2 = np.sum(delta2, axis=0, keepdims=True)

        delta1 = (1 - np.tanh(z1) ** 2) * (delta2 @ self.W2.T)
        dW1 = X.T @ delta1
        db1 = np.sum(delta1, axis=0, keepdims=True)

        # Parameter update (SGD on full batch)
        self.W2 -= self.lr * dW2
        self.b2 -= self.lr * db2
        self.W1 -= self.lr * dW1
        self.b1 -= self.lr * db1

    def fit(self, X, y_signed, sample_weights=None):
        if sample_weights is None:
            sample_weights = np.ones(X.shape[0]) / X.shape[0]
        # Binary classification → two output neurons (‑1 and +1)
        y_idx = (y_signed == 1).astype(int)          # map {‑1:0, +1:1}
        y_onehot = np.eye(2)[y_idx]

        for _ in range(self.epochs):
            cache = self._forward(X)
            self._backward(X, y_onehot, sample_weights, cache)

    def predict(self, X):
        _, _, _, probs = self._forward(X)
        pred_idx = np.argmax(probs, axis=1)
        return np.where(pred_idx == 1, 1, -1)


#AdaBoost using MLP weak learner

In [99]:
class AdaBoostMLP:
    """AdaBoost one‑hidden‑layer MLPs as weak learners"""
    def __init__(self, n_estimators=10, hidden_size=4, lr=0.03, epochs=30):
        self.n_estimators = n_estimators
        self.hidden_size = hidden_size
        self.lr = lr
        self.epochs = epochs
        self.alphas = []
        self.models = []

    def fit(self, X, y_signed):
        n_samples, n_features = X.shape
        # Initialize uniform distribution
        w = np.ones(n_samples) / n_samples

        for t in range(self.n_estimators):
            # Train weak learner
            model = MLP(n_features, self.hidden_size, 2, lr=self.lr, epochs=self.epochs)
            model.fit(X, y_signed, sample_weights=w)
            pred = model.predict(X)

            # Compute weighted error
            incorrect = (pred != y_signed).astype(float)
            err = np.dot(w, incorrect) / np.sum(w)

            # Guard against perfect / worse‑than‑random learners
            err = np.clip(err, 1e-10, 0.999)
            alpha = 0.5 * np.log((1 - err) / err)

            # Update sample weights
            w *= np.exp(-alpha * y_signed * pred)
            w /= np.sum(w)  # normalize

            # Save
            self.alphas.append(alpha)
            self.models.append(model)

            # print(f"Iter {t+1}/{self.n_estimators} – error: {err:.4f}, alpha: {alpha:.4f}")

    def predict(self, X):
        # Weighted majority vote
        agg = np.zeros(X.shape[0])
        for alpha, model in zip(self.alphas, self.models):
            agg += alpha * model.predict(X)
        return np.sign(agg)


#Single MLP

In [122]:
# Cross-Validation (5-Fold) Setup
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_accuracies = []

# Cross-Validation Loop for Baseline MLP
for train_index, test_index in cv.split(X, y_signed):
    X_cv_train, X_cv_test = X[train_index], X[test_index]
    y_cv_train, y_cv_test = y_signed[train_index], y_signed[test_index]

    # Scaling within each fold
    scaler = StandardScaler()
    X_cv_train = scaler.fit_transform(X_cv_train)
    X_cv_test = scaler.transform(X_cv_test)

    # Baseline MLP Model Initialization
    baseline_mlp = MLP(X_cv_train.shape[1], n_hidden=10, n_out=2, lr=0.02, epochs=300)
    baseline_mlp.fit(X_cv_train, y_cv_train)

    # Prediction on CV Test Set
    y_pred_base = baseline_mlp.predict(X_cv_test)

    # Calculating and Storing CV Accuracy
    cv_accuracy = accuracy_score(y_cv_test, y_pred_base)
    cv_accuracies.append(cv_accuracy)

# Calculating Average CV Accuracy
mean_cv_accuracy = np.mean(cv_accuracies)

print("\nCross-Validation Baseline MLP (5-Fold)")
print(f"Mean CV Accuracy: {mean_cv_accuracy:.4f}")
print(f"All CV Accuracies: {cv_accuracies}")



Cross-Validation Baseline MLP (5-Fold)
Mean CV Accuracy: 0.4920
All CV Accuracies: [0.45614035087719296, 0.4824561403508772, 0.7105263157894737, 0.39473684210526316, 0.415929203539823]


#AdaBoost with MLP weak learners

In [123]:
# Cross-Validation Setup
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_accuracies = []
all_y_true = []
all_y_pred = []

# Cross-Validation Loop
for train_index, test_index in cv.split(X, y_signed):
    X_cv_train, X_cv_test = X[train_index], X[test_index]
    y_cv_train, y_cv_test = y_signed[train_index], y_signed[test_index]

    # Scaling within each fold
    scaler = StandardScaler()
    X_cv_train = scaler.fit_transform(X_cv_train)
    X_cv_test = scaler.transform(X_cv_test)

    # AdaBoost with MLP Weak Learners
    boost = AdaBoostMLP(n_estimators=10, hidden_size=6, lr=0.02, epochs=150)
    boost.fit(X_cv_train, y_cv_train)

    # Predict on CV Test Set
    y_pred_boost = boost.predict(X_cv_test)
    cv_accuracy = accuracy_score(y_cv_test, y_pred_boost)
    cv_accuracies.append(cv_accuracy)

    # Storing all true and predicted values for full evaluation
    all_y_true.extend(y_cv_test)
    all_y_pred.extend(y_pred_boost)

# Calculating Average CV Accuracy
mean_cv_accuracy = np.mean(cv_accuracies)
print(f"\nCross-Validation AdaBoost-MLP Ensemble (5-Fold)")
print(f"Mean CV Accuracy: {mean_cv_accuracy:.4f}")
print(classification_report((np.array(all_y_true) == 1).astype(int),
                            (np.array(all_y_pred) == 1).astype(int), zero_division=0))


Cross-Validation AdaBoost-MLP Ensemble (5-Fold)
Mean CV Accuracy: 0.9174
              precision    recall  f1-score   support

           0       0.88      0.90      0.89       212
           1       0.94      0.93      0.93       357

    accuracy                           0.92       569
   macro avg       0.91      0.91      0.91       569
weighted avg       0.92      0.92      0.92       569



#Conclusion

Initially, a single MLP model demonstrated low performance with limited generalization capabilities. However, with the introduction of AdaBoost, the model's efficiency improved significantly. The AdaBoost-MLP ensemble, trained through 5-fold cross-validation, achieved an average accuracy of 91.74%. The model exhibited balanced performance, with class 0 achieving 88% precision and 90% recall, while class 1 reached 94% precision and 93% recall. These results demonstrate that the AdaBoost ensemble effectively transformed weak MLP learners into a strong, robust classifier by adaptively focusing on misclassified samples. The model maintained high recall for class 1, highlighting its ability to correctly identify positive cases while maintaining competitive precision for both classes. The consistently high cross-validation accuracy further confirms the model's stability and strong generalization capability.

#Perceptron‑node Random Forest

In [94]:
class PerceptronNode:
    """Internal decision node that learns a hyperplane to split the data."""
    def __init__(self, n_features, lr=0.05, epochs=30):
        self.w = np.random.randn(n_features)
        self.b = 0.
        self.lr = lr
        self.epochs = epochs

    def _train(self, X, y):
        # Simple perceptron learning rule (PLA) for labels {-1,+1}
        for _ in range(self.epochs):
            for xi, yi in zip(X, y):
                if yi * (np.dot(xi, self.w) + self.b) <= 0:
                    self.w += self.lr * yi * xi
                    self.b += self.lr * yi

    def predict(self, X):
        return np.sign(X @ self.w + self.b)


In [95]:

import math
from collections import Counter

class DecisionTreePerceptron:
    """Binary decision tree whose internal splits are tiny perceptrons."""
    def __init__(self, max_depth=5, min_samples_leaf=5, max_features=None,
                 lr=0.05, epochs=25, rng=None):
        self.max_depth = max_depth
        self.min_samples_leaf = min_samples_leaf
        self.max_features = max_features
        self.lr = lr
        self.epochs = epochs
        self.rng = np.random.default_rng(rng)
        self.root = None

    class _Node:
        __slots__ = ('perceptron', 'left', 'right', 'is_leaf', 'pred')
        def __init__(self):
            self.perceptron = None
            self.left = None
            self.right = None
            self.is_leaf = False
            self.pred = None

    def _build(self, X, y, depth):
        node = self._Node()

        # Stopping conditions
        if depth >= self.max_depth or len(X) <= self.min_samples_leaf or len(np.unique(y)) == 1:
            node.is_leaf = True
            node.pred = 1 if np.sum(y == 1) >= np.sum(y == -1) else -1
            return node

        n_features_total = X.shape[1]
        max_features = self.max_features or int(math.sqrt(n_features_total))
        feat_idx = self.rng.choice(n_features_total, size=max_features, replace=False)

        # Train perceptron only on selected features
        p = PerceptronNode(n_features=max_features, lr=self.lr, epochs=self.epochs)
        p._train(X[:, feat_idx], y)

        # Split
        pred = p.predict(X[:, feat_idx])
        left_mask = pred == -1
        right_mask = ~left_mask

        if np.all(left_mask) or np.all(right_mask):  # failed split, make leaf
            node.is_leaf = True
            node.pred = 1 if np.sum(y == 1) >= np.sum(y == -1) else -1
            return node

        node.perceptron = (p, feat_idx)
        node.left = self._build(X[left_mask], y[left_mask], depth+1)
        node.right = self._build(X[right_mask], y[right_mask], depth+1)
        return node

    def fit(self, X, y):
        self.root = self._build(X, y, depth=0)

    def _predict_one(self, x, node):
        while not node.is_leaf:
            p, feat_idx = node.perceptron
            side = p.predict(x[feat_idx][None, :])[0]
            node = node.left if side == -1 else node.right
        return node.pred

    def predict(self, X):
        return np.array([self._predict_one(x, self.root) for x in X])


In [115]:

class RandomForestPerceptron:
    """Ensemble of DecisionTreePerceptron built via bagging."""
    def __init__(self, n_estimators=25, max_depth=5, min_samples_leaf=5,
                 max_features=None, lr=0.05, epochs=25, rng=None):
        self.n_estimators = n_estimators
        self.trees = []
        self.max_depth = max_depth
        self.min_samples_leaf = min_samples_leaf
        self.max_features = max_features
        self.lr = lr
        self.epochs = epochs
        self.rng = np.random.default_rng(rng)

    def fit(self, X, y):
        n_samples = X.shape[0]
        for i in range(self.n_estimators):
            # Bootstrap sample
            indices = self.rng.choice(n_samples, size=n_samples, replace=True)
            X_boot, y_boot = X[indices], y[indices]
            tree = DecisionTreePerceptron(max_depth=self.max_depth,
                                          min_samples_leaf=self.min_samples_leaf,
                                          max_features=self.max_features,
                                          lr=self.lr, epochs=self.epochs,
                                          rng=self.rng.integers(1e9))
            tree.fit(X_boot, y_boot)
            self.trees.append(tree)
            # print(f"Tree {i+1}/{self.n_estimators} trained.")

    def predict(self, X):
        votes = np.zeros(X.shape[0])
        for tree in self.trees:
            votes += tree.predict(X)
        return np.sign(votes)


In [124]:
# Cross-Validation (5-Fold) Setup
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_accuracies = []
all_y_true = []
all_y_pred = []

# Cross-Validation Loop for Perceptron Random Forest
for train_index, test_index in cv.split(X, y_signed):
    X_cv_train, X_cv_test = X[train_index], X[test_index]
    y_cv_train, y_cv_test = y_signed[train_index], y_signed[test_index]

    # Scaling within each fold
    scaler = StandardScaler()
    X_cv_train = scaler.fit_transform(X_cv_train)
    X_cv_test = scaler.transform(X_cv_test)

    # Perceptron Random Forest Initialization
    forest = RandomForestPerceptron(
        n_estimators=25,
        max_depth=6,
        min_samples_leaf=5,
        max_features=int(np.sqrt(X_train.shape[1])),
        lr=0.05,
        epochs=20,
        rng=42
    )

    # Training the Random Forest on CV Training Data
    forest.fit(X_cv_train, y_cv_train)

    # Predicting on CV Test Set
    y_pred_forest = forest.predict(X_cv_test)

    # Calculating and Storing CV Accuracy
    cv_accuracy = accuracy_score(y_cv_test, y_pred_forest)
    cv_accuracies.append(cv_accuracy)

    # Collecting all predictions for full classification report
    all_y_true.extend(y_cv_test)
    all_y_pred.extend(y_pred_forest)

# Calculating Average CV Accuracy
mean_cv_accuracy = np.mean(cv_accuracies)

print("\nCross-Validation Perceptron Random Forest (5-Fold)")
print(f"Mean CV Accuracy: {mean_cv_accuracy:.4f}")
print(f"All CV Accuracies: {cv_accuracies}")
print(classification_report((np.array(all_y_true) == 1).astype(int),
                            (np.array(all_y_pred) == 1).astype(int), zero_division=0))


Cross-Validation Perceptron Random Forest (5-Fold)
Mean CV Accuracy: 0.9631
All CV Accuracies: [1.0, 0.9385964912280702, 0.9473684210526315, 0.956140350877193, 0.9734513274336283]
              precision    recall  f1-score   support

           0       0.98      0.92      0.95       212
           1       0.96      0.99      0.97       357

    accuracy                           0.96       569
   macro avg       0.97      0.96      0.96       569
weighted avg       0.96      0.96      0.96       569



#Conclusion

The Perceptron Random Forest model was implemented. In this model, each decision tree within the forest uses perceptrons at its internal nodes to make splitting decisions. Unlike AdaBoost, which focuses on sequentially improving weak learners, the Random Forest approach relies on bagging. Each decision tree (which internally uses perceptrons for splits) is trained independently on a bootstrap sample of the training data. The perceptron at each node of a tree is trained on the subset of data reaching that node and a random subset of features. This promotes model diversity and helps in minimizing errors when the predictions of all trees are aggregated. The resulting Perceptron Random Forest model demonstrated outstanding performance, achieving a cross-validated accuracy of 96.31%. The model maintained high precision and recall for both classes, achieving 98% precision and 92% recall for class 0, and 96% precision and 99% recall for class 1. This balanced performance indicates the model’s ability to correctly classify both classes with minimal errors. These results highlight the robustness of Random Forest in reducing variance through ensemble learning, making it less prone to overfitting compared to AdaBoost. The model’s high accuracy across all cross-validation folds further emphasizes its stability and generalization capability.