In [4]:
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Wine data: features as a named-column frame, class labels as a series
data = load_wine()
X, y = pd.DataFrame(data.data, columns=data.feature_names), pd.Series(data.target)

# Hold out 20% of the rows for evaluation; the seed is fixed so the split repeats
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Weak learner: a depth-1 decision tree (a "stump")
base_estimator = DecisionTreeClassifier(max_depth=1)

# Boosted ensemble of 50 stumps; fit() hands back the fitted estimator,
# so construction and training are chained into one expression
model = AdaBoostClassifier(
    estimator=base_estimator,
    n_estimators=50,
    learning_rate=1.0,
    random_state=42,
).fit(X_train, y_train)

# Labels predicted for the held-out rows
y_pred = model.predict(X_test)

# Evaluation: each metric is computed and printed in turn under its heading
for heading, metric in (
    ("Confusion Matrix:\n", confusion_matrix),
    ("\nClassification Report:\n", classification_report),
    ("\nAccuracy Score:", accuracy_score),
):
    print(heading, metric(y_test, y_pred))


Confusion Matrix:
 [[14  0  0]
 [ 1 13  0]
 [ 0  1  7]]

Classification Report:
               precision    recall  f1-score   support

           0       0.93      1.00      0.97        14
           1       0.93      0.93      0.93        14
           2       1.00      0.88      0.93         8

    accuracy                           0.94        36
   macro avg       0.95      0.93      0.94        36
weighted avg       0.95      0.94      0.94        36


Accuracy Score: 0.9444444444444444


In [2]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Decision stump implementation
class DecisionStump:
    """One-feature threshold classifier that emits -1/+1 labels.

    Attributes:
        feature_index: column of ``X`` the stump thresholds (``None`` until set).
        threshold: cut value compared against that column (``None`` until set).
        polarity: ``1`` labels values strictly below the threshold as -1;
            any other value labels values strictly above the threshold as -1.
        alpha: weight of this stump's vote (``None`` until set).
    """

    def __init__(self):
        # Every field except polarity starts unset; the training code fills them in.
        self.feature_index = None
        self.threshold = None
        self.polarity = 1
        self.alpha = None

    def predict(self, X):
        """Return a float array with one label (-1 or +1) per row of ``X``.

        ``X`` is indexed as ``X[:, self.feature_index]``, so it must be a
        2-D numpy-style array.
        """
        # Start from "all +1" and flip the rows on the negative side of the cut.
        labels = np.ones(X.shape[0])
        column = X[:, self.feature_index]
        is_negative = (
            column < self.threshold if self.polarity == 1 else column > self.threshold
        )
        labels[is_negative] = -1
        return labels

def adaboost_train(X, y, n_clf=10):
    """Train an AdaBoost ensemble of decision stumps on -1/+1 labels.

    Each round exhaustively searches every (feature, threshold, polarity)
    combination for the stump with the lowest weighted error, assigns it a
    vote weight ``alpha``, and re-weights the samples so that the ones it
    misclassified count more in the next round.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Training features; converted with ``np.asarray``.
    y : array-like of shape (n_samples,)
        Training labels, which must be encoded as -1 and +1.
    n_clf : int, default 10
        Number of boosting rounds (one stump per round).

    Returns
    -------
    list of DecisionStump
        The fitted stumps in training order, each with ``feature_index``,
        ``threshold``, ``polarity`` and ``alpha`` set.

    Raises
    ------
    ValueError
        If ``X`` is not a non-empty 2-D array, ``y`` does not hold exactly one
        label per row of ``X``, or ``y`` contains values other than -1 and +1.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    if X.ndim != 2:
        raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
    n_samples, n_features = X.shape
    if n_samples == 0 or n_features == 0:
        raise ValueError("X must contain at least one sample and one feature")
    if y.shape != (n_samples,):
        raise ValueError("y must be a 1-D array with one label per row of X")
    # The weight update below multiplies y by the prediction, which only makes
    # sense for -1/+1 labels; any other encoding would train silently wrong.
    if not np.isin(y, (-1, 1)).all():
        raise ValueError("labels in y must be encoded as -1 and +1")

    # Keeps the log ratio finite when the weighted error is exactly 0 or 1.
    eps = 1e-10

    # Initialize weights uniformly
    w = np.full(n_samples, 1 / n_samples)

    # Candidate thresholds depend only on X, so compute them once rather than
    # once per boosting round.
    thresholds_per_feature = [np.unique(X[:, j]) for j in range(n_features)]

    classifiers = []

    for _ in range(n_clf):
        clf = DecisionStump()
        min_error = float('inf')

        # Find best decision stump
        for feature_i, thresholds in enumerate(thresholds_per_feature):
            X_column = X[:, feature_i]
            for threshold in thresholds:
                for polarity in (1, -1):
                    if polarity == 1:
                        negative = X_column < threshold
                    else:
                        negative = X_column > threshold
                    predictions = np.where(negative, -1, 1)

                    # Weighted error: total weight of the misclassified samples
                    # (numpy reduction instead of the builtin sum over an array).
                    error = w[predictions != y].sum()

                    # Strict "<" keeps the first candidate found on ties.
                    if error < min_error:
                        min_error = error
                        clf.polarity = polarity
                        clf.threshold = threshold
                        clf.feature_index = feature_i

        # Compute alpha; eps on both sides of the ratio avoids log(0) and
        # division by zero at the extremes.
        clf.alpha = 0.5 * np.log((1.0 - min_error + eps) / (min_error + eps))

        # Predict with the best stump
        predictions = clf.predict(X)

        # Update weights: y * predictions is +1 where correct and -1 where
        # wrong, so misclassified samples are scaled up when alpha > 0.
        w *= np.exp(-clf.alpha * y * predictions)
        w /= np.sum(w)  # normalize

        classifiers.append(clf)

    return classifiers

def adaboost_predict(X, classifiers):
    """Predict -1/+1 labels by a weighted vote of the given classifiers.

    Each classifier contributes ``clf.alpha * clf.predict(X)``; the label is
    +1 where the summed score is non-negative and -1 otherwise.

    Parameters
    ----------
    X : array-like
        Samples to label; converted with ``np.asarray`` and passed to every
        classifier's ``predict``.
    classifiers : iterable
        Objects exposing an ``alpha`` attribute and a ``predict(X)`` method,
        e.g. the list returned by ``adaboost_train``.

    Returns
    -------
    numpy.ndarray
        Float array containing only -1.0 and 1.0, one entry per prediction.

    Raises
    ------
    ValueError
        If ``classifiers`` is empty.
    """
    classifiers = list(classifiers)
    if not classifiers:
        raise ValueError("classifiers must contain at least one trained classifier")

    X = np.asarray(X)
    scores = np.sum([clf.alpha * clf.predict(X) for clf in classifiers], axis=0)

    # np.sign would return 0 for an exactly tied vote, which is not a valid
    # class label; ties are resolved in favour of +1 instead.
    return np.where(scores >= 0, 1.0, -1.0)

# === Main ===

# Breast-cancer data as plain arrays
data = load_breast_cancer()
X = data.data

# Re-encode the 0/1 target as -1/+1, the label convention the boosting code uses
y = np.where(data.target == 0, -1, 1)

# Hold out 20% of the rows for evaluation; the seed is fixed so the split repeats
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit ten boosted stumps, then label the held-out rows with their weighted vote
classifiers = adaboost_train(X_train, y_train, n_clf=10)
y_pred = adaboost_predict(X_test, classifiers)

# Evaluation: each metric is computed and printed in turn under its heading
for heading, metric in (
    ("Confusion Matrix:\n", confusion_matrix),
    ("\nClassification Report:\n", classification_report),
    ("\nAccuracy Score:", accuracy_score),
):
    print(heading, metric(y_test, y_pred))


Confusion Matrix:
 [[40  3]
 [ 2 69]]

Classification Report:
               precision    recall  f1-score   support

          -1       0.95      0.93      0.94        43
           1       0.96      0.97      0.97        71

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114


Accuracy Score: 0.956140350877193
