In [None]:
# AdaBoost with scikit-learn: imports, synthetic dataset and train/test split


from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
# Synthetic classification dataset: 1000 samples with 10 features, 5 of them
# informative and none redundant. The fixed random_state makes it reproducible.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=5,
    n_redundant=0,
    random_state=42,
)

# Hold out 20% of the samples for evaluation; the split is seeded as well.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Train and evaluate scikit-learn's AdaBoostClassifier

In [None]:
# Weak learner: a decision tree limited to depth 1.
base_estimator = DecisionTreeClassifier(max_depth=1, random_state=42)

# The weak learner is passed positionally on purpose: its keyword name was
# `base_estimator` in older scikit-learn releases, was renamed to `estimator`
# in 1.2, and the old keyword was removed in 1.4. The first positional
# parameter is the weak learner under both names, so this call works with
# either naming scheme.
adaboost = AdaBoostClassifier(base_estimator, n_estimators=50, random_state=42)
adaboost.fit(X_train, y_train)

# Score the boosted ensemble on the held-out split.
y_pred = adaboost.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100.0))


Accuracy: 81.00%




In [None]:
# `confusion_matrix` is already imported from the public `sklearn.metrics`
# namespace in the first cell. The previous import from the private
# `sklearn.metrics._plot.confusion_matrix` module relied on an implementation
# detail that can change between releases, so it is not used here.
cm = confusion_matrix(y_test, y_pred)
cm

array([[89, 15],
       [23, 73]])

AdaBoost implemented from scratch (NumPy only, without scikit-learn)


In [None]:
import numpy as np
from typing import List


class DecisionStump:
    """Single-feature threshold classifier used as AdaBoost's weak learner.

    Attributes:
        polarity: 1 or -1. With 1 the stump predicts -1 where the feature is
            below the threshold and +1 elsewhere; with -1 every prediction is
            negated.
        feature_index: column of ``X`` the stump looks at.
        threshold: split value for that column.
        alpha: weight of this stump's vote in the ensemble.

    All attributes except ``polarity`` start as ``None`` and are filled in by
    ``AdaBoost.fit``.
    """

    def __init__(self):
        self.polarity = 1
        self.feature_index = None
        self.threshold = None
        self.alpha = None

    def predict(self, X):
        """Return the stump's -1/+1 prediction for every row of ``X``.

        The polarity -1 rule is the exact negation of the polarity +1 rule,
        including for rows whose feature value equals the threshold. This is
        the rule the training search assumes when it turns an error ``e`` into
        ``1 - e`` for a flipped stump, so training and inference agree.
        """
        column = np.asarray(X)[:, self.feature_index]
        base = np.where(column < self.threshold, -1, 1)
        return self.polarity * base


class AdaBoost:
    """AdaBoost ensemble of decision stumps.

    Labels passed to ``fit`` are expected to be encoded as -1 / +1: the
    weight update multiplies labels by predictions, and stumps only ever
    predict -1 or +1.

    Args:
        num_estimators: number of boosting rounds (one stump per round).
    """

    def __init__(self, num_estimators):
        self.num_estimators = num_estimators
        self.estimators: List[DecisionStump] = []

    @staticmethod
    def _best_stump(X, y, weights):
        """Search every (feature, threshold) pair for the lowest weighted error.

        Candidate thresholds are the distinct values of each feature column.
        Returns the fitted stump (``alpha`` still unset) and its weighted error.
        """
        stump = DecisionStump()
        min_error = float('inf')
        for feature_idx in range(X.shape[1]):
            column = X[:, feature_idx]
            for threshold in np.unique(column):
                polarity = 1
                prediction = np.where(column < threshold, -1, 1)
                error = np.sum(weights[prediction != y])
                # A rule that is wrong more than half of the time is useful
                # once negated; the negated rule errs on the complementary
                # set of samples, hence 1 - error.
                if error > 0.5:
                    error = 1 - error
                    polarity = -1

                # Strict comparison: on ties the first candidate found wins.
                if error < min_error:
                    stump.polarity = polarity
                    stump.threshold = threshold
                    stump.feature_index = feature_idx
                    min_error = error
        return stump, min_error

    def fit(self, X, y):
        """Fit ``num_estimators`` stumps on ``X`` (n_samples, n_features) and ``y``.

        Any stumps from a previous call are discarded, so the model can be
        refitted without growing beyond ``num_estimators`` members.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        n_samples = X.shape[0]

        # Drop stumps left over from an earlier fit() call.
        self.estimators = []

        # Every sample starts with the same weight 1/N.
        weights = np.full(n_samples, 1 / n_samples)

        for _ in range(self.num_estimators):
            stump, min_error = self._best_stump(X, y, weights)

            # eps keeps the ratio finite when the stump's error is 0 or 1.
            eps = 1e-10
            stump.alpha = 0.5 * np.log((1.0 - min_error + eps) / (min_error + eps))

            # y * prediction is +1 for correct and -1 for wrong samples, so
            # wrong samples are scaled up and correct ones scaled down before
            # renormalising the weights to sum to 1.
            weights *= np.exp(-stump.alpha * y * stump.predict(X))
            weights /= np.sum(weights)

            self.estimators.append(stump)

    def predict(self, X):
        """Return the sign of the alpha-weighted sum of stump votes per row.

        The result is a float array of -1.0 / +1.0; a row whose weighted votes
        cancel exactly (or a model with no stumps) yields 0.0.
        """
        X = np.asarray(X)
        scores = np.zeros(X.shape[0])
        for stump in self.estimators:
            scores += stump.alpha * stump.predict(X)

        return np.sign(scores)


In [None]:
# Toy training set: six two-feature rows; the first three are labelled +1
# and the last three -1.
X = np.array([
    [1, 2],
    [2, 3],
    [3, 4],
    [4, 5],
    [5, 6],
    [6, 7],
])
y = np.array([1] * 3 + [-1] * 3)

# Boost three decision stumps on the toy data.
adaboost = AdaBoost(num_estimators=3)
adaboost.fit(X, y)

# Classify two unseen points, one on each side of the training range.
X_test = np.array([[0, 1], [7, 8]])
y_pred = adaboost.predict(X_test)
print(y_pred)

[ 1. -1.]
