In [2]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
import random


In [None]:
class AdaBoostScratch:
    """Discrete AdaBoost over depth-1 decision trees, trained by weighted resampling.

    Each round draws a bootstrap sample of the training frame according to the
    current sample weights, fits a ``DecisionTreeClassifier(max_depth=1)`` on
    it, scores that stump on the full training frame and re-weights the rows so
    that misclassified ones are more likely to be drawn next round.

    Labels must be encoded as -1 / +1: ``predict`` combines the stumps by
    taking the sign of their alpha-weighted vote.

    Attributes:
        n_estimators: Maximum number of boosting rounds.
        models: List of ``(fitted_stump, alpha)`` tuples, in training order.
        alphas: The alpha of every kept stump, parallel to ``models``.
    """

    # Floor applied to a zero training error so that a perfect stump receives
    # a large but finite vote instead of a division by zero.
    _MIN_ERROR = 1e-10

    def __init__(self, n_estimators=50):
        """Create an unfitted booster that runs at most ``n_estimators`` rounds."""
        self.n_estimators = n_estimators
        self.models = []
        self.alphas = []

    def fit(self, df):
        """Fit the ensemble on a DataFrame holding features and a ``target`` column.

        Columns named ``target``, ``weights`` or ``y_pred`` are never used as
        features. The input frame is not modified.

        Args:
            df: pandas DataFrame with a ``target`` column of -1 / +1 labels;
                every other non-reserved column is treated as a feature.

        Returns:
            self, so calls can be chained.

        Raises:
            KeyError: if ``df`` has no ``target`` column.
            ValueError: if ``df`` is empty or ``target`` holds values other
                than -1 and +1.
        """
        n = df.shape[0]
        if n == 0:
            raise ValueError("Cannot fit AdaBoostScratch on an empty DataFrame")

        reserved = ('target', 'weights', 'y_pred')
        feature_cols = [col for col in df.columns if col not in reserved]
        X_full = df[feature_cols]
        y_true = df['target'].to_numpy()

        # predict() takes the sign of the weighted vote, which is only
        # meaningful for -1/+1 labels; fail loudly rather than train garbage.
        if not set(np.unique(y_true).tolist()) <= {-1, 1}:
            raise ValueError("'target' must contain only the labels -1 and 1")

        # A refit starts from scratch instead of stacking on the old ensemble.
        self.models = []
        self.alphas = []

        weights = np.full(n, 1.0 / n)

        for i in range(self.n_estimators):
            # The round index doubles as the sampling seed, so a skipped round
            # is followed by a different draw even though weights are unchanged.
            sampled = df.sample(
                n=n, replace=True, weights=weights, random_state=i
            ).reset_index(drop=True)

            clf = DecisionTreeClassifier(max_depth=1)
            clf.fit(sampled[feature_cols], sampled['target'])

            y_pred = np.asarray(clf.predict(X_full))
            wrong = y_pred != y_true
            err = weights[wrong].sum() / weights.sum()

            # A stump at or below chance level would get a zero or negative
            # vote; drop it and redraw.
            if err >= 0.5:
                continue

            perfect = err == 0
            err = max(err, self._MIN_ERROR)
            alpha = 0.5 * np.log((1 - err) / err)

            self.models.append((clf, alpha))
            self.alphas.append(alpha)

            # With no mistakes the weights cannot change any more, so further
            # rounds would only repeat this one.
            if perfect:
                break

            # Up-weight mistakes, down-weight hits, then renormalise.
            weights = weights * np.where(wrong, np.exp(alpha), np.exp(-alpha))
            weights = weights / weights.sum()

        return self

    def predict(self, X):
        """Predict -1 / +1 labels as the sign of the alpha-weighted stump vote.

        Args:
            X: Feature matrix with the same feature columns used in ``fit``.

        Returns:
            Float ndarray of shape ``(X.shape[0],)`` containing -1.0 or 1.0.
            An exactly tied vote resolves to 1.0.

        Raises:
            ValueError: if the ensemble holds no stumps (``fit`` was not
                called, or it kept none).
        """
        if not self.models:
            raise ValueError(
                "AdaBoostScratch has no fitted weak learners; call fit() first"
            )

        final = np.zeros(X.shape[0])
        for model, alpha in self.models:
            final += alpha * np.asarray(model.predict(X))

        # np.sign would emit 0 (not a valid label) on a tie.
        return np.where(final < 0, -1.0, 1.0)


In [7]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Fixed seed so the synthetic data, the train/test split and therefore the
# reported accuracy are the same on every Restart & Run All.
SEED = 42

X, y = make_classification(n_samples=300, n_features=5, n_informative=3, random_state=SEED)
# AdaBoostScratch.predict takes the sign of a weighted vote, so the 0/1 labels
# produced by make_classification are re-encoded as -1/+1.
y = np.where(y == 0, -1, 1)

df = pd.DataFrame(X, columns=[f"f{i}" for i in range(X.shape[1])])
df['target'] = y

train_df, test_df = train_test_split(df, test_size=0.2, random_state=SEED)

# Train
model = AdaBoostScratch(n_estimators=20)
model.fit(train_df)

# Predict
X_test = test_df.drop(columns='target')
y_test = test_df['target']
y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))


Accuracy: 0.9
