In [2]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier

# Small toy dataset: two features (X1, X2) and a binary label y in {-1, +1}.
data = dict(
    X1=[1, 2, 3, 4, 5, 6],
    X2=[2, 1, 2, 3, 3, 2],
    y=[-1, -1, 1, 1, 1, -1],  # binary labels (-1 and 1)
)

In [4]:
def adaBoost_train(X, y, n_estimators=10):
    """Train a discrete AdaBoost ensemble of depth-1 decision trees (stumps).

    Parameters
    ----------
    X : array-like exposing ``.shape``
        Feature matrix with one row per sample; handed unchanged to every
        stump's ``fit`` and ``predict``.
    y : array-like of length ``X.shape[0]``
        Class labels. ``adaBoost_predict`` combines the stumps with
        ``np.sign``, so labels are expected to be -1 / +1.
    n_estimators : int, default 10
        Number of boosting rounds (one stump is trained per round).

    Returns
    -------
    models : list
        The fitted stumps, in training order.
    alphas : list of float
        The "amount of say" of each stump, aligned with ``models``.
    """
    # Bound that keeps the weighted error strictly inside (0, 1), so the
    # log-ratio below stays finite even for a perfect (or always-wrong) stump.
    eps = 1e-10

    y_true = np.asarray(y)

    # n_samples is the number of rows; each sample starts with weight 1/n_samples.
    n_samples = X.shape[0]
    W = np.ones(n_samples) / n_samples

    # Fitted weak learners and their amount of say, one entry per round.
    models = []
    alphas = []

    for _ in range(n_estimators):
        # A stump: one decision node with two leaves.
        stump = DecisionTreeClassifier(max_depth=1)
        stump.fit(X, y, sample_weight=W)

        # Boolean mask of the samples this stump gets wrong.
        missed = stump.predict(X) != y_true

        # Weighted error rate, clipped BEFORE forming the ratio: with
        # error == 0 the unclipped ratio is a division by zero, alpha becomes
        # inf and the weight update turns every weight into NaN.
        error_per_stump = np.sum(W * missed) / np.sum(W)
        error_per_stump = np.clip(error_per_stump, eps, 1.0 - eps)

        amount_to_say = 0.5 * np.log((1.0 - error_per_stump) / error_per_stump)

        # Update the sample weights. With the 1/2 * log(...) form of alpha the
        # matching update is exp(+alpha) for misclassified samples and
        # exp(-alpha) for correctly classified ones; scaling only the misses
        # by exp(alpha) would under-emphasise the hard samples.
        W = W * np.exp(amount_to_say * np.where(missed, 1.0, -1.0))
        W /= np.sum(W)  # renormalise so the weights sum to 1 again

        models.append(stump)
        alphas.append(amount_to_say)

    return models, alphas
        
        

In [5]:
def adaBoost_predict(X, models, alphas):
    """Combine the stumps' predictions into one weighted vote per sample.

    Each model in ``models`` predicts on ``X``; its prediction is scaled by
    the matching entry of ``alphas`` and the scaled predictions are summed.
    The sign of that sum is returned, so an exact tie (or an empty ensemble)
    yields 0 for that sample.

    Parameters
    ----------
    X : array-like exposing ``.shape``
        Samples to classify, one row per sample.
    models : iterable
        Fitted weak learners, each providing ``predict(X)``.
    alphas : iterable of float
        Amount of say per learner, paired with ``models`` by position.

    Returns
    -------
    numpy.ndarray
        Array of length ``X.shape[0]`` holding -1.0, 0.0 or 1.0.
    """
    n_rows = X.shape[0]
    weighted_vote = np.zeros(n_rows)

    # Accumulate every learner's vote, scaled by how much say it earned.
    for learner, say in zip(models, alphas):
        votes = learner.predict(X)
        weighted_vote += say * votes

    return np.sign(weighted_vote)

In [6]:
# Toy training set: two integer feature columns and one label in {-1, +1} per row.
X = pd.DataFrame.from_dict(
    {
        "X1": [1, 2, 3, 4, 5, 6],
        "X2": [2, 1, 2, 3, 3, 2],
    }
)
y = np.asarray([-1, -1, 1, 1, 1, -1])

# Fit an ensemble of 10 stumps, then predict on the very rows it was trained
# on (a training-set sanity check, not an estimate of generalisation).
models, alphas = adaBoost_train(X, y, n_estimators=10)
y_pred = adaBoost_predict(X, models=models, alphas=alphas)

print(y_pred)

[-1. -1.  1.  1.  1. -1.]
