In [1]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier

def update_weights(weights, alpha_m, y_train, y_pred):
    """Re-weight the samples after one boosting round.

    Samples the learner classified correctly are scaled by ``exp(-alpha_m)``
    and misclassified ones by ``exp(+alpha_m)``. The result is NOT
    re-normalised to sum to one.

    Parameters
    ----------
    weights : array-like
        Current per-sample weights.
    alpha_m : float
        Vote weight of the learner that produced ``y_pred``.
    y_train, y_pred : array-like
        True labels and the learner's predictions, compared element-wise.

    Returns
    -------
    numpy.ndarray
        The updated weights; ``weights`` itself is not modified.
    """
    hit = np.equal(y_train, y_pred).astype(int)
    miss = np.not_equal(y_train, y_pred).astype(int)

    # Exactly one of the two factors differs from 1.0 for any given sample.
    shrink = np.exp(-(alpha_m * hit))
    grow = np.exp(alpha_m * miss)
    return weights * shrink * grow
  
def compute_error(y_train, y_pred, weights):
    """Return the weighted misclassification rate of one weak learner.

    Computed as (sum of weights of misclassified samples) / (sum of all
    weights), so ``weights`` does not have to be normalised beforehand.

    Parameters
    ----------
    y_train, y_pred : array-like
        True labels and predicted labels, compared element-wise.
    weights : array-like
        Per-sample weights.

    Returns
    -------
    numpy.float64
        Weighted error in ``[0, 1]`` for non-negative weights.
    """
    weights = np.asarray(weights, dtype=float)
    misclassified = np.not_equal(y_train, y_pred)
    # np.sum is a vectorised reduction; the builtin ``sum`` iterates the
    # array element by element in Python.
    return np.sum(weights * misclassified) / np.sum(weights)

def compute_alpha(error_in_learner, eps=1e-10):
    """Return the AdaBoost vote weight ``0.5 * ln((1 - err) / err)``.

    Parameters
    ----------
    error_in_learner : float
        Weighted error of the weak learner, expected in ``[0, 1]``.
    eps : float, optional
        The error is clamped to ``[eps, 1 - eps]`` before the formula is
        applied. Errors strictly inside that interval are unaffected.

    Returns
    -------
    numpy.float64
        Positive for errors below 0.5, zero at 0.5, negative above 0.5.
    """
    # err == 0 would divide by zero and err == 1 would take log(0); either
    # one produces an infinite alpha that turns later weights into nan.
    err = np.clip(error_in_learner, eps, 1 - eps)
    return 0.5 * np.log((1 - err) / err)
    
    
class My_AdaBoost:
    """Hand-written discrete AdaBoost over depth-1 decision trees (stumps).

    ``predict`` combines the weak learners through the sign of their
    alpha-weighted sum, so labels are expected to be encoded as -1 / +1.

    Attributes
    ----------
    M : int or None
        Number of boosting rounds of the last ``fit_model`` call
        (``None`` until the model has been fitted).
    alpha : list of float
        Vote weight of each weak learner, in training order.
    training_error : list of float
        Weighted training error of each weak learner.
    trained_models : list
        The fitted ``DecisionTreeClassifier`` stumps.
    """

    def __init__(self):
        self.M = None
        self.alpha = []
        self.training_error = []
        self.trained_models = []

    def fit_model(self, X_train, y_train, M=3, verbose=True):
        """Train ``M`` boosted stumps on the given data.

        Parameters
        ----------
        X_train : array-like
            Training features, passed to ``DecisionTreeClassifier.fit``.
        y_train : array-like
            Training labels, one per row of ``X_train``.
        M : int, optional
            Number of boosting rounds.
        verbose : bool, optional
            When true (the default), print weights, predictions, error and
            alpha for every round.
        """
        self.M = M
        # Start every fit from a clean slate. Leaving trained_models
        # untouched would make a second fit append to the old stumps, so the
        # list would no longer line up with self.alpha.
        self.training_error = []
        self.alpha = []
        self.trained_models = []

        n_samples = len(y_train)
        # Round 0 uses uniform weights; later rounds re-weight from the
        # previous learner's mistakes.
        weights = np.ones(n_samples) / n_samples
        alpha_of_learner = None
        y_pred = None

        for m in range(M):
            if m > 0:
                weights = update_weights(weights, alpha_of_learner, y_train, y_pred)
            if verbose:
                print("\nIteration no - ", m)
                print("Weights are ", weights)

            learner = DecisionTreeClassifier(max_depth=1)
            learner.fit(X_train, y_train, sample_weight=weights)
            y_pred = learner.predict(X_train)
            if verbose:
                print("Prediction at iteration ", m)
                print(y_pred)

            self.trained_models.append(learner)

            error_in_learner = compute_error(y_train, y_pred, weights)
            if verbose:
                print("Error rate is ", error_in_learner)
            self.training_error.append(error_in_learner)

            alpha_of_learner = compute_alpha(error_in_learner)
            if verbose:
                print("Alpha value is ", alpha_of_learner)
            self.alpha.append(alpha_of_learner)

        assert len(self.trained_models) == len(self.alpha)

    def predict(self, X_train, verbose=True):
        """Predict labels as the sign of the alpha-weighted vote.

        Parameters
        ----------
        X_train : array-like
            Samples to classify (the name is kept for backward compatibility;
            any feature matrix works).
        verbose : bool, optional
            When true (the default), print the resulting predictions.

        Returns
        -------
        pandas.Series
            Integer predictions indexed ``0..n-1``. A sample whose weighted
            vote is exactly zero is reported as ``0``.

        Raises
        ------
        RuntimeError
            If called before ``fit_model``.
        """
        if self.M is None:
            raise RuntimeError("fit_model() must be called before predict()")

        # Accumulate the weighted votes in a float array rather than an
        # object-dtype DataFrame.
        scores = np.zeros(len(X_train), dtype=float)
        for learner, alpha_m in zip(self.trained_models, self.alpha):
            scores += alpha_m * np.asarray(learner.predict(X_train))

        y_pred = pd.Series(np.sign(scores).astype(int))
        if verbose:
            print("Prediction is\n", y_pred)
        return y_pred

In [2]:
import pandas as pd

# Toy training set: five samples with two integer features each.
X_val = [
    [3, 7],
    [2, 9],
    [1, 4],
    [2, 8],
    [4, 7],
]

# One class label per row of X_val, encoded as -1 / +1.
y_val = [1, -1, 1, -1, -1]

# Assemble features and label into a single frame, with the label last.
df = pd.DataFrame(
    [features + [label] for features, label in zip(X_val, y_val)],
    columns=['X1', 'X2', 'Y'],
)

df

Unnamed: 0,X1,X2,Y
0,3,7,1
1,2,9,-1
2,1,4,1
3,2,8,-1
4,4,7,-1


In [3]:
# Positional split: every column except the last is a feature, the last
# column is the label. Both are taken as plain NumPy arrays.
X, y = df.iloc[:, :-1].values, df.iloc[:, -1].values

In [4]:
# Train the hand-written booster on the toy data. M=3 is fit_model's default
# number of boosting rounds, spelled out here for the reader.
adaboost = My_AdaBoost()
adaboost.fit_model(X_train=X, y_train=y, M=3)


Iteration no -  0
Weights are  [0.2 0.2 0.2 0.2 0.2]
Prediction at iteration  0
[ 1 -1  1 -1  1]
Error rate is  0.2
Alpha value is  0.6931471805599453

Iteration no -  1
Weights are  [0.1 0.1 0.1 0.1 0.4]
Prediction at iteration  1
[-1 -1  1 -1 -1]
Error rate is  0.125
Alpha value is  0.9729550745276566

Iteration no -  2
Weights are  [0.26457513 0.03779645 0.03779645 0.03779645 0.15118579]
Prediction at iteration  2
[ 1  1  1  1 -1]
Error rate is  0.14285714285714282
Alpha value is  0.8958797346140277


In [5]:
# Score the training samples themselves with the fitted ensemble.
y_predict = adaboost.predict(X_train=X)

Prediction is
 0    1
1   -1
2    1
3   -1
4   -1
dtype: int64


In [6]:
# Display the value returned by adaboost.predict in the previous cell.
y_predict

0    1
1   -1
2    1
3   -1
4   -1
dtype: int64

In [7]:
from sklearn.ensemble import AdaBoostClassifier

# Reference run: fit scikit-learn's own AdaBoost (8 estimators) on the same
# toy data and predict on the training samples for comparison.
ada_class = AdaBoostClassifier(n_estimators=8)
y_pre = ada_class.fit(X, y).predict(X)
print(y_pre)

[ 1 -1  1 -1 -1]


In [8]:
import numpy as np

# Sanity check: five uniform weights of 1/5 each, the same starting
# distribution fit_model builds for a five-sample training set.
a = np.ones(5) / 5
print(a)

[0.2 0.2 0.2 0.2 0.2]
