In [48]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import classification_report

X, y = make_classification(n_samples=500, random_state=1)
# because hypothesis function takes the sign, must change to -1, 1
y = np.where(y==0, -1, 1)  #change our y to be -1 if it is 0, otherwise 1
# print(y)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42)

In [52]:
class DecisionStump():
    def __init__(self):
        # Determines whether threshold should be evaluated as < or >
        self.polarity = 1 # will check both signs
        self.feature_index = None
        self.threshold = None
        # Voting power of the stump
        self.alpha = None

In [59]:
class AdaBoost():
    def __init__(self, S=5, eta=0.75):
        self.S = S # classifiers
        #Task 2
        self.eta = eta # the constant in front of a
        
    def fit(self, X, y):
        m, n = X.shape
        
        #initially, we set our weight to 1/m
        W = np.full(m, 1/m)
                
        #list of classifiers
        self.clfs = []
        
        for s in range(self.S):
            clf = DecisionStump()
            
            #set initial minimum error to infinity so at least the first stump is identified
            min_err = np.inf
            
            #previously we don't need to do this
            #NO LONGER GINI INDEX, use weighted
            #since sklearn learn does it
            #but now we have to loop all features, all threshold
            #and all polarity to find the minimum weighted errors
            for feature in range(n):
                feature_vals = np.unique(X[:, feature])
                thresholds = (feature_vals[:-1] + feature_vals[1:])/2 #(everything but the last + everything but the first)/2
#                 print(thresholds)
                for threshold in thresholds:
                    #Task 3
                    for polarity in [1, -1]:
                        yhat = np.ones(len(y)) # initialize to 1
                        yhat[polarity * X[:, feature] < polarity * threshold] = -1  #polarity=1 rule
                        err = W[(yhat != y)].sum()
                        
                        #save the best stump
                        if err < min_err:
                            clf.polarity = polarity
                            clf.threshold = threshold
                            clf.feature_index = feature
                            min_err = err
                            
            #once we know which is the best stump
            #we calculate its alpha, and reweight samples
            eps = 1e-10 #Task 1
            clf.alpha = self.eta * (np.log ((1 - min_err) / (min_err + eps)))            
            W = W * np.exp(-clf.alpha * y * yhat)
            W = W / sum (W)
            
            #save clf
            self.clfs.append(clf)
                        
    def predict(self, X):
        m, n = X.shape
        yhat = np.zeros(m)
        for clf in self.clfs:
            pred = np.ones(m) #set all to 1
            pred[clf.polarity * X[:, clf.feature_index] < clf.polarity * clf.threshold] = -1  #polarity=1 rule
            yhat += clf.alpha * pred

        return np.sign(yhat)

In [60]:
model = AdaBoost(S=20)
model.fit(X_train, y_train)
yhat = model.predict(X_test)
print(classification_report(y_test, yhat))

              precision    recall  f1-score   support

          -1       0.72      0.99      0.83        79
           1       0.98      0.56      0.71        71

    accuracy                           0.79       150
   macro avg       0.85      0.78      0.77       150
weighted avg       0.84      0.79      0.78       150



In [61]:
#Changing eta between 0-1 doesn't make much differnce
model = AdaBoost(S=20, eta=0.2)
model.fit(X_train, y_train)
yhat = model.predict(X_test)
print(classification_report(y_test, yhat))

              precision    recall  f1-score   support

          -1       0.72      0.99      0.83        79
           1       0.98      0.56      0.71        71

    accuracy                           0.79       150
   macro avg       0.85      0.78      0.77       150
weighted avg       0.84      0.79      0.78       150



In [62]:
#Changing eta between 0-1 doesn't make much differnce
model = AdaBoost(S=20, eta=1)
model.fit(X_train, y_train)
yhat = model.predict(X_test)
print(classification_report(y_test, yhat))

              precision    recall  f1-score   support

          -1       0.72      0.99      0.83        79
           1       0.98      0.56      0.71        71

    accuracy                           0.79       150
   macro avg       0.85      0.78      0.77       150
weighted avg       0.84      0.79      0.78       150



In [65]:
# eta=2 breaks it
model = AdaBoost(S=20, eta=2)
model.fit(X_train, y_train)
yhat = model.predict(X_test)
print(classification_report(y_test, yhat))

              precision    recall  f1-score   support

          -1       0.53      1.00      0.69        79
           1       0.00      0.00      0.00        71

    accuracy                           0.53       150
   macro avg       0.26      0.50      0.34       150
weighted avg       0.28      0.53      0.36       150



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
