In [1]:
import numpy as np
import math
from copy import deepcopy
import sklearn.datasets
import matplotlib.pyplot as plt
from sklearn.svm import SVC

# Dataset

We import the dataset from Hastie & Tibshirani book. 
This is an artificially generated binary classification problem. Labels are in the set $\{-1,+1\}$.

In [2]:
X,y = sklearn.datasets.make_hastie_10_2()
X_train = X[0:8000,:]
y_train = y[0:8000]
X_test = X[8000:,:]
y_test = y[8000:]

# Adaboost implementation

Here we implement the Adaboost algorith. We shall assume that:
- that the problem is a binary classification problem with labels in $\{-1, +1\}$.
- that the weakModel can fit a weighted sample set by means of the call `weakModel.fit(X,y,sample_weight=w)` where `w` is a vector of length $n=|X|=|y|$.

In [3]:
class AdaBoost:
    def __init__(self, weakModel, T):
        self.T = T
        self.weakModel = weakModel
        self.alphas = []
        self.classifiers = []
        self.wl_errors = []
        self.ens_errors = []

    def trainWeakModel(self, X, y, w):
        # Per come è impostato scikitlearn
        result = deepcopy(self.weakModel)
        result.fit(X,y,sample_weight = w)
        return result

    def weightedError(y, y_, w):
        # errors = (y != y_)
        # return np.matmul(w, errors)
        return np.sum(w[y != y_])

    def fit(self, X, y):
        n = len(y)
        w = np.ones(n) / n

        ensamble_predictions = np.zeros(n)
        print_step = np.max([1, self.T/100])

        for t in range(self.T):
            mt = self.trainWeakModel(X,y,w)
            y_ = mt.predict(X)
            #et = weightedError(y, y_, w)
            et = np.sum(w[y != y_])

            if et >= 0.5:
                print("Error et>=0.5")
            
            at = 0.5 * math.log((1 - et)/et)

            self.classifiers.append(mt)
            self.alphas.append(at)

            # diamo peso agli esempi
            w = w * np.exp(-at * y * y_)
            # normalizzo (w_i > 0)
            w = w / np.sum(w)

            ensamble_predictions += at * y_
            # guardiamo le etichette (booleane) e le sommiamo
            ensamble_error = np.sum(np.sign(ensamble_predictions) != y) / n

            self.wl_errors.append(et)
            self.ens_errors.append(ensamble_error)

            # Logging per monitorare l'apprendimento
            if t > 1 and t % print_step == 0:
                print("step:{} et:{:4f} error:{:.4f}".format(t, et, ensamble_error))
          
        #self.alphas = self.alphas
        return self

    def predict(self, X):
        result = np.zeros(len(X))
        for t in range(self.T):
            y_ = self.classifiers[t].predict(X)
            result += self.alphas[t] * y

        return np.sign(result)


# Testing with an SVM

Let us now see how our iplementation of AdaBoost performs on the dataset we loaded above. In this experiment we want the weak learning algorithm $\mathcal{A}$ to be good, but not too much. An SVM with a polynomial kernel of degre 3 works fine for our needs.

The SVC implementation provided by sklearn does not work well when weights are normalized. The following code simply "denormalize" weights befor calling into SVC implementation.

In [4]:
    class SVC_:
        def __init__(self, kernel="rbf", degree="3"):
            self.svc = SVC(kernel=kernel, degree=degree)

        def fit(self, X,y,sample_weight=None):
            if sample_weight is not None:
                sample_weight = sample_weight * len(X)

            self.svc.fit(X,y,sample_weight=sample_weight)
            return self

        def predict(self, X):
            return self.svc.predict(X)

In [5]:
weakModel = SVC(kernel="poly", degree=3)
adaboost = AdaBoost(weakModel, 100)
clf = adaboost.fit(X_train, y_train)

step:2 et:0.500000 error:0.4941
step:3 et:0.500000 error:0.4941
step:4 et:0.500000 error:0.4941
step:5 et:0.500000 error:0.4941
step:6 et:0.500000 error:0.4941
step:7 et:0.500000 error:0.4941
step:8 et:0.500000 error:0.4941
step:9 et:0.500000 error:0.4941
step:10 et:0.500000 error:0.4941
step:11 et:0.500000 error:0.4941


KeyboardInterrupt: 

In [None]:
y_train_ = clf.predict(X_train)
y_test_ = clf.predict(X_test)

In [None]:
accuracy = (0.5 - y_train_.T * y_train/(2 * len(y_train)))

In [None]:
ptl.plot(adaboost.wl_errors, label="$\epsilon_t$)
ptl.plot(adaboost.ens_errors, label="EnsError")
plt.legend()

# Testing on the weakest of the weak learners

We now want to experiment with a VERY weak learner. The weak learner works as follows:

- it creates a random linear model by generating the needed weight vector $\mathbf{w}$ at random; each weight shall be sampled from U(-1,1);
- it evaluates the weighted loss $\epsilon_t$ on the given dataset and flip the linear model if $\epsilon_t > 0.5$
- at prediction time it predicts +1 if $\mathbf{x} \cdot \mathbf{w} > 0$ it predicts -1 otherwise.

In [66]:
class RandomLinearModel:
    def loss(self, y, y_, w):
        return np.sum(w[y != y_])
        
    def fit(self,X,y,sample_weight=None):
        self.w = (np.random.rand(X.shape[1]) - 0.5) * 2.0
        y_ = self.predict(x)
        if self.loss(y, y_, sample_weight) > 0.5:
            self.w = - self.w


    def predict(self,X):
        return np.sing(np.matmul(X, self.w))

Let us now learn an AdaBoost model using the RandomLinearModel weak learner printing every $K$ iterations the weighted error and the current error of the ensemble. Evaluate the training and test error of the final ensemble model.

In [None]:
rs = RandomLinearModel()
a = AdaBoost(rs,10000)
a.fit(X_train,y_train)

y_train_ = a.predict(X_train)
y_test_ = a.predict(X_test)