# Setup

In [None]:
import numpy as np
import pylab as plt

from sklearn.datasets import make_circles, load_digits
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# **AdaBoost**

Boosting is a powerful technique used to construct a committee whose performance is significantly better than that of every single model within that committee. It delivers the most significant improvements when the predictions of individual models are slightly better than random predictions. In this case, the individual models are also called weak learners.

It consists of the following steps:
- The individual models (the base classifiers) are trained in sequence.
- Each base classifier is trained using weighted training data.
- The weighting coefficients of each sample depend on the performance of the previously trained base classifiers. In particular, misclassified data points receive a higher weight for the next classifier's training.
- After training all the base classifiers, these are combined through a suitably weighted average of individual model outputs.

In [None]:
# Seed NumPy's global RNG before sampling the toy data set.
np.random.seed(2)
X, y = make_circles(noise=.2, factor=.2)

# Relabel class 0 as -1 so that the targets are in {-1, +1}.
y = np.where(y == 0, -1, y)

# Scatter the two feature columns, coloured by label.
plt.scatter(*X.T, c=y)
plt.show()

In [None]:
def plot_contour(clf, X, y, sample_weight=None):
    """Plot the decision regions of ``clf`` with the samples drawn on top.

    ``clf.predict`` is evaluated on a regular grid covering [-3, 3) in both
    dimensions (step 0.1) and the result is drawn as a filled contour. The
    samples ``X`` are scattered above it, coloured by ``y``.

    Args:
        clf: Object with a ``predict`` method accepting an ``(n, 2)`` array.
        X: Samples; columns 0 and 1 are used as plot coordinates.
        y: Per-sample values used to colour the markers.
        sample_weight: Optional per-sample weights used as marker sizes after
            being clipped to the range [20, 500]. ``None`` is passed through
            to ``plt.scatter`` unchanged.
    """
    grid_x, grid_y = np.mgrid[-3:3:.1, -3:3:.1]
    grid_points = np.column_stack((grid_x.ravel(), grid_y.ravel()))
    predictions = clf.predict(grid_points).reshape(grid_x.shape)

    # Clip (not rescale) the weights so every marker size lies in [20, 500].
    marker_size = None
    if sample_weight is not None:
        marker_size = np.clip(sample_weight, 20, 500)

    # zorder keeps the samples (2) above the decision regions (1).
    plt.scatter(
        X[:, 0], X[:, 1],
        c=y, zorder=2, s=marker_size, cmap='coolwarm', edgecolors='k',
    )
    plt.contourf(grid_x, grid_y, predictions, zorder=1, alpha=.5, cmap='coolwarm')
    plt.show()

> Implement the AdaBoost algorithm.

Use `DecisionTreeClassifier(max_depth=1)` as the weak learner. Its method `.fit(X, y, sample_weight=sample_weight)` has an additional argument, `sample_weight`, which lets us specify the per-sample weights considered in the loss function.

In [None]:
class Boosting:
    """AdaBoost committee of decision stumps for binary labels in {-1, +1}.

    Implements the AdaBoost scheme described in Bishop (2006), Section 14.3:
    the weak learners are trained one after another on re-weighted data, and
    the committee predicts the sign of the alpha-weighted sum of their votes.

    Attributes set by ``fit``:
        models: Fitted ``DecisionTreeClassifier(max_depth=1)`` weak learners.
        alphas: Voting weight ``ln((1 - eps) / eps)`` of each weak learner.
        epsilons: Weighted training error ``eps`` of each weak learner.
        sample_weight: Sample weights after the last boosting round.
    """

    def __init__(self, n_models=100):
        """Store the maximum number of weak learners to train."""
        self.n_models = n_models
        self.models = []
        self.alphas = []
        self.epsilons = []
        self.sample_weight = None

    def fit(self, X, y, plot=False):
        """Train up to ``n_models`` weak learners on ``(X, y)``.

        Args:
            X: Training samples, one row per sample.
            y: Training labels; must take values in {-1, +1}, because the
                committee output is the sign of a weighted sum of labels.
            plot: If true, call ``plot_contour`` after every boosting round
                to show the current committee and sample weights (intended
                for two-dimensional ``X``).

        Returns:
            The fitted instance.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        n_samples = len(y)

        # Reset state so that calling fit twice does not stack committees.
        self.models = []
        self.alphas = []
        self.epsilons = []
        self.sample_weight = np.full(n_samples, 1.0 / n_samples)

        for _ in range(self.n_models):
            stump = DecisionTreeClassifier(max_depth=1)
            stump.fit(X, y, sample_weight=self.sample_weight)
            missed = stump.predict(X) != y

            # Weighted error of this learner under the current weights.
            epsilon = self.sample_weight[missed].sum() / self.sample_weight.sum()
            # Keep the log-odds finite when the learner is perfect (eps = 0)
            # or always wrong (eps = 1).
            eps_safe = np.clip(epsilon, 1e-10, 1 - 1e-10)
            alpha = np.log((1 - eps_safe) / eps_safe)

            self.models.append(stump)
            self.alphas.append(alpha)
            self.epsilons.append(epsilon)

            # Only misclassified samples are up-weighted (factor exp(alpha)).
            self.sample_weight = self.sample_weight * np.exp(alpha * missed)

            if plot:
                plot_contour(self, X, y, self.sample_weight)

            # A perfect learner leaves the weights untouched, so every later
            # round would reproduce the same stump; stop early.
            if not missed.any():
                break

        return self

    def predict(self, X):
        """Return the committee prediction (-1 or +1) for each row of ``X``.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if not self.models:
            raise RuntimeError("Boosting.predict called before fit")

        X = np.asarray(X)
        score = np.zeros(len(X))
        for alpha, stump in zip(self.alphas, self.models):
            score += alpha * stump.predict(X)

        # Ties (score == 0) are resolved in favour of the positive class.
        return np.where(score >= 0, 1, -1)

# Fit a committee of 20 weak learners on the current data (no per-round
# plots), then draw its decision regions with the final sample weights as
# marker sizes.
clf = Boosting(n_models=20)
clf.fit(X, y, plot=False)
plot_contour(clf, X, y, sample_weight=clf.sample_weight)

> Visualize the decision boundaries for different numbers of estimators during training, similar to Figure 14.2 in [Bishop2006].

In [None]:
####################
# Your Code Here   #
####################

> Test your implementation using the given more difficult dataset.

In [None]:
# Reduce the digits data set to a binary problem: digit 1 versus digit 7.
X, y = load_digits(return_X_y=True)
idx = np.isin(y, (1, 7))
X, y = X[idx], y[idx]

# Digit 1 already carries the label +1; only digit 7 has to be mapped to -1.
y[y == 7] = -1

# Hold out a third of the samples for testing (fixed split).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.33
)

# Show one randomly chosen training sample as an 8x8 image.
sample_index = np.random.randint(len(X_train))
plt.imshow(X_train[sample_index].reshape(8, 8))
plt.show()

> Train multiple Boosting models with an increasing number of estimators. In each iteration, store the training error, test error, alphas and epsilons. Investigate how these quantities behave with respect to the number of estimators.

In [None]:
####################
# Your Code Here   #
####################

In [None]:
####################
# Your Code Here   #
####################

In [None]:
####################
# Your Code Here   #
####################

In [None]:
####################
# Your Code Here   #
####################