# アダブースト(AdaBoost)

## アダブーストとは

- ブースティング(boosting)の代表的手法
- 複数の弱学習器を直列につなげ、前の学習器が誤分類したサンプルの重みを大きくしたうえで次の学習器を学習させる手法
- 予測は全ての学習器の重みづけ多数決で決定
- 学習器としては決定株(decision stump、深さ1の決定木)が使われることもある

## 分類

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_gaussian_quantiles, make_circles, make_classification
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

# Data preparation
# Seed the global NumPy RNG; it drives the uniform jitter added below.
np.random.seed(0)

# Dataset 1: two Gaussian-quantile clouds stacked together. The second cloud is
# centred at (4, 4) and has its 0/1 labels flipped.
X1, y1 = make_gaussian_quantiles(
    cov=1.5, n_samples=200, n_features=2, n_classes=2, random_state=0
)
X2, y2 = make_gaussian_quantiles(
    mean=(4, 4), cov=1.5, n_samples=200, n_features=2, n_classes=2, random_state=0
)
gaussian_quantiles = (np.concatenate((X1, X2)), np.concatenate((y1, 1 - y2)))

# Dataset 2: two noisy concentric circles.
circles = make_circles(noise=.2, factor=.5, random_state=0)

# Dataset 3: a two-feature classification problem, shifted by uniform jitter in [0, 2).
X, y = make_classification(
    n_features=2, n_redundant=0, n_informative=2, random_state=0, n_clusters_per_class=1
)
X = X + 2 * np.random.uniform(size=X.shape)
linearly_separable = (X, y)

# One subplot row per dataset.
datasets = [gaussian_quantiles, circles, linearly_separable]
rows = len(datasets)

# Build the classifiers
# Each entry pairs the subplot title with the estimator drawn in that column.
# NOTE(review): the `algorithm` argument appears to be deprecated in recent
# scikit-learn releases -- confirm against the installed version.
_labelled_classifiers = (
    ("Decision Tree", DecisionTreeClassifier(max_depth=5)),
    (
        "AdaBoosted Decision Tree",
        AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), algorithm="SAMME", n_estimators=200),
    ),
)
names = [label for label, _ in _labelled_classifiers]
classifiers = [estimator for _, estimator in _labelled_classifiers]

# One extra column on the left shows the raw input data.
cols = len(classifiers) + 1

# Visualization
# Draws a rows x cols grid: one row per dataset, the first column showing the
# raw points and each following column showing one classifier's decision surface.
ax_size = 3       # side length of each subplot (figure-size units)
resolution = 300  # number of grid points per axis for the decision surface
cmap = 'bwr'

plt.figure(figsize=(ax_size * cols, ax_size * rows))

for row, (X, y) in enumerate(datasets):
    # Standardize the features, then hold out 40% of the samples as a test set.
    X = StandardScaler().fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4, random_state=0)

    # Plot bounds: the data range padded by `margin` on every side.
    margin = .5
    x_min, x_max = X[:, 0].min() - margin, X[:, 0].max() + margin
    y_min, y_max = X[:, 1].min() - margin, X[:, 1].max() + margin
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, resolution), np.linspace(y_min, y_max, resolution))

    # Grid points as an (n_points, 2) array. It depends only on the row, so it
    # is built once here instead of once per classifier column.
    mesh = np.c_[xx.ravel(), yy.ravel()]

    for col in range(cols):
        ax = plt.subplot(rows, cols, row * cols + col + 1)

        # Column titles are only needed on the top row.
        if row == 0:
            ax.set_title('Input data' if col == 0 else names[col - 1])

        # Column 0 shows the data only; every other column adds a decision surface.
        if col > 0:
            clf = classifiers[col - 1]
            clf.fit(X_train, y_train)

            # Prefer the continuous decision score; fall back to the
            # probability of class 1 for estimators without one.
            if hasattr(clf, 'decision_function'):
                Z = clf.decision_function(mesh)
            else:
                Z = clf.predict_proba(mesh)[:, 1]
            # reshape() instead of assigning to .shape: in-place shape
            # assignment is discouraged by NumPy.
            Z = Z.reshape(xx.shape)
            ax.pcolormesh(xx, yy, Z, cmap=cmap, alpha=.5)

        # Training points are opaque, test points semi-transparent.
        ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cmap)
        ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cmap, alpha=.6)

        ax.set_xlim(x_min, x_max)
        ax.set_ylim(y_min, y_max)
        ax.set_xticks(())
        ax.set_yticks(())

plt.show()

## 回帰

In [None]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor

# Seed the global NumPy RNG; it drives the noise drawn below.
np.random.seed(0)

# Data preparation
# 100 evenly spaced inputs on [0, 6] as a single-column matrix.
X = np.linspace(0, 6, 100).reshape(-1, 1)
# Target: sum of two sine waves plus Gaussian noise (std 0.4), flattened to 1-D.
noise = np.random.normal(0, .4, size=X.shape)
y = np.ravel(np.sin(X) + np.sin(X * 6) + noise)

# Build the regressors: a single depth-4 tree and AdaBoost over 300 depth-4 trees.
tree = DecisionTreeRegressor(max_depth=4)
ada = AdaBoostRegressor(DecisionTreeRegressor(max_depth=4), n_estimators=300, random_state=0)

# Fit both models on the same data, single tree first.
for model in (tree, ada):
    model.fit(X, y)

# Visualization
margin = .5
# Evaluate both models on a dense grid extending `margin` beyond the training inputs.
x = np.linspace(X.min() - margin, X.max() + margin, 200)[:, np.newaxis]
y_tree = tree.predict(x)
y_ada = ada.predict(x)

# Draw on the current axes through the Axes methods.
ax = plt.gca()
ax.plot(x, y_tree, c="r", label="n_estimators=1", linewidth=2)
ax.plot(x, y_ada, c="b", label="n_estimators=300", linewidth=2)
ax.scatter(X, y, c="k", label="training samples")
ax.legend()

# Vertical limits cover the samples and both prediction curves, padded by `margin`.
y_low = min(y.min(), y_tree.min(), y_ada.min()) - margin
y_high = max(y.max(), y_tree.max(), y_ada.max()) + margin
ax.set_xlim(x.min(), x.max())
ax.set_ylim(y_low, y_high)

# Hide tick marks on both axes.
ax.set_xticks(())
ax.set_yticks(())

plt.show()