# In [None]:
import sys
import warnings
if not sys.warnoptions:
    warnings.simplefilter("ignore")

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor

# moons
# Noisy two-moons toy dataset, split 80/20 into train and test sets.
X, y = make_moons(n_samples=10000, noise=0.4, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# voting classifiers
# Three different base learners plus a soft-voting ensemble built on them.
log_clf = LogisticRegression()
rnd_clf = RandomForestClassifier()
svm_clf = SVC(probability=True)  # soft voting needs class probabilities
named_estimators = [("lr", log_clf), ("rf", rnd_clf), ("svc", svm_clf)]
voting_clf = VotingClassifier(estimators=named_estimators, voting="soft")

# Report the test accuracy of every individual model and of the ensemble.
for clf in (log_clf, rnd_clf, svm_clf, voting_clf):
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    print(type(clf).__name__, accuracy_score(y_test, y_pred))
    
# bagging and pasting
# Compare one decision tree against an ensemble of 500 trees, each fit on
# 100 training samples drawn with replacement (bootstrap=True).
tree_clf = DecisionTreeClassifier()
bag_clf = BaggingClassifier(
    tree_clf,
    n_estimators=500,
    max_samples=100,
    bootstrap=True,
    n_jobs=-1,
)
for clf in (tree_clf, bag_clf):
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    print(type(clf).__name__, accuracy_score(y_test, y_pred))
    
# Out-Of-Bag (oob) evaluation
# Same bagging setup as before, but with oob_score=True so the fitted
# ensemble exposes oob_score_, printed next to the held-out test accuracy.
bag_clf = BaggingClassifier(
    tree_clf,
    n_estimators=500,
    max_samples=100,
    bootstrap=True,
    n_jobs=-1,
    oob_score=True,
)
bag_clf.fit(X_train, y_train)
oob_accuracy = bag_clf.oob_score_
print("oob socore:", oob_accuracy)
test_pred = bag_clf.predict(X_test)
print("test score:", accuracy_score(y_test, test_pred))

# random forest
# Both forests below share the same size, leaf limit and parallelism.
shared_forest_kwargs = {"n_estimators": 500, "max_leaf_nodes": 16, "n_jobs": -1}
rnd_clf = RandomForestClassifier(**shared_forest_kwargs)
rnd_clf.fit(X_train, y_train)
rnd_pred = rnd_clf.predict(X_test)
print("random forest:", accuracy_score(y_test, rnd_pred))

# extra trees
ext_clf = ExtraTreesClassifier(**shared_forest_kwargs)
ext_clf.fit(X_train, y_train)
ext_pred = ext_clf.predict(X_test)
print("extra trees:", accuracy_score(y_test, ext_pred))

# feature importance
# Fit a 500-tree forest on the full iris dataset and print the importance
# the forest assigns to each input feature.
iris = load_iris()
rnd_clf = RandomForestClassifier(n_estimators=500, n_jobs=-1)
rnd_clf.fit(iris["data"], iris["target"])
importances = rnd_clf.feature_importances_
for name, score in zip(iris["feature_names"], importances):
    print(name, score)
    
# Ada boost
# Boost 200 depth-1 trees (decision stumps) on the moons training split.
# NOTE(review): algorithm="SAMME.R" is deprecated in recent scikit-learn
# releases -- confirm against the installed version before upgrading.
stump = DecisionTreeClassifier(max_depth=1)
ada_clf = AdaBoostClassifier(
    stump,
    n_estimators=200,
    algorithm="SAMME.R",
    learning_rate=0.5,
)
ada_clf.fit(X_train, y_train)
ada_pred = ada_clf.predict(X_test)
print("Ada:", accuracy_score(y_test, ada_pred))

# hard-coded gradient boosting
# Noisy quadratic data: X uniform in [-3, 3), y = 0.5*X^2 + X + 2 + noise.
# NOTE: from here on X and y refer to this regression set, not the moons data.
m = 200
X = 6 * np.random.rand(m, 1) - 3
y = 0.5 * X**2 + X + 2 + 0.5 * np.random.randn(m, 1)

# Each tree is fit on the residuals left over by the trees before it.
tree_reg1 = DecisionTreeRegressor(max_depth=2).fit(X, y)
y2 = y - tree_reg1.predict(X).reshape(-1, 1)
tree_reg2 = DecisionTreeRegressor(max_depth=2).fit(X, y2)
y3 = y2 - tree_reg2.predict(X).reshape(-1, 1)
tree_reg3 = DecisionTreeRegressor(max_depth=2).fit(X, y3)

# The ensemble prediction after k stages is the sum of the first k trees.
X_new = np.arange(-3, 3, 0.05).reshape(-1, 1)
y_pred1 = tree_reg1.predict(X_new)
y_pred2 = y_pred1 + tree_reg2.predict(X_new)
y_pred3 = y_pred2 + tree_reg3.predict(X_new)

# Overlay the three cumulative predictions on the raw data.
plt.figure()
plt.plot(X, y, "b.", label="data")
stage_curves = (
    (y_pred1, "y-", 3, "1st iter."),
    (y_pred2, "g-", 2, "2nd iter."),
    (y_pred3, "r-", 1, "3rd iter."),
)
for stage_pred, line_fmt, line_width, stage_label in stage_curves:
    plt.plot(X_new, stage_pred, line_fmt, linewidth=line_width, label=stage_label)
plt.legend()
plt.savefig("../plots/ex_7_01.pdf")

# gradient boosting
# Fit a 120-stage GBRT, record the train/validation MSE after every stage,
# then refit a model that uses only as many trees as the stage with the
# lowest validation error.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=120, learning_rate=0.1)
gbrt.fit(X_train, y_train)
errors = [mean_squared_error(y_val, y_pred) for y_pred in gbrt.staged_predict(X_val)]
train_errors = [mean_squared_error(y_train, y_pred)
                for y_pred in gbrt.staged_predict(X_train)]

# staged_predict yields the prediction after 1, 2, ... trees, so position i in
# `errors` belongs to a model with i + 1 estimators. Using the raw argmin as
# n_estimators would drop one tree (and ask for 0 trees when stage 1 is best).
best_idx = int(np.argmin(errors))
bst_n_estimators = best_idx + 1
gbrt_best = GradientBoostingRegressor(max_depth=2, n_estimators=bst_n_estimators)
gbrt_best.fit(X_train, y_train)

# Left panel: error curves, plotted against the actual number of estimators.
stages = range(1, len(errors) + 1)
plt.figure(figsize=(12,5))
plt.subplot(121)
plt.plot(stages, errors, "b-", label="test")
plt.plot(stages, [errors[best_idx]] * len(errors), "r--", label="best")
plt.plot(stages, train_errors, "g-", label="train")
plt.legend()
plt.ylim([0,1])
plt.xlabel("n_estimators")
plt.ylabel("MSE")

# Right panel: training data and the prediction of the refit best model.
plt.subplot(122)
plt.plot(X_train, y_train, "b.", label="data")
X_grid = np.arange(-3,3,0.05)
plt.plot(X_grid, gbrt_best.predict(X_grid.reshape(-1,1)),
         "r--", label="GBRT")
plt.legend()
plt.xlabel("X")
plt.ylabel("y")  # was a second xlabel call, which overwrote "X" and left y unlabeled
plt.savefig("../plots/ex_7_02.pdf")

# gradient boosting with early stop
# Grow the ensemble one tree at a time (warm_start=True keeps the trees that
# are already fitted) and stop once the validation MSE has failed to improve
# for 5 consecutive iterations.
gbrt = GradientBoostingRegressor(max_depth=2, warm_start=True)
min_val_error = float("inf")
error_going_up = 0
for n_estimators in range(1, 120):
    gbrt.n_estimators = n_estimators
    gbrt.fit(X_train, y_train)
    y_pred = gbrt.predict(X_val)
    val_error = mean_squared_error(y_val, y_pred)

    if not val_error < min_val_error:
        # No improvement: count it and give up after 5 misses in a row.
        error_going_up += 1
        if error_going_up == 5:
            break
        continue

    # New best validation error: remember it and reset the miss counter.
    min_val_error = val_error
    error_going_up = 0

# stacking
# not implemented here; note that scikit-learn >= 0.22 provides
# sklearn.ensemble.StackingClassifier / StackingRegressor