In [1]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons

# Generate the 500-sample noisy "moons" toy data set, then split it into
# training and test parts (both steps seeded with 42).
X, y = make_moons(
    n_samples=500,
    noise=0.30,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# Three base classifiers, each created with its default settings.
log_clf = LogisticRegression()
rnd_clf = RandomForestClassifier()
svm_clf = SVC()

# Combine the three members in a VotingClassifier configured with voting='hard'.
named_members = [('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)]
voting_clf = VotingClassifier(estimators=named_members, voting='hard')
voting_clf.fit(X_train, y_train)

In [3]:
from sklearn.metrics import accuracy_score

# Fit each model on the training split and print its class name next to its
# accuracy on the test split.
# NOTE(review): rnd_clf is a member of the voting ensemble but is not part of
# this comparison — confirm that leaving it out is intentional.
for clf in (log_clf, svm_clf, voting_clf):
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    print(type(clf).__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.864
SVC 0.896
VotingClassifier 0.912


In [4]:
# Train an ensemble of 500 decision-tree classifiers

from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

# Bagging configuration: 500 estimators, max_samples=100, bootstrap sampling
# enabled, single job.
base_tree = DecisionTreeClassifier()
bag_clf = BaggingClassifier(
    base_tree,
    n_estimators=500,
    max_samples=100,
    bootstrap=True,
    n_jobs=1,
)
y_pred = bag_clf.fit(X_train, y_train).predict(X_test)

In [5]:
from sklearn.metrics import accuracy_score
# Test-set accuracy of the predictions currently held in y_pred.
test_accuracy = accuracy_score(y_test, y_pred)
print(test_accuracy)

0.912


In [6]:
# One single decision tree
tree_clf = DecisionTreeClassifier(random_state=42)
y_pred_tree = tree_clf.fit(X_train, y_train).predict(X_test)
tree_accuracy = accuracy_score(y_test, y_pred_tree)
print(tree_accuracy)

0.856


In [7]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

# Bagging ensemble of 500 decision trees with out-of-bag scoring switched on.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    bootstrap=True,
    oob_score=True,
    n_jobs=1,
)
bag_clf.fit(X_train, y_train)

# Last expression of the cell: display the out-of-bag score of the fitted ensemble.
bag_clf.oob_score_

0.8986666666666666

In [8]:
# Display the out-of-bag decision function stored on the fitted bagging ensemble.
oob_decision = bag_clf.oob_decision_function_
oob_decision

array([[0.40860215, 0.59139785],
       [0.31351351, 0.68648649],
       [1.        , 0.        ],
       [0.        , 1.        ],
       [0.00526316, 0.99473684],
       [0.10555556, 0.89444444],
       [0.35795455, 0.64204545],
       [0.00598802, 0.99401198],
       [0.97959184, 0.02040816],
       [0.9744898 , 0.0255102 ],
       [0.72916667, 0.27083333],
       [0.01190476, 0.98809524],
       [0.75531915, 0.24468085],
       [0.88020833, 0.11979167],
       [0.94736842, 0.05263158],
       [0.06010929, 0.93989071],
       [0.        , 1.        ],
       [0.97660819, 0.02339181],
       [0.92424242, 0.07575758],
       [1.        , 0.        ],
       [0.01212121, 0.98787879],
       [0.34031414, 0.65968586],
       [0.92105263, 0.07894737],
       [1.        , 0.        ],
       [0.98203593, 0.01796407],
       [0.        , 1.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [0.        , 1.        ],
       [0.57837838, 0.42162162],
       [0.

In [9]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 500 trees with max_leaf_nodes=16, fitted with n_jobs=-1.
forest_params = dict(n_estimators=500, max_leaf_nodes=16, n_jobs=-1)
rnd_clf = RandomForestClassifier(**forest_params)
rnd_clf.fit(X_train, y_train)

y_pred_rf = rnd_clf.predict(X_test)

In [12]:
from sklearn.datasets import load_iris

# Fit a 500-tree random forest on the iris data and print each feature name
# alongside the importance the forest assigned to it.
iris = load_iris()
rnd_clf = RandomForestClassifier(n_jobs=-1, n_estimators=500)
rnd_clf.fit(iris['data'], iris['target'])

feature_names = iris['feature_names']
importances = rnd_clf.feature_importances_
for name, score in zip(feature_names, importances):
    print(name, score)

sepal length (cm) 0.09276691297867452
sepal width (cm) 0.021312861556052984
petal length (cm) 0.46067067392649497
petal width (cm) 0.4252495515387775


In [13]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost over 200 depth-1 decision trees with a learning rate of 0.5.
# The boosting algorithm is deliberately left at the library default instead of
# naming 'SAMME.R': that option was deprecated in scikit-learn 1.4 and removed
# in 1.6, so passing it explicitly fails on current releases, while older
# releases already use it as their default.
ada_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1),
    n_estimators=200,
    learning_rate=0.5,
)
ada_clf.fit(X_train, y_train)

In [28]:
# Imported here because this is the first cell that uses NumPy; without it the
# cell raises NameError when the notebook is run top to bottom on a fresh kernel.
import numpy as np

# Noisy quadratic data: 100 inputs shifted to [-0.5, 0.5), targets 3*x**2 plus
# Gaussian noise scaled by 0.05. The global NumPy RNG is seeded first.
np.random.seed(42)
X = np.random.rand(100, 1) - 0.5
y = 3 * X[:, 0] ** 2 + 0.05 * np.random.randn(100)

In [29]:
from sklearn.tree import DecisionTreeRegressor

# First tree: a depth-2 regressor fitted directly on the targets y.
tree_reg1 = DecisionTreeRegressor(
    random_state=42,
    max_depth=2,
)
tree_reg1.fit(X, y)

In [30]:
# Second tree: fitted on what the first tree left unexplained (y minus its prediction).
first_tree_pred = tree_reg1.predict(X)
y2 = y - first_tree_pred
tree_reg2 = DecisionTreeRegressor(
    random_state=42,
    max_depth=2,
)
tree_reg2.fit(X, y2)

In [31]:
# Third tree: fitted on the residual remaining after the second tree (y2 minus its prediction).
second_tree_pred = tree_reg2.predict(X)
y3 = y2 - second_tree_pred
tree_reg3 = DecisionTreeRegressor(
    random_state=42,
    max_depth=2,
)
tree_reg3.fit(X, y3)

In [32]:
import numpy as np

X_new = np.array([[0.8]])

# Ensemble prediction for the new point: add up the predictions of the three trees.
boosted_trees = (tree_reg1, tree_reg2, tree_reg3)
per_tree_pred = [tree.predict(X_new) for tree in boosted_trees]
y_pred = sum(per_tree_pred)

y_pred

array([0.75026781])

In [33]:
from sklearn.ensemble import GradientBoostingRegressor

# Gradient boosting with three depth-2 estimators and a learning rate of 1.0,
# fitted on the same X, y as the hand-built trees.
gbrt = GradientBoostingRegressor(
    n_estimators=3,
    max_depth=2,
    learning_rate=1.0,
)
gbrt.fit(X, y)

In [36]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Split the current X, y into training and validation parts.
X_train, X_val, y_train, y_val = train_test_split(X, y)

# Train an ensemble with 120 estimators.
gbrt = GradientBoostingRegressor(n_estimators=120, max_depth=2)
gbrt.fit(X_train, y_train)

# Validation MSE for each prediction yielded by staged_predict.
errors = [
    mean_squared_error(y_val, stage_pred)
    for stage_pred in gbrt.staged_predict(X_val)
]
# argmin is a zero-based position, hence the + 1 to turn it into a count.
bst_n_estimators = np.argmin(errors) + 1

# Retrain from scratch with the estimator count that minimised validation error.
gbrt_best = GradientBoostingRegressor(n_estimators=bst_n_estimators, max_depth=2)
gbrt_best.fit(X_train, y_train)

In [37]:
# Early stopping by hand: the estimator is created with warm_start=True and
# refitted with a growing n_estimators value on every pass.
gbrt = GradientBoostingRegressor(warm_start=True, max_depth=2)

min_val_error = float("inf")
error_going_up = 0

for n_estimators in range(1, 120):
    gbrt.n_estimators = n_estimators
    gbrt.fit(X_train, y_train)
    y_pred = gbrt.predict(X_val)
    val_error = mean_squared_error(y_val, y_pred)

    if not (val_error < min_val_error):
        # No improvement on the best validation error seen so far; give up
        # once that has happened five times in a row.
        error_going_up += 1
        if error_going_up == 5:
            break
        continue

    # New best validation error: record it and reset the counter.
    min_val_error = val_error
    error_going_up = 0

In [38]:
# Number of estimators set on gbrt at this point in the notebook.
trained_estimators = gbrt.n_estimators
print(trained_estimators)

47


In [39]:
import xgboost

# XGBoost regressor created with default settings, fitted on the training split
# and used to predict the validation inputs.
xgb_reg = xgboost.XGBRegressor()
y_pred = xgb_reg.fit(X_train, y_train).predict(X_val)

In [40]:
# Refit while evaluating on the validation set, with early_stopping_rounds=2.
# NOTE(review): newer xgboost releases expect early_stopping_rounds on the
# estimator rather than in fit() — confirm against the installed version.
validation_sets = [(X_val, y_val)]
xgb_reg.fit(X_train, y_train, eval_set=validation_sets, early_stopping_rounds=2)
y_pred = xgb_reg.predict(X_val)



[0]	validation_0-rmse:0.16828
[1]	validation_0-rmse:0.12802
[2]	validation_0-rmse:0.10088
[3]	validation_0-rmse:0.08363
[4]	validation_0-rmse:0.07403
[5]	validation_0-rmse:0.06957
[6]	validation_0-rmse:0.06645
[7]	validation_0-rmse:0.06481
[8]	validation_0-rmse:0.06453
[9]	validation_0-rmse:0.06371
[10]	validation_0-rmse:0.06330
[11]	validation_0-rmse:0.06322
[12]	validation_0-rmse:0.06341
