In [2]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons

X, y = make_moons(n_samples=500, noise=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.svm import SVC

log_clf = LogisticRegression(solver="lbfgs", random_state=42)
rnd_clf = RandomForestClassifier(n_estimators=100, random_state=42)
svm_clf = SVC(gamma="scale", random_state=42)

voting_clf = VotingClassifier(
    estimators=[("lr", log_clf), ("rf", rnd_clf), ("svc", svm_clf)], 
    voting="hard")

voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('lr', LogisticRegression(random_state=42)),
                             ('rf', RandomForestClassifier(random_state=42)),
                             ('svc', SVC(random_state=42))])

In [5]:
from sklearn.metrics import accuracy_score

for clf in (log_clf, rnd_clf, svm_clf, voting_clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.864
RandomForestClassifier 0.896
SVC 0.896
VotingClassifier 0.912


In [7]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

bag_clf = BaggingClassifier(DecisionTreeClassifier(), 
                           n_estimators
                            =500, 
                           max_samples=100, 
                           bootstrap=True, 
                           n_jobs=1)
bag_clf.fit(X_train, y_train)
y_pred = bag_clf.predict(X_test)

In [8]:
accuracy_score(y_test, y_pred)

0.912

In [9]:
bag_clf = BaggingClassifier(DecisionTreeClassifier(), 
                           n_estimators=500, 
                           bootstrap=True, 
                           oob_score=True, 
                           n_jobs=1)
bag_clf.fit(X_train, y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(), n_estimators=500,
                  n_jobs=1, oob_score=True)

In [12]:
bag_clf.oob_score_, accuracy_score(y_test, bag_clf.predict(X_test))

(0.896, 0.896)

In [13]:
bag_clf.oob_decision_function_

array([[0.38888889, 0.61111111],
       [0.37362637, 0.62637363],
       [1.        , 0.        ],
       [0.        , 1.        ],
       [0.        , 1.        ],
       [0.07486631, 0.92513369],
       [0.33502538, 0.66497462],
       [0.02209945, 0.97790055],
       [0.99479167, 0.00520833],
       [0.98039216, 0.01960784],
       [0.86111111, 0.13888889],
       [0.0106383 , 0.9893617 ],
       [0.79896907, 0.20103093],
       [0.87564767, 0.12435233],
       [0.97206704, 0.02793296],
       [0.05405405, 0.94594595],
       [0.        , 1.        ],
       [0.98342541, 0.01657459],
       [0.94210526, 0.05789474],
       [0.99428571, 0.00571429],
       [0.01075269, 0.98924731],
       [0.41081081, 0.58918919],
       [0.86631016, 0.13368984],
       [1.        , 0.        ],
       [0.97340426, 0.02659574],
       [0.        , 1.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [0.        , 1.        ],
       [0.61827957, 0.38172043],
       [0.

In [14]:
from sklearn.ensemble import RandomForestClassifier

rnd_clf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16, 
                                n_jobs=-1)
rnd_clf.fit(X_train, y_train)
accuracy_score(y_test, rnd_clf.predict(X_test))

0.912

In [15]:
from sklearn.ensemble import BaggingClassifier

bag_clf = BaggingClassifier(DecisionTreeClassifier(splitter="random", 
                                                  max_leaf_nodes=16), 
                           n_estimators=500, 
                           max_samples=1.0, 
                           n_jobs=-1)
bag_clf.fit(X_train, y_train)
accuracy_score(y_test, bag_clf.predict(X_test))

0.904

In [17]:
from sklearn.datasets import load_iris

iris = load_iris()
rnd_clf = RandomForestClassifier(n_estimators=500, n_jobs=-1)
rnd_clf.fit(iris["data"], iris["target"])
for name, score in zip(iris["feature_names"], rnd_clf.feature_importances_):
    print(name, score)

sepal length (cm) 0.10352988654006881
sepal width (cm) 0.023715024075005007
petal length (cm) 0.44535924623362094
petal width (cm) 0.42739584315130535


In [19]:
from sklearn.ensemble import AdaBoostClassifier

ada_clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), 
                            n_estimators=200, 
                            algorithm="SAMME.R", 
                            learning_rate=0.5)
ada_clf.fit(X_train, y_train)

AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=1),
                   learning_rate=0.5, n_estimators=200)

In [25]:
import numpy as np

np.random.seed(42)
X = np.random.rand(100, 1) - 0.5
y = 3*X[:, 0]**2 + 0.05*np.random.rand(100)

In [26]:
from sklearn.tree import DecisionTreeRegressor

tree_reg1 = DecisionTreeRegressor(max_depth=2)
tree_reg1.fit(X, y)

DecisionTreeRegressor(max_depth=2)

In [27]:
y2 = y - tree_reg1.predict(X)
tree_reg2 = DecisionTreeRegressor(max_depth=2)
tree_reg2.fit(X, y2)

DecisionTreeRegressor(max_depth=2)

In [28]:
y3 = y2 - tree_reg2.predict(X)
tree_reg3 = DecisionTreeRegressor(max_depth=2)
tree_reg3.fit(X, y3)

DecisionTreeRegressor(max_depth=2)

In [30]:
X_new = np.array([[0.8]])
y_pred = sum(tree.predict(X_new) for tree in (tree_reg1, tree_reg2, tree_reg3))
y_pred

array([0.74072131])

In [32]:
from sklearn.ensemble import GradientBoostingRegressor

gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=3, learning_rate=1.0)
gbrt.fit(X, y)
gbrt.predict(X_new)

array([0.74072131])

In [36]:
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(X, y)
gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=200, learning_rate=0.1)
gbrt.fit(X_train, y_train)

errors = [mean_squared_error(y_val, y_pred) for y_pred in gbrt.staged_predict(X_val)]
bst_n_estimators = np.argmin(errors)

gbrt_bst = GradientBoostingRegressor(max_depth=2, n_estimators=bst_n_estimators)
gbrt_bst.fit(X_train, y_train)

GradientBoostingRegressor(max_depth=2, n_estimators=92)

In [44]:
gbrt = GradientBoostingRegressor(max_depth=2, warm_start=True)

min_val_error= float("inf")
error_going_up = 0
for n_estimators in range(1, 120):
    gbrt.n_estimators = n_estimators
    gbrt.fit(X_train, y_train)
    y_pred = gbrt.predict(X_val)
    val_error = mean_squared_error(y_val, y_pred)
    if val_error < min_val_error:
        min_val_error = val_error
        error_going_up = 0
    else:
        error_going_up += 1
        if error_going_up == 5:
            break
gbrt

GradientBoostingRegressor(max_depth=2, n_estimators=88, warm_start=True)

In [None]:
!pip install xgboost

import xgboost
xgb_reg = xgboost.XGBRegressor()
xgb_reg.fit(X_train, y_train)
y_pred = xgb_reg.predict(X_val)

In [None]:
xgb_reg.fit(X_train, y_train, 
           eval_set=[(X_val, y_val)], 
           early_stopping_rounds=2)
y_pred = xgb_reg.predict(X_val)