In [1]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

In [7]:
# Build the two-moons toy dataset. make_moons shuffles the samples by default,
# so it is seeded as well; otherwise the seeded split below would still change
# from run to run because the row order feeding it would differ.
X, y = make_moons(random_state=42)
# Hold out 20% of the samples as a test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


(array([[ 1.92691676e+00,  1.24732995e-01],
        [-9.91790014e-01,  1.27877162e-01],
        [ 4.04783343e-01,  9.14412623e-01],
        [ 9.90311321e-02,  6.61162609e-02],
        [ 9.60230259e-02,  9.95379113e-01],
        [ 2.84527587e-01,  9.58667853e-01],
        [ 2.38554042e-01, -1.48228395e-01],
        [ 5.09442530e-02,  1.84891782e-01],
        [ 9.00968868e-01,  4.33883739e-01],
        [-9.81559157e-01,  1.91158629e-01],
        [-7.18349350e-01,  6.95682551e-01],
        [ 1.99794539e+00,  4.35929780e-01],
        [-1.00000000e+00,  1.22464680e-16],
        [-9.97945393e-01,  6.40702200e-02],
        [ 4.27883340e-01, -3.20172255e-01],
        [ 1.84408430e-02,  3.08841371e-01],
        [ 1.90096887e+00,  6.61162609e-02],
        [ 3.27051370e-02,  2.46345416e-01],
        [ 1.76144596e+00, -1.48228395e-01],
        [ 1.67230089e+00, -2.40277997e-01],
        [ 9.26916757e-01,  3.75267005e-01],
        [-8.38088105e-01,  5.45534901e-01],
        [ 0.00000000e+00,  5.000

In [3]:
# Three different base learners, each created with its default hyperparameters.
log_clf = LogisticRegression()
rnd_clf = RandomForestClassifier()
svm_clf = SVC()

# Wrap the three learners in a voting ensemble configured with voting='hard'.
voting_clf = VotingClassifier(
    estimators=[
        ('lr', log_clf),
        ('rf', rnd_clf),
        ('svc', svm_clf),
    ],
    voting='hard',
)

voting_clf.fit(X_train, y_train)

In [4]:
from sklearn.metrics import accuracy_score

# Fit every individual model and then the ensemble on the training split, and
# print each one's class name next to its accuracy on the test split.
for clf in (log_clf, rnd_clf, svm_clf, voting_clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(type(clf).__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.75
RandomForestClassifier 0.95
SVC 1.0
VotingClassifier 0.95


In [8]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

# Bagging ensemble of 500 decision trees; max_samples=20 caps how many training
# instances each tree is fit on, and bootstrap=True samples with replacement.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=20,
    bootstrap=True,
    n_jobs=-1,
)

bag_clf.fit(X_train, y_train)
y_pred = bag_clf.predict(X_test)

In [10]:
# Bagging ensemble of 500 decision trees with oob_score=True, so that an
# out-of-bag score is available as `oob_score_` once the model is fitted.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    bootstrap=True,
    n_jobs=-1,
    oob_score=True,
)
bag_clf.fit(X_train, y_train)
bag_clf.oob_score_

0.95

In [11]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 500 trees, each restricted to at most 16 leaf nodes.
rnd_clf = RandomForestClassifier(
    n_estimators=500,
    max_leaf_nodes=16,
    n_jobs=-1,
)
rnd_clf.fit(X_train, y_train)
y_pred_rf = rnd_clf.predict(X_test)

In [12]:
from sklearn.datasets import load_iris

iris = load_iris()

# Fit a 500-tree forest on the full iris data; this rebinds `rnd_clf`.
rnd_clf = RandomForestClassifier(n_estimators=500, n_jobs=-1)
rnd_clf.fit(iris["data"], iris["target"])

# Print each feature name alongside the importance the fitted forest assigns it.
for name, score in zip(iris['feature_names'], rnd_clf.feature_importances_):
    print(name, score)

sepal length (cm) 0.0938344761912909
sepal width (cm) 0.02658295354065058
petal length (cm) 0.4524264853407328
petal width (cm) 0.42715608492732565


In [13]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost over 200 depth-1 decision trees (decision stumps), learning rate 0.5.
# NOTE(review): recent scikit-learn releases deprecate the "SAMME.R" option;
# confirm against the installed version before upgrading.
ada_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1),
    n_estimators=200,
    algorithm="SAMME.R",
    learning_rate=0.5,
)
ada_clf.fit(X_train, y_train)



In [14]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import GradientBoostingRegressor

# Split the iris data into training and validation sets.
# Gotcha: this rebinds X_train / y_train, replacing the make_moons split that
# earlier cells trained on.
X_train, X_val, y_train, y_val = train_test_split(iris["data"], iris["target"])

# Train a deliberately large ensemble (120 trees of depth 2).
gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=120)
gbrt.fit(X_train, y_train)

# staged_predict yields the ensemble's prediction after 1, 2, ..., 120 trees,
# so errors[i] is the validation MSE obtained with i + 1 trees.
errors = [mean_squared_error(y_val, y_pred)
          for y_pred in gbrt.staged_predict(X_val)]
# argmin is a zero-based index; add 1 to turn it into a tree count. Without
# the +1 the refit uses one tree too few, and fails outright with
# n_estimators=0 when the very first stage has the lowest error.
bst_n_estimators = int(np.argmin(errors)) + 1

# Refit from scratch with the ensemble size that minimised validation error.
gbrt_best = GradientBoostingRegressor(max_depth=2, n_estimators=bst_n_estimators)
gbrt_best.fit(X_train, y_train)

In [None]:
# Early stopping by growing the ensemble step by step. With warm_start=True,
# each fit() call reuses the trees from the previous call and only adds the
# ones needed to reach the new n_estimators.
gbrt = GradientBoostingRegressor(max_depth=2, warm_start=True)

min_val_error = float("inf")
error_going_up = 0  # consecutive sizes that failed to set a new best error
for n_estimators in range(1, 120):
    gbrt.n_estimators = n_estimators
    gbrt.fit(X_train, y_train)
    y_pred = gbrt.predict(X_val)
    val_error = mean_squared_error(y_val, y_pred)

    if val_error < min_val_error:
        # New best validation error: record it and reset the counter.
        min_val_error = val_error
        error_going_up = 0
        continue

    error_going_up += 1
    if error_going_up == 5:
        break  # early stopping