In [3]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

In [23]:
X, y = make_moons(n_samples=1000, noise=0.3)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [24]:
log_clf = LogisticRegression()
rnd_clf = RandomForestClassifier()
svm_clf = SVC(probability=True)

voting_clf = VotingClassifier(estimators=[('lr', log_clf), ('rf', rnd_clf), ('svm', svm_clf)],
                             voting='soft')
voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('lr', LogisticRegression()),
                             ('rf', RandomForestClassifier()),
                             ('svm', SVC(probability=True))],
                 voting='soft')

In [25]:
from sklearn.metrics import accuracy_score
for clf in (log_clf, rnd_clf, svm_clf, voting_clf): 
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.83
RandomForestClassifier 0.895
SVC 0.91
VotingClassifier 0.89


In [26]:
from sklearn.ensemble import BaggingClassifier 
from sklearn.tree import DecisionTreeClassifier
bag_clf = BaggingClassifier( DecisionTreeClassifier(), n_estimators=500, 
                            max_samples=100, bootstrap=True, n_jobs=-1)
bag_clf.fit(X_train, y_train) 
y_pred = bag_clf.predict(X_test)
accuracy_score(y_test, y_pred)

0.9

In [27]:
bag_clf = BaggingClassifier( DecisionTreeClassifier(), n_estimators=500, 
                            max_samples=100, bootstrap=True, n_jobs=-1,
                           oob_score=True)
bag_clf.fit(X_train, y_train)
bag_clf.oob_score_

0.8975

In [28]:
y_pred = bag_clf.predict(X_test)
accuracy_score(y_test, y_pred)

0.905

In [29]:
bag_clf.oob_decision_function_

array([[0.95238095, 0.04761905],
       [0.02272727, 0.97727273],
       [0.12780269, 0.87219731],
       ...,
       [0.21076233, 0.78923767],
       [0.59734513, 0.40265487],
       [0.1047836 , 0.8952164 ]])

In [30]:
from sklearn.ensemble import RandomForestClassifier
rnd_clf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16, n_jobs=-1)
rnd_clf.fit(X_train, y_train) 
y_pred_rf = rnd_clf.predict(X_test)

# equal to bag_clf = BaggingClassifier( DecisionTreeClassifier(splitter="random", max_leaf_nodes=16), 
#                             n_estimators=500, max_samples=1.0, bootstrap=True, n_jobs=-1)

In [31]:
from sklearn.datasets import load_iris
iris = load_iris()
rnd_clf = RandomForestClassifier(n_estimators=500, n_jobs=-1)
rnd_clf.fit(iris["data"], iris["target"])
for name, score in zip(iris["feature_names"], rnd_clf.feature_importances_):
    print(name, score)

sepal length (cm) 0.08942438893769376
sepal width (cm) 0.02000404529619185
petal length (cm) 0.44431032229498935
petal width (cm) 0.446261243471125


In [32]:
from sklearn.ensemble import AdaBoostClassifier
ada_clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), 
                             n_estimators=200, algorithm="SAMME.R", learning_rate=0.5)
ada_clf.fit(X_train, y_train)

AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=1),
                   learning_rate=0.5, n_estimators=200)

In [34]:
from sklearn.tree import DecisionTreeRegressor
tree1_clf = DecisionTreeRegressor(max_depth=2)
tree1_clf.fit(X_train, y_train)

y2 = y_train - tree1_clf.predict(X_train)
tree2_clf = DecisionTreeRegressor(max_depth=2) 
tree2_clf.fit(X_train, y2)

y3 = y2 - tree1_clf.predict(X_train)
tree3_clf = DecisionTreeRegressor(max_depth=2) 
tree3_clf.fit(X_train, y3)

y_pred = sum(tree.predict(X_test) for tree in (tree1_clf, tree2_clf, tree3_clf))

In [35]:
from sklearn.ensemble import GradientBoostingRegressor
gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=3, learning_rate=1.0)
gbrt.fit(X_train, y_train)

GradientBoostingRegressor(learning_rate=1.0, max_depth=2, n_estimators=3)

In [37]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=120)
gbrt.fit(X_train, y_train)
errors = [mean_squared_error(y_test, y_pred) 
          for y_pred in gbrt.staged_predict(X_test)] 
bst_n_estimators = np.argmin(errors) + 1

gbrt_best = GradientBoostingRegressor(max_depth=2,n_estimators=bst_n_estimators)
gbrt_best.fit(X_train, y_train)

GradientBoostingRegressor(max_depth=2, n_estimators=84)

In [41]:
import xgboost
xgb_reg = xgboost.XGBRegressor() 
xgb_reg.fit(X_train, y_train) 
y_pred = xgb_reg.predict(X_test)