In [17]:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

# Load the iris dataset once; `iris`, `X` and `y` are reused by the cells below.
iris = load_iris()

# Feature matrix and class labels, unpacked in one statement.
X, y = iris.data, iris.target

In [18]:
# Voting

from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# Three different model families to combine in one ensemble.
log_clf = LogisticRegression()
# Fixed seed so the forest (and therefore the vote) is the same on every run.
rnd_clf = RandomForestClassifier(random_state=42)
svm_clf = SVC()

# Hard voting: each estimator predicts a class and the majority label wins.
voting_clf = VotingClassifier(
    estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],
    voting='hard'
)

# Left as the trailing expression so the fitted ensemble is shown as cell output.
voting_clf.fit(X, y)



VotingClassifier(estimators=[('lr',
                              LogisticRegression(C=1.0, class_weight=None,
                                                 dual=False, fit_intercept=True,
                                                 intercept_scaling=1,
                                                 l1_ratio=None, max_iter=100,
                                                 multi_class='warn',
                                                 n_jobs=None, penalty='l2',
                                                 random_state=None,
                                                 solver='warn', tol=0.0001,
                                                 verbose=0, warm_start=False)),
                             ('rf',
                              RandomForestClassifier(bootstrap=True,
                                                     class_weight=None,
                                                     criterion='gini',...
                                        

In [22]:
# Slice (rather than index) the last row so the estimator still receives a 2-D batch.
last_sample = X[-1:]
voting_clf.predict(last_sample)

array([2])

In [23]:
# Bagging

from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

# 500 decision trees, each trained on a bootstrap sample of 100 rows.
# oob_score=True scores every tree on the rows it did not see during training.
# random_state pins the bootstrap draws so the OOB score below is reproducible.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(), n_estimators=500,
    max_samples=100, bootstrap=True, n_jobs=-1, oob_score=True,
    random_state=42
)

bag_clf.fit(X, y)
print(bag_clf.predict(X[-1:]))

# Out-of-bag accuracy estimate, computed during fit without a separate validation set.
print(bag_clf.oob_score_)

[2]
0.96


In [30]:
# Random forest == bagging of decision trees, with a random subset of features considered at each split

from sklearn.ensemble import RandomForestClassifier

# 500 trees, each limited to 16 leaf nodes, trained in parallel on all cores.
# random_state fixes the bootstrap and feature sampling so reruns give the same forest.
rnd_clf = RandomForestClassifier(
    n_estimators=500, max_leaf_nodes=16, n_jobs=-1, random_state=42
)
rnd_clf.fit(X, y)

# Predictions for the last 20 rows of the data the forest was trained on.
print(rnd_clf.predict(X[-20:]))


[2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]


In [25]:
# (Re)fit the forest on the full iris feature matrix so importances are available here.
rnd_clf.fit(iris.data, y)

# One line per feature: its name followed by the importance the forest assigned to it.
for name, score in zip(iris.feature_names, rnd_clf.feature_importances_):
    print(name, score)

sepal length (cm) 0.09740842914877314
sepal width (cm) 0.023936688951077444
petal length (cm) 0.4210851087151574
petal width (cm) 0.457569773184992


In [32]:
# Boosting

## AdaBoosting

from sklearn.ensemble import AdaBoostClassifier

# 200 boosted decision stumps (depth-1 trees) with a damped learning rate.
# The `algorithm` argument is intentionally omitted: 'SAMME.R' is the default on
# older scikit-learn releases and is deprecated/removed on newer ones, so relying
# on the library default keeps this cell runnable across versions.
# random_state makes the boosted ensemble reproducible between runs.
ada_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1), n_estimators=200,
    learning_rate=0.5, random_state=42
)

ada_clf.fit(X, y)

# Predictions next to the true labels for the last 20 training rows.
print(ada_clf.predict(X[-20:]), y[-20:])

[2 2 2 1 1 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2] [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]


In [38]:
## Gradient Boosting
## Each new predictor is fit to the residual errors made by the previous one.

from sklearn.tree import DecisionTreeRegressor

# Stage 1: a shallow regression tree fit directly on the targets.
tree_reg1 = DecisionTreeRegressor(max_depth=2).fit(X, y)

# Stage 2: whatever the first tree failed to explain becomes the new target.
y2 = y - tree_reg1.predict(X)
tree_reg2 = DecisionTreeRegressor(max_depth=2).fit(X, y2)

# The ensemble prediction is the sum of the individual stage predictions.
y_pred = tree_reg1.predict(X) + tree_reg2.predict(X)

import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Hold out a validation set; seeded so the split (and the optimum found below)
# is the same on every run.
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=42)

gbrt = GradientBoostingRegressor(
    max_depth=2, n_estimators=120, learning_rate=1.0, random_state=42
)
gbrt.fit(X_train, y_train)

# staged_predict yields the validation predictions after 1, 2, ..., n_estimators
# boosting stages, so errors[i] is the validation MSE of an ensemble of i + 1 trees.
errors = [
    mean_squared_error(y_val, y_val_pred)
    for y_val_pred in gbrt.staged_predict(X_val)
]
print(errors)

# argmin is a 0-based index into `errors`; add 1 to convert it into a tree count
# (otherwise a minimum at the first stage would be reported as "0 estimators").
best_n_estimators = np.argmin(errors) + 1
print(best_n_estimators)

[0.07303974221267455, 0.08022899074725032, 0.08098567318219749, 0.08209984855372128, 0.0854189580727079, 0.08515105833272861, 0.0852302566491742, 0.08505579304353011, 0.08478050807014986, 0.08542962604448674, 0.08540486110324295, 0.08498140671213517, 0.08527929375120978, 0.08530233687209562, 0.08547430536045572, 0.08552439841428615, 0.08560412671631466, 0.08531940656194066, 0.08536788108224565, 0.08536889723786337, 0.08603223879985877, 0.08634888905635055, 0.08635062629625095, 0.08628332306831957, 0.08633166401519414, 0.08634062661284556, 0.08626494659869503, 0.08620602214819373, 0.08622066949681403, 0.08619643726143772, 0.08616490943400529, 0.08612759595796733, 0.08613305798004461, 0.08610633711153905, 0.08609750900880718, 0.08594110375826418, 0.08602594173136416, 0.08603229045365769, 0.08605319272769119, 0.08605245066021856, 0.08605836166518477, 0.08605472482537385, 0.08611468823370229, 0.08612178940516342, 0.08612087111458133, 0.08612813064471606, 0.08610293211989166, 0.086121117737

In [None]:
# Stacking