In [1]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons

X, y = make_moons(n_samples=500, noise=0.30, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

## Hard Voting

In [3]:
log_reg = LogisticRegression(solver="liblinear")
svm = SVC(gamma="auto")
random_forest = RandomForestClassifier(n_estimators=10)

In [4]:
voting_clf = VotingClassifier(
                            estimators = [('lr', log_reg), ('rf', random_forest), ('svc', svm)],
                            voting = "hard")

In [5]:
voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('lr',
                              LogisticRegression(C=1.0, class_weight=None,
                                                 dual=False, fit_intercept=True,
                                                 intercept_scaling=1,
                                                 l1_ratio=None, max_iter=100,
                                                 multi_class='warn',
                                                 n_jobs=None, penalty='l2',
                                                 random_state=None,
                                                 solver='liblinear', tol=0.0001,
                                                 verbose=0, warm_start=False)),
                             ('rf',
                              RandomForestClassifier(bootstrap=True,
                                                     class_weight=None,
                                                     criterion='g...
                                        

In [6]:
from sklearn.metrics import accuracy_score

for clf in (log_reg, svm, random_forest, voting_clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.864
SVC 0.888
RandomForestClassifier 0.912
VotingClassifier 0.88


## Soft Voting

In [7]:
log_reg = LogisticRegression(solver="liblinear")
svm = SVC(gamma="auto", probability = True)
random_forest = RandomForestClassifier(n_estimators=10)

In [8]:
voting_clf = VotingClassifier(
                            estimators = [('lr', log_reg), ('rf', random_forest), ('svc', svm)],
                            voting = "soft")

## Bagging

In [9]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

bag_clf = BaggingClassifier(
                            DecisionTreeClassifier(), n_estimators = 500,
                            max_samples = 100, bootstrap = True, n_jobs = -1, oob_score = True)

bag_clf.fit(X_train, y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(class_weight=None,
                                                        criterion='gini',
                                                        max_depth=None,
                                                        max_features=None,
                                                        max_leaf_nodes=None,
                                                        min_impurity_decrease=0.0,
                                                        min_impurity_split=None,
                                                        min_samples_leaf=1,
                                                        min_samples_split=2,
                                                        min_weight_fraction_leaf=0.0,
                                                        presort=False,
                                                        random_state=None,
                                                        splitter='best'),
    

In [10]:
bag_clf.predict(X_test)

array([0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1,
       1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0,
       0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0,
       0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0,
       1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0], dtype=int64)

In [11]:
bag_clf.oob_score_

0.9253333333333333

In [12]:
from sklearn.metrics import accuracy_score

y_pred = bag_clf.predict(X_test)
accuracy_score(y_pred, y_test)

0.92

In [13]:
bag_clf.oob_decision_function_

array([[0.31382979, 0.68617021],
       [0.37988827, 0.62011173],
       [1.        , 0.        ],
       [0.01017812, 0.98982188],
       [0.02356021, 0.97643979],
       [0.1043956 , 0.8956044 ],
       [0.41361257, 0.58638743],
       [0.06426735, 0.93573265],
       [0.94545455, 0.05454545],
       [0.85365854, 0.14634146],
       [0.58900524, 0.41099476],
       [0.03252033, 0.96747967],
       [0.72135417, 0.27864583],
       [0.845953  , 0.154047  ],
       [0.91798942, 0.08201058],
       [0.11263736, 0.88736264],
       [0.03626943, 0.96373057],
       [0.92447917, 0.07552083],
       [0.68010076, 0.31989924],
       [0.96683673, 0.03316327],
       [0.04485488, 0.95514512],
       [0.25714286, 0.74285714],
       [0.86387435, 0.13612565],
       [0.99469496, 0.00530504],
       [0.95348837, 0.04651163],
       [0.00277008, 0.99722992],
       [0.9721519 , 0.0278481 ],
       [1.        , 0.        ],
       [0.02763819, 0.97236181],
       [0.73684211, 0.26315789],
       [0.

## Random Forests

In [14]:
rnd_clf = RandomForestClassifier(n_estimators = 500, max_leaf_nodes = 16, n_jobs = -1)
rnd_clf.fit(X_train, y_train)

y_pred = rnd_clf.predict(X_test)

In [15]:
accuracy_score(y_pred, y_test)

0.912

### Important

The following BaggingClassifier is roughly equivalent to the previous RandomForestClassifier:

In [16]:
bag_clf = BaggingClassifier(
                            DecisionTreeClassifier(splitter = "random", max_leaf_nodes = 16),
                            n_estimators = 500, max_samples = 1.0, bootstrap = True, n_jobs = -1)

In [17]:
bag_clf.fit(X_train, y_train)
accuracy_score(bag_clf.predict(X_test), y_test)

0.904

## AdaBoost

In [18]:
from sklearn.ensemble import AdaBoostClassifier

ada_clf = AdaBoostClassifier(
                            DecisionTreeClassifier(max_depth=1), n_estimators=200,
                            algorithm="SAMME.R", learning_rate=0.5)

ada_clf.fit(X_train, y_train)

AdaBoostClassifier(algorithm='SAMME.R',
                   base_estimator=DecisionTreeClassifier(class_weight=None,
                                                         criterion='gini',
                                                         max_depth=1,
                                                         max_features=None,
                                                         max_leaf_nodes=None,
                                                         min_impurity_decrease=0.0,
                                                         min_impurity_split=None,
                                                         min_samples_leaf=1,
                                                         min_samples_split=2,
                                                         min_weight_fraction_leaf=0.0,
                                                         presort=False,
                                                         random_state=None,
                             

In [19]:
accuracy_score(ada_clf.predict(X_test), y_test)

0.896

## Gradient Boosting

In [22]:
import numpy as np

np.random.seed(42)

X = np.random.rand(100, 1) - 0.5
y = 3*X[:, 0]**2 + 0.05 * np.random.randn(100)

In [23]:
from sklearn.tree import DecisionTreeRegressor

tree_reg1 = DecisionTreeRegressor(max_depth=2, random_state=42)
tree_reg1.fit(X, y)

DecisionTreeRegressor(criterion='mse', max_depth=2, max_features=None,
                      max_leaf_nodes=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      presort=False, random_state=42, splitter='best')

In [24]:
y2 = y - tree_reg1.predict(X)
tree_reg2 = DecisionTreeRegressor(max_depth=2, random_state=42)
tree_reg2.fit(X, y2)

DecisionTreeRegressor(criterion='mse', max_depth=2, max_features=None,
                      max_leaf_nodes=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      presort=False, random_state=42, splitter='best')

In [25]:
y3 = y2 - tree_reg2.predict(X)
tree_reg3 = DecisionTreeRegressor(max_depth=2, random_state=42)
tree_reg3.fit(X, y3)

DecisionTreeRegressor(criterion='mse', max_depth=2, max_features=None,
                      max_leaf_nodes=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      presort=False, random_state=42, splitter='best')

In [26]:
X_new = np.array([[0.8]])

In [27]:
y_pred = sum(tree.predict(X_new) for tree in (tree_reg1, tree_reg2, tree_reg3))

In [28]:
y_pred

array([0.75026781])