In [1]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons

X, y = make_moons(n_samples=500, noise=0.30, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

## Hard Voting

In [3]:
log_reg = LogisticRegression(solver="liblinear")
svm = SVC(gamma="auto")
random_forest = RandomForestClassifier(n_estimators=10)

In [4]:
voting_clf = VotingClassifier(
                            estimators = [('lr', log_reg), ('rf', random_forest), ('svc', svm)],
                            voting = "hard")

In [5]:
voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('lr',
                              LogisticRegression(C=1.0, class_weight=None,
                                                 dual=False, fit_intercept=True,
                                                 intercept_scaling=1,
                                                 l1_ratio=None, max_iter=100,
                                                 multi_class='warn',
                                                 n_jobs=None, penalty='l2',
                                                 random_state=None,
                                                 solver='liblinear', tol=0.0001,
                                                 verbose=0, warm_start=False)),
                             ('rf',
                              RandomForestClassifier(bootstrap=True,
                                                     class_weight=None,
                                                     criterion='g...
                                        

In [6]:
from sklearn.metrics import accuracy_score

for clf in (log_reg, svm, random_forest, voting_clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.864
SVC 0.888
RandomForestClassifier 0.904
VotingClassifier 0.896


## Soft Voting

In [7]:
log_reg = LogisticRegression(solver="liblinear")
svm = SVC(gamma="auto", probability = True)
random_forest = RandomForestClassifier(n_estimators=10)

In [8]:
voting_clf = VotingClassifier(
                            estimators = [('lr', log_reg), ('rf', random_forest), ('svc', svm)],
                            voting = "soft")

## Bagging

In [9]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

bag_clf = BaggingClassifier(
                            DecisionTreeClassifier(), n_estimators = 500,
                            max_samples = 100, bootstrap = True, n_jobs = -1, oob_score = True)

bag_clf.fit(X_train, y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(class_weight=None,
                                                        criterion='gini',
                                                        max_depth=None,
                                                        max_features=None,
                                                        max_leaf_nodes=None,
                                                        min_impurity_decrease=0.0,
                                                        min_impurity_split=None,
                                                        min_samples_leaf=1,
                                                        min_samples_split=2,
                                                        min_weight_fraction_leaf=0.0,
                                                        presort=False,
                                                        random_state=None,
                                                        splitter='best'),
    

In [10]:
bag_clf.predict(X_test)

array([0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1,
       1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0,
       0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0,
       0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0,
       1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1,
       1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0], dtype=int64)

In [11]:
bag_clf.oob_score_

0.9253333333333333

In [12]:
from sklearn.metrics import accuracy_score

y_pred = bag_clf.predict(X_test)
accuracy_score(y_pred, y_test)

0.912

In [13]:
bag_clf.oob_decision_function_

array([[0.36507937, 0.63492063],
       [0.38147139, 0.61852861],
       [1.        , 0.        ],
       [0.00515464, 0.99484536],
       [0.01282051, 0.98717949],
       [0.09973753, 0.90026247],
       [0.40203562, 0.59796438],
       [0.08115183, 0.91884817],
       [0.94579946, 0.05420054],
       [0.83378016, 0.16621984],
       [0.5890411 , 0.4109589 ],
       [0.03367876, 0.96632124],
       [0.72351421, 0.27648579],
       [0.8537234 , 0.1462766 ],
       [0.91269841, 0.08730159],
       [0.10471204, 0.89528796],
       [0.03282828, 0.96717172],
       [0.92875318, 0.07124682],
       [0.66149871, 0.33850129],
       [0.94850949, 0.05149051],
       [0.05292479, 0.94707521],
       [0.2265625 , 0.7734375 ],
       [0.90106952, 0.09893048],
       [0.99234694, 0.00765306],
       [0.96551724, 0.03448276],
       [0.        , 1.        ],
       [0.95918367, 0.04081633],
       [1.        , 0.        ],
       [0.02046036, 0.97953964],
       [0.68298969, 0.31701031],
       [0.

## Random Forests

In [21]:
rnd_clf = RandomForestClassifier(n_estimators = 500, max_leaf_nodes = 16, n_jobs = -1)
rnd_clf.fit(X_train, y_train)

y_pred = rnd_clf.predict(X_test)

In [22]:
accuracy_score(y_pred, y_test)

0.92

### Important

The following BaggingClassifier is roughly equivalent to the previous RandomForestClassifier:

In [16]:
bag_clf = BaggingClassifier(
                            DecisionTreeClassifier(splitter = "random", max_leaf_nodes = 16),
                            n_estimators = 500, max_samples = 1.0, bootstrap = True, n_jobs = -1)

In [19]:
bag_clf.fit(X_train, y_train)
accuracy_score(bag_clf.predict(X_test), y_test)

0.904