In [1]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons

# Synthetic two-class "moons" dataset with added noise; seeded so it is identical on every run.
X, y = make_moons(
    n_samples=500,
    noise=0.30,
    random_state=42,
)

# Seeded train/test split (test_size is left at the library default).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

## Hard Voting

In [3]:
# Base learners that will be combined by the hard-voting ensemble.
log_reg = LogisticRegression(solver="liblinear")
svm = SVC(gamma="auto")
# A random forest is a randomized learner (bootstrap sampling of rows and random
# feature selection at each split). Seed it like the dataset and the split so the
# accuracies printed for this section are reproducible after Restart & Run All.
random_forest = RandomForestClassifier(n_estimators=10, random_state=42)

In [4]:
# Combine the three base learners; with voting="hard" the ensemble predicts the
# class label chosen by the majority of its estimators.
voting_clf = VotingClassifier(
    estimators=[
        ('lr', log_reg),
        ('rf', random_forest),
        ('svc', svm),
    ],
    voting="hard",
)

In [5]:
# Fit the hard-voting ensemble on the training split. The call is the last
# expression of the cell, so the fitted estimator's repr is shown as output.
voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('lr',
                              LogisticRegression(C=1.0, class_weight=None,
                                                 dual=False, fit_intercept=True,
                                                 intercept_scaling=1,
                                                 l1_ratio=None, max_iter=100,
                                                 multi_class='warn',
                                                 n_jobs=None, penalty='l2',
                                                 random_state=None,
                                                 solver='liblinear', tol=0.0001,
                                                 verbose=0, warm_start=False)),
                             ('rf',
                              RandomForestClassifier(bootstrap=True,
                                                     class_weight=None,
                                                     criterion='g...
                                        

In [6]:
from sklearn.metrics import accuracy_score

# Score each individual model and the voting ensemble on the held-out test split
# so their accuracies can be compared line by line.
for clf in (log_reg, svm, random_forest, voting_clf):
    # fit() returns the estimator itself, so training and prediction can be chained.
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    print(type(clf).__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.864
SVC 0.888
RandomForestClassifier 0.88
VotingClassifier 0.896


## Soft Voting

In [7]:
# Fresh base learners for the soft-voting ensemble (soft voting averages the
# predicted class probabilities, so every estimator must provide predict_proba).
log_reg = LogisticRegression(solver="liblinear")
# probability=True enables predict_proba on SVC. The probability estimates come
# from an internal, randomized cross-validation, so seed it for reproducibility.
svm = SVC(gamma="auto", probability=True, random_state=42)
# Seed the forest as well; otherwise every run trains different trees.
random_forest = RandomForestClassifier(n_estimators=10, random_state=42)

In [8]:
# Same three learners as before, but with voting="soft" the ensemble averages the
# estimators' predicted class probabilities and picks the most probable class.
voting_clf = VotingClassifier(
    estimators=[
        ('lr', log_reg),
        ('rf', random_forest),
        ('svc', svm),
    ],
    voting="soft",
)

## Bagging

In [11]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

# Bagging ensemble: 500 decision trees, each trained on 100 training instances
# drawn with replacement (bootstrap=True).
# - oob_score=True also computes an out-of-bag accuracy estimate during fit.
# - n_jobs=-1 uses all available CPU cores.
# - random_state seeds the resampling so oob_score_ and the test accuracy are
#   reproducible; without it every run draws different bootstrap samples.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=100,
    bootstrap=True,
    oob_score=True,
    n_jobs=-1,
    random_state=42,
)

# Last expression of the cell: the fitted ensemble's repr is displayed.
bag_clf.fit(X_train, y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(class_weight=None,
                                                        criterion='gini',
                                                        max_depth=None,
                                                        max_features=None,
                                                        max_leaf_nodes=None,
                                                        min_impurity_decrease=0.0,
                                                        min_impurity_split=None,
                                                        min_samples_leaf=1,
                                                        min_samples_split=2,
                                                        min_weight_fraction_leaf=0.0,
                                                        presort=False,
                                                        random_state=None,
                                                        splitter='best'),
    

In [12]:
# Predicted class labels (0 or 1) for every sample in the test split.
bag_clf.predict(X_test)

array([1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1,
       1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0,
       0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0,
       1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1,
       1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0], dtype=int64)

In [13]:
# Out-of-bag score, available because the ensemble was built with oob_score=True;
# it is computed from the training data only, without touching the test split.
bag_clf.oob_score_

0.9253333333333333

In [14]:
from sklearn.metrics import accuracy_score

# Test-set accuracy of the bagging ensemble, to compare against the OOB estimate.
y_pred = bag_clf.predict(X_test)
# accuracy_score's signature is (y_true, y_pred). Accuracy happens to be symmetric,
# but the conventional order matches the earlier evaluation loop and stays correct
# if this call is later swapped for an asymmetric metric (precision, recall, ...).
accuracy_score(y_test, y_pred)

0.92

In [15]:
# Out-of-bag decision function: in the output shown, each row holds two values
# that sum to 1 (presumably per-class probability estimates for one training sample).
bag_clf.oob_decision_function_

array([[0.32975871, 0.67024129],
       [0.38717949, 0.61282051],
       [1.        , 0.        ],
       [0.0183727 , 0.9816273 ],
       [0.03125   , 0.96875   ],
       [0.08311688, 0.91688312],
       [0.4       , 0.6       ],
       [0.06718346, 0.93281654],
       [0.95595855, 0.04404145],
       [0.86010363, 0.13989637],
       [0.5994898 , 0.4005102 ],
       [0.0626703 , 0.9373297 ],
       [0.71313673, 0.28686327],
       [0.84020619, 0.15979381],
       [0.92111959, 0.07888041],
       [0.11428571, 0.88571429],
       [0.03174603, 0.96825397],
       [0.93455497, 0.06544503],
       [0.68865435, 0.31134565],
       [0.97      , 0.03      ],
       [0.04545455, 0.95454545],
       [0.26086957, 0.73913043],
       [0.86910995, 0.13089005],
       [0.9921875 , 0.0078125 ],
       [0.96605744, 0.03394256],
       [0.0026178 , 0.9973822 ],
       [0.96747967, 0.03252033],
       [0.99742268, 0.00257732],
       [0.01570681, 0.98429319],
       [0.7007874 , 0.2992126 ],
       [0.