### VOTING CLASSIFIERS

In [33]:
from sklearn.datasets import make_moons
X, y = make_moons(n_samples = 100, noise = 0.15)

In [34]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

In [35]:
log_clf = LogisticRegression()
rnd_clf = RandomForestClassifier()
svm_clf = SVC(probability=True)

In [36]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [37]:
voting_clf = VotingClassifier(
                estimators=[('lr',log_clf),('rf',rnd_clf),('svc',svm_clf)],
                voting='soft')
voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('lr', LogisticRegression()),
                             ('rf', RandomForestClassifier()),
                             ('svc', SVC(probability=True))],
                 voting='soft')

In [38]:
from sklearn.metrics import accuracy_score
for clf in (log_clf, rnd_clf, svm_clf, voting_clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.84
RandomForestClassifier 0.96
SVC 0.96
VotingClassifier 0.96


### BAGGING AND PASTING

In [39]:
# Training the same algorithm on different random subsets of training set

In [40]:
# when sampling is performed with replacement, this method is called bagging
# (short for bootstrap), when sampling is performed without replacement, it is
# called pasting

In [41]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

In [45]:
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(), n_estimators=500,
    max_samples=0.1, bootstrap=True, n_jobs=-1)
# n_jobs=-1 tells scikit-learn to use all available cores
bag_clf.fit(X_train, y_train)
y_pred = bag_clf.predict(X_test)

In [47]:
# max_samples can alternatively be set to a float between 0.0 and 1.0, in which case the max number of instances
# to sample is equal to the size of the training set times max_samples.