### VOTING CLASSIFIERS

In [1]:
from sklearn.datasets import make_moons
X, y = make_moons(n_samples = 100, noise = 0.15)

In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

In [3]:
log_clf = LogisticRegression()
rnd_clf = RandomForestClassifier()
svm_clf = SVC(probability=True)

In [4]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [5]:
voting_clf = VotingClassifier(
                estimators=[('lr',log_clf),('rf',rnd_clf),('svc',svm_clf)],
                voting='soft')
voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('lr', LogisticRegression()),
                             ('rf', RandomForestClassifier()),
                             ('svc', SVC(probability=True))],
                 voting='soft')

In [6]:
from sklearn.metrics import accuracy_score
for clf in (log_clf, rnd_clf, svm_clf, voting_clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.88
RandomForestClassifier 0.92
SVC 0.96
VotingClassifier 0.92


### BAGGING AND PASTING

In [7]:
# Training the same algorithm on different random subsets of training set

In [8]:
# when sampling is performed with replacement, this method is called bagging
# (short for bootstrap), when sampling is performed without replacement, it is
# called pasting

In [9]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

In [10]:
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(), n_estimators=500,
    max_samples=0.1, bootstrap=True, n_jobs=-1)
# n_jobs=-1 tells scikit-learn to use all available cores
bag_clf.fit(X_train, y_train)
y_pred = bag_clf.predict(X_test)

In [11]:
# max_samples can alternatively be set to a float between 0.0 and 1.0, in which case the max number of instances
# to sample is equal to the size of the training set times max_samples.

### OUT OF BAG EVALUATION

In [12]:
# With bagging, some instances may be sampled several times for any given predictor,
# while others may not be sampled at all. By default a BaggingClassifier samples m
# training instances with replacement (bootstrap=True), where m is the size of the
# training set. This means that only about 63% of the training instances are sampled on
# average for each predictor. The remaining 37% of the training instances that are not
# sampled are called out-of-bag (oob) instances. Note that they are not the same 37%
# for all predictors.

In [13]:
# Since a predictor never sees the oob instances during training, it can be evaluated on
# these instances, without the need for a separate validation set. You can evaluate the
# ensemble itself by averaging out the oob evaluations of each predictor.

In [14]:
# In Scikit-Learn, you can set oob_score=True when creating a BaggingClassifier to
# request an automatic oob evaluation after training. The following code demonstrates
# this. The resulting evaluation score is available through the oob_score_ variable:

In [15]:
bag_clf = BaggingClassifier(DecisionTreeClassifier(),
                           n_estimators=500,
                           bootstrap=True,
                           n_jobs=-1,
                           oob_score=True)
bag_clf.fit(X_train, y_train)
bag_clf.oob_score_

0.9333333333333333

In [16]:
from sklearn.metrics import accuracy_score
y_pred = bag_clf.predict(X_test)
accuracy_score(y_test, y_pred)

0.92

In [17]:
# The oob decision function for each training instance is also available through the
# oob_decision_function_ variable. In this case (since the base estimator has a pre
# dict_proba() method), the decision function returns the class probabilities for each
# training instance. For example, the oob evaluation estimates that the first training
# instance has a 68.25% probability of belonging to the positive class (and 31.75% of
# belonging to the negative class):

bag_clf.oob_decision_function_

array([[0.97883598, 0.02116402],
       [0.        , 1.        ],
       [0.        , 1.        ],
       [0.07222222, 0.92777778],
       [0.        , 1.        ],
       [1.        , 0.        ],
       [0.5027933 , 0.4972067 ],
       [1.        , 0.        ],
       [0.        , 1.        ],
       [1.        , 0.        ],
       [0.00537634, 0.99462366],
       [0.        , 1.        ],
       [0.55445545, 0.44554455],
       [0.41530055, 0.58469945],
       [0.        , 1.        ],
       [0.99447514, 0.00552486],
       [0.97512438, 0.02487562],
       [1.        , 0.        ],
       [0.0052356 , 0.9947644 ],
       [1.        , 0.        ],
       [0.27717391, 0.72282609],
       [0.79411765, 0.20588235],
       [0.01111111, 0.98888889],
       [0.96855346, 0.03144654],
       [0.43814433, 0.56185567],
       [0.91860465, 0.08139535],
       [0.97814208, 0.02185792],
       [0.03108808, 0.96891192],
       [0.95135135, 0.04864865],
       [0.94382022, 0.05617978],
       [1.

### RANDOM PATCHES AND RANDOM SUBSPACES

In [18]:
# The BaggingClassifier class supports sampling the features as well. Sampling is
# controlled by two hyperparameters: max_features and bootstrap_features. They
# work the same way as max_samples and bootstrap, but for feature sampling instead
# of instance sampling. Thus, each predictor will be trained on a random subset of the
# input features.

In [19]:
# This technique is particularly useful when you are dealing with high-dimensional
# inputs (such as images). Sampling both training instances and features is called the
# Random Patches method. Keeping all training instances (by setting bootstrap=False
# and max_samples=1.0) but sampling features (by setting bootstrap_features to
# True and/or max_features to a value smaller than 1.0) is called the Random Subspaces
# method.

### RANDOM FORESTS

In [20]:
from sklearn.ensemble import RandomForestClassifier

rnd_clf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16, n_jobs=-1)
rnd_clf.fit(X_train, y_train)

y_pred_rf = rnd_clf.predict(X_test)

In [21]:
print("Testing Score", accuracy_score(y_pred, y_test))

Testing Score 0.92


In [22]:
rnd_clf.score(X_train, y_train)

1.0

In [23]:
# With a few exceptions, a RandomForestClassifier has all the hyperparameters of a
# DecisionTreeClassifier (to control how trees are grown), plus all the hyperparameters
# of a BaggingClassifier to control the ensemble itself.

### FEATURE IMPORTANCE

In [None]:
from sklearn.datasets import load_iris
iris = load_iris()
rnd_clf = RandomForestClassifier(n_estimators=500)