In [2]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons

np.random.seed(42)

X, y = make_moons(n_samples=500, noise=0.30, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [6]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

log_clf = LogisticRegression()
rnd_clf = RandomForestClassifier()
svm_clf = SVC()

voting_clf = VotingClassifier(estimators = [('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)], voting='hard')

voting_clf.fit(X_train, y_train)



VotingClassifier(estimators=[('lr', LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)), ('rf', RandomFo...f', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False))],
         flatten_transform=None, n_jobs=None, voting='hard', weights=None)

In [7]:
from sklearn.metrics import accuracy_score

for clf in (log_clf, rnd_clf, svm_clf, voting_clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.864
RandomForestClassifier 0.904
SVC 0.888
VotingClassifier 0.896




## Bagging and pasting

In [10]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

bag_clf = BaggingClassifier(DecisionTreeClassifier(), n_estimators=500, max_samples=100, bootstrap=True, n_jobs=-1)
bag_clf.fit(X_train, y_train)
y_pred = bag_clf.predict(X_test)

In [11]:
print('Bagging accuracy score: ',accuracy_score(y_test, y_pred))

Bagging accuracy score:  0.904


In [12]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

bag_clf = BaggingClassifier(DecisionTreeClassifier(), n_estimators=500, max_samples=100, bootstrap=False, n_jobs=-1)
bag_clf.fit(X_train, y_train)
y_pred = bag_clf.predict(X_test)

In [13]:
print('Pasting accurcy score: ', accuracy_score(y_test, y_pred))

Pasting accurcy score:  0.904


# Random forests

In [14]:
from sklearn.ensemble import RandomForestClassifier

rnd_clf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16, n_jobs=-1)
rnd_clf.fit(X_train, y_train)
y_pred = rnd_clf.predict(X_test)

In [15]:
print('Random forest classifier accuracy score: ',accuracy_score(y_test, y_pred))

Random forest classifier accuracy score:  0.92


In [9]:
from sklearn.datasets import fetch_openml

mnist = fetch_openml('mnist_784', version=1)

In [10]:
mnist.target = mnist.target.astype(np.int64)

In [12]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

X_train, X_test, y_train, y_test = train_test_split(mnist.data, mnist.target, random_state=42)
rnd_clf = RandomForestClassifier()
ext_tree = ExtraTreesClassifier()
voting_clf = VotingClassifier(estimators = [('rf',rnd_clf), ('et', ext_tree)], voting='hard')

In [13]:
rnd_clf.fit(X_train, y_train)
y_pred = rnd_clf.predict(X_test)
print('Random forest accuracy score: ',accuracy_score(y_test, y_pred))



Random forest accuracy score:  0.9441714285714286


In [14]:
ext_tree.fit(X_train, y_train)
y_pred = ext_tree.predict(X_test)
print('Extra trees classifier accuracy score: ', accuracy_score(y_test, y_pred))



Extra trees classifier accuracy score:  0.9481714285714286


In [15]:
voting_clf.fit(X_train, y_train)
y_pred = voting_clf.predict(X_test)
print('Voting classifier accuracy score: ',accuracy_score(y_test, y_pred))

Voting classifier accuracy score:  0.9390857142857143


In [16]:
voting_clf = VotingClassifier(estimators = [('rf',rnd_clf), ('et', ext_tree)], voting='soft')
voting_clf.fit(X_train, y_train)
y_pred = voting_clf.predict(X_test)
print('Voting classifier accuracy score: ',accuracy_score(y_test, y_pred))

Voting classifier accuracy score:  0.9606285714285714


In [18]:
X_train_val, X_test, y_train_val, y_test = train_test_split(mnist.data, mnist.target, test_size=10000, random_state=42)

In [19]:
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=10000, random_state=42)

In [22]:
rnd_clf.fit(X_train, y_train)
ext_tree.fit(X_train, y_train)

ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini',
           max_depth=None, max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
           oob_score=False, random_state=None, verbose=0, warm_start=False)

In [25]:
import numpy as np

estimators = [rnd_clf, ext_tree]

X_val_predictors = np.empty((len(X_val), len(estimators)), dtype=np.float32)

for index, estimator in enumerate(estimators):
    X_val_predictors[:, index] = estimator.predict(X_val)

In [26]:
X_val_predictors

array([[5., 5.],
       [8., 8.],
       [2., 2.],
       ...,
       [7., 4.],
       [6., 6.],
       [7., 7.]], dtype=float32)

In [34]:
rnd_clf_blender = RandomForestClassifier(n_estimators=200, oob_score=True, random_state=42)
rnd_clf_blender.fit(X_val_predictors, y_val)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=200, n_jobs=None,
            oob_score=True, random_state=42, verbose=0, warm_start=False)

In [35]:
rnd_clf_blender.oob_score_

0.9505

In [30]:
X_test_predictions = np.empty((len(X_test), len(estimators)), dtype=np.float32)

for index, estimator in enumerate(estimators):
    X_test_predictions[:, index] = estimator.predict(X_test)

In [31]:
X_test_predictions

array([[8., 8.],
       [4., 4.],
       [8., 8.],
       ...,
       [3., 3.],
       [8., 8.],
       [3., 3.]], dtype=float32)

In [37]:
y_pred = rnd_clf_blender.predict(X_test_predictions)

In [38]:
print('Blender score: ',accuracy_score(y_test, y_pred))

Blender score:  0.9463
