In [2]:
from sklearn.datasets import fetch_openml
import numpy as np

mnist = fetch_openml('mnist_784', version = 1, as_frame = False)
mnist.target = mnist.target.astype(np.uint8)

In [3]:
from sklearn.model_selection import train_test_split

X_train_val, X_test, y_train_val, y_test = train_test_split(
    mnist.data, mnist.target, test_size = 10000, random_state = 42)

X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, test_size = 10000, random_state = 42)

In [4]:
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier

In [5]:
random_forest_clf = RandomForestClassifier(n_estimators = 100, random_state = 42)
extra_trees_clf = ExtraTreesClassifier(n_estimators = 100, random_state = 42)
svm_clf = LinearSVC(max_iter = 100, tol = 20, random_state = 42)
mlp_clf = MLPClassifier(random_state = 42)

In [6]:
estimators = [random_forest_clf, extra_trees_clf, svm_clf, mlp_clf]
for estimator in estimators:
    print('Training the', estimator)
    estimator.fit(X_train, y_train)

Training the RandomForestClassifier(random_state=42)
Training the ExtraTreesClassifier(random_state=42)
Training the LinearSVC(max_iter=100, random_state=42, tol=20)
Training the MLPClassifier(random_state=42)


In [8]:
[estimator.score(X_val, y_val) for estimator in estimators]

[0.9692, 0.9715, 0.0997, 0.9621]

In [9]:
from sklearn.ensemble import VotingClassifier

In [12]:
voting_clf = VotingClassifier(estimators = [('random_forest_clf', random_forest_clf),
                                            ('extra_trees_clf', extra_trees_clf),
                                            ('svm_clf', svm_clf), ('mlp_clf', mlp_clf)])

In [13]:
voting_clf.fit(X_train, y_train)

In [14]:
voting_clf.score(X_val, y_val)

0.972

In [15]:
voting_clf.set_params(svm_clf = None)

In [16]:
voting_clf.estimators

[('random_forest_clf', RandomForestClassifier(random_state=42)),
 ('extra_trees_clf', ExtraTreesClassifier(random_state=42)),
 ('svm_clf', None),
 ('mlp_clf', MLPClassifier(random_state=42))]

In [17]:
voting_clf.estimators_

[RandomForestClassifier(random_state=42),
 ExtraTreesClassifier(random_state=42),
 LinearSVC(max_iter=100, random_state=42, tol=20),
 MLPClassifier(random_state=42)]

In [18]:
del voting_clf.estimators_[2]

In [19]:
voting_clf.score(X_val, y_val)

0.974

In [20]:
voting_clf.voting = 'soft'

In [21]:
voting_clf.score(X_val, y_val)

0.9692

In [25]:
voting_clf.voting = 'hard'
voting_clf.score(X_test, y_test)

0.9705

In [24]:
[estimator.score(X_test, y_test) for estimator in voting_clf.estimators_]

[0.9645, 0.9691, 0.9619]

In [26]:
X_val_predictions = np.array([estimator.predict(X_val) for estimator in estimators]).T

In [27]:
X_val_predictions

array([[5, 5, 0, 5],
       [8, 8, 0, 8],
       [2, 2, 0, 2],
       ...,
       [7, 7, 0, 7],
       [6, 6, 0, 6],
       [7, 7, 0, 7]], shape=(10000, 4), dtype=uint8)

In [28]:
rnd_forest_blender = RandomForestClassifier(n_estimators = 200, oob_score = True, random_state = 42)
rnd_forest_blender.fit(X_val_predictions, y_val)

In [29]:
rnd_forest_blender.oob_score_

0.97

In [30]:
X_test_predictions = np.array([estimator.predict(X_test) for estimator in estimators]).T

In [31]:
y_pred = rnd_forest_blender.predict(X_test_predictions)

In [32]:
from sklearn.metrics import accuracy_score

In [33]:
accuracy_score(y_pred, y_test)

0.9676