In [1]:
from sklearn.datasets import fetch_openml

mnist = fetch_openml("mnist_784", version=1)

In [2]:
x_train, x_val, x_test = mnist["data"][:50000], mnist["data"][50000 : 60000], mnist["data"][60000:]
y_train, y_val, y_test = mnist["target"][:50000], mnist["target"][50000 : 60000], mnist["target"][60000:]

In [4]:
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier

In [6]:
forest_clf = RandomForestClassifier()

In [10]:
import numpy as np

from sklearn.model_selection import GridSearchCV

params_grid = [
    {"n_estimators": np.arange(50, 201, 50),
     "max_depth": [2, 6, 10], 
     "random_state": [42]}
]

grid_search = GridSearchCV(forest_clf, params_grid, cv=3, scoring="accuracy")
grid_search.fit(x_train, y_train)

In [11]:
forest_clf = grid_search.best_estimator_

In [15]:
extra_clf = ExtraTreesClassifier(n_estimators=100, random_state=42)
extra_clf.fit(x_train, y_train)

In [17]:
svm_clf = LinearSVC(max_iter=100, tol=20, random_state=42)
svm_clf.fit(x_train, y_train)

In [18]:
mlp_clf = MLPClassifier(random_state=42)
mlp_clf.fit(x_train, y_train)

In [19]:
estimators = [forest_clf, extra_clf, svm_clf, mlp_clf]

In [20]:
[estimator.score(x_val, y_val) for estimator in estimators]

[0.9524, 0.9743, 0.8662, 0.9669]

In [37]:
from sklearn.ensemble import VotingClassifier

forest_clf = RandomForestClassifier(n_estimators = 100, random_state=42)
extra_clf = ExtraTreesClassifier(n_estimators=100, random_state=42)
svm_clf = LinearSVC(max_iter=100, tol=20, random_state=42)
mlp_clf = MLPClassifier(random_state=42)

named_estimators = [
    ("forest_clf", forest_clf),
    ("extra_clf", extra_clf),
    ("svm_clf", svm_clf),
    ("mlp_clf", mlp_clf)
]

In [38]:
voting_clf = VotingClassifier(named_estimators)
voting_clf.fit(x_train, y_train)

In [39]:
voting_clf.score(x_val, y_val)

0.9758

In [40]:
[estimator.score(x_val, y_val) for estimator in voting_clf.estimators_]

[0.0, 0.0, 0.0, 0.0]

In [41]:
voting_clf.set_params(svm_clf=None)

In [42]:
voting_clf.estimators

[('forest_clf', RandomForestClassifier(random_state=42)),
 ('extra_clf', ExtraTreesClassifier(random_state=42)),
 ('svm_clf', None),
 ('mlp_clf', MLPClassifier(random_state=42))]

In [44]:
del voting_clf.estimators_[2]
voting_clf.score(x_val, y_val)

0.9772

In [48]:
voting_clf.voting = "soft"
voting_clf.score(x_val, y_val)

0.9714

In [49]:
voting_clf.voting = "hard"
voting_clf.score(x_test, y_test)

0.9732

In [50]:
[estimator.score(x_test, y_test) for estimator in voting_clf.estimators_]

[0.0, 0.0, 0.0]

# Exersize 9 - Stacking Ensemble

In [63]:
x_val_predictions = np.empty((len(x_val), len(estimators)), dtype=np.float32)

for index, estimator in enumerate(estimators):
    x_val_predictions[:, index] = estimator.predict(x_val)

In [64]:
x_val_predictions

array([[3., 3., 3., 3.],
       [8., 8., 8., 8.],
       [6., 6., 6., 6.],
       ...,
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [8., 8., 8., 8.]], dtype=float32)

In [65]:
forest_blender = RandomForestClassifier(n_estimators=200, oob_score=True, 
                                        random_state=42)
forest_blender.fit(x_val_predictions, y_val)

In [66]:
forest_blender.oob_score_

0.9711

In [67]:
x_test_predictions = np.empty((len(x_test), len(estimators)), dtype=np.float32)

for index, estimator in enumerate(estimators):
    X_test_predictions[:, index] = estimator.predict(X_test)

NameError: name 'X_test' is not defined

In [59]:
y_pred = rnd_forest_blender.predict(x_test_predictions)

NameError: name 'rnd_forest_blender' is not defined

In [55]:
from sklearn.metrics import accuracy_score

In [None]:
accuracy_score(y_test, y_pred)