In [4]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import VotingClassifier
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

In [5]:
mnist = fetch_openml('mnist_784', version=1)
mnist.target = mnist.target.astype(np.uint8)

X_train_val, X_test, y_train_val, y_test = train_test_split(
    mnist.data, mnist.target, test_size=10000, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, test_size=10000, random_state=42)

In [6]:
random_forest_clf = RandomForestClassifier(n_estimators=100, random_state=42)
extra_trees_clf = ExtraTreesClassifier(n_estimators=100, random_state=42)
mlp_clf = MLPClassifier(random_state=42)

In [21]:
for estimator in estimators:
    print("Training the", estimator)
    estimator.fit(X_train, y_train)

Training the RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=42, verbose=0,
                       warm_start=False)
Training the ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None,
                     criterion='gini', max_depth=None, max_features='auto',
                     max_leaf_nodes=None, max_samples=None,
                     min_impurity_decrease=0.0, min_impurity_split=None,
                     min_samples_leaf=1, min_samples_split=2,
                     min_weight_fraction_leaf=0.0, n_estimators=100,
                 

In [7]:
named_estimators = [
    ("random_forest_clf", random_forest_clf),
    ("extra_trees_clf", extra_trees_clf),
    ("mlp_clf", mlp_clf),
]

estimators = [random_forest_clf, extra_trees_clf, mlp_clf]

In [8]:
voting_clf = VotingClassifier(named_estimators)

In [9]:
voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('random_forest_clf',
                              RandomForestClassifier(bootstrap=True,
                                                     ccp_alpha=0.0,
                                                     class_weight=None,
                                                     criterion='gini',
                                                     max_depth=None,
                                                     max_features='auto',
                                                     max_leaf_nodes=None,
                                                     max_samples=None,
                                                     min_impurity_decrease=0.0,
                                                     min_impurity_split=None,
                                                     min_samples_leaf=1,
                                                     min_samples_split=2,
                                                     min_weight_fraction_lea

In [10]:
voting_clf.score(X_val, y_val)

0.9738

In [11]:
[estimator.score(X_val, y_val) for estimator in voting_clf.estimators_]

[0.9692, 0.9715, 0.9655]

In [19]:
X_val_predictions = np.empty((len(X_val), len(estimators)), dtype=np.float32)

In [22]:
for index, estimator in enumerate(estimators):
    X_val_predictions[:, index] = estimator.predict(X_val)

In [23]:
X_val_predictions

array([[5., 5., 5.],
       [8., 8., 8.],
       [2., 2., 2.],
       ...,
       [7., 7., 7.],
       [6., 6., 6.],
       [7., 7., 7.]], dtype=float32)

In [24]:
random_forest_blender = RandomForestClassifier(oob_score=True, random_state=42, warm_start=True)

In [39]:
X_test_predictions = np.empty((len(X_test), len(estimators)), dtype=np.float32)

for index, estimator in enumerate(estimators):
    X_test_predictions[:, index] = estimator.predict(X_test)

In [41]:
min_val_error = float("inf")
error_going_up = 0
for n_estimators in range(21, 200):
    random_forest_blender.n_estimators = n_estimators
    random_forest_blender.fit(X_val_predictions, y_val)
    y_pred = random_forest_blender.predict(X_test_predictions)
    val_error = mean_squared_error(y_test, y_pred)
    if val_error < min_val_error:
        min_val_error = val_error
        error_going_up = 0
    else:
        error_going_up += 1
        if error_going_up == 5:
            break  # early stopping

In [42]:
random_forest_blender.oob_score_

0.9695

In [44]:
y_pred = random_forest_blender.predict(X_test_predictions)
accuracy_score(y_test, y_pred)

0.9664

In [45]:
random_forest_blender.n_estimators

26

Voting Classifier is better than the Stacked Classifier

In [63]:
mlp_blender = MLPClassifier(random_state=42)

In [62]:
mlp_blender.fit(X_val_predictions, y_val)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=42, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [51]:
y_pred = mlp_blender.predict(X_test_predictions)
accuracy_score(y_test, y_pred)

0.9558

In [64]:
param_grid = [
  {'activation': ['relu'], 'alpha': [0.0001, 0.001, 0.01, 0.1, 1.0], 'hidden_layer_sizes' : [(100,), (200,), (50,)], 'early_stopping' : [False]},
  {'activation': ['softmax'], 'alpha': [0.0001, 0.001, 0.01, 0.1, 1.0], 'hidden_layer_sizes' : [(100,), (200,), (50,)], 'early_stopping' : [True]},
 ]

In [65]:
search = GridSearchCV(mlp_blender, param_grid, cv=5)

In [66]:
search.fit(X_val_predictions, y_val)

GridSearchCV(cv=5, error_score=nan,
             estimator=MLPClassifier(activation='relu', alpha=0.0001,
                                     batch_size='auto', beta_1=0.9,
                                     beta_2=0.999, early_stopping=False,
                                     epsilon=1e-08, hidden_layer_sizes=(100,),
                                     learning_rate='constant',
                                     learning_rate_init=0.001, max_fun=15000,
                                     max_iter=200, momentum=0.9,
                                     n_iter_no_change=10,
                                     nesterovs_momentum=True, power_t=0.5,
                                     random_state...
             param_grid=[{'activation': ['relu'],
                          'alpha': [0.0001, 0.001, 0.01, 0.1, 1.0],
                          'early_stopping': [False],
                          'hidden_layer_sizes': [(100,), (200,), (50,)]},
                         {'activation

In [67]:
y_pred = search.best_estimator_.predict(X_test_predictions)
accuracy_score(y_test, y_pred)

0.9643

The hard voting still yields better results than the MLP and RND forests Blenders, so that is the one to go with.
MLP did preform a smaller gridsearch for hyper parameters, the Random Forests allowed for early stopping to not overfit. 