In [1]:
from sklearn.datasets import fetch_mldata 
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, VotingClassifier
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [2]:
# Fetch the 'MNIST original' dataset through scikit-learn's fetch_mldata helper.
# NOTE(review): fetch_mldata is reportedly deprecated since scikit-learn 0.20 and
# removed in 0.22 (the mldata.org backend is offline); the suggested replacement
# is fetch_openml('mnist_784', version=1) — confirm against the installed version
# before migrating, since the import in the first cell would need to change too.
mnist = fetch_mldata('MNIST original')

In [3]:
# Bind the dataset's "data" and "target" entries to short names.
X = mnist["data"]
y = mnist["target"]

In [4]:
# Carve out two hold-out sets of HOLDOUT_SIZE samples each: first the test set,
# then a validation set taken from what remains. The fixed random_state keeps
# both splits reproducible across runs.
HOLDOUT_SIZE = 10000

# Split the X / y bound in the previous cell rather than reaching back into
# `mnist`, so those names are actually used and there is one source of truth.
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X, y, test_size=HOLDOUT_SIZE, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, test_size=HOLDOUT_SIZE, random_state=42)

# Sanity check: the three subsets must account for every sample exactly once.
assert len(X_train) + len(X_val) + len(X_test) == len(X)
assert len(X_val) == len(X_test) == HOLDOUT_SIZE

In [5]:
# Base classifiers for the ensemble. All three share random_state=42 so that
# repeated runs are reproducible; the two tree ensembles use every core.
svm_clf = LinearSVC(random_state=42)
rf_clf = RandomForestClassifier(n_jobs=-1, random_state=42)
etc_clf = ExtraTreesClassifier(n_jobs=-1, random_state=42)

In [6]:
# The individual classifiers, in the order their scores are reported later.
estimators = [svm_clf, rf_clf, etc_clf]

# Fit each classifier on the training split, announcing it first so the
# (potentially slow) progress is visible in the cell output.
for clf in estimators:
    print("Training the", clf)
    clf.fit(X_train, y_train)

Training the LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=42, tol=0.0001,
     verbose=0)
Training the RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=-1,
            oob_score=False, random_state=42, verbose=0, warm_start=False)
Training the ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini',
           max_depth=None, max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=-1,
           o

In [7]:
# Score every individually trained classifier on the validation split;
# the list follows the order of `estimators`.
estimators_score = [clf.score(X_val, y_val) for clf in estimators]

estimators_score

[0.8327, 0.9467, 0.9512]

In [8]:
# (name, estimator) pairs in the form VotingClassifier takes for `estimators`.
name_estimators = list(zip(
    ["SVM", "RandomForest", "ExtraTrees"],
    [svm_clf, rf_clf, etc_clf],
))

In [9]:
# Combine the three named classifiers into one voting ensemble and train it
# on the training split. The bare last expression displays the fitted model.
voting_clf = VotingClassifier(estimators=name_estimators)

voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('SVM', LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=42, tol=0.0001,
     verbose=0)), ('RandomForest', RandomForestClassifier(bootstrap=True, class_wei...stimators=10, n_jobs=-1,
           oob_score=False, random_state=42, verbose=0, warm_start=False))],
         flatten_transform=None, n_jobs=1, voting='hard', weights=None)

In [10]:
# Validation score of the voting ensemble, computed explicitly from its
# predictions (same quantity that `voting_clf.score(X_val, y_val)` reports).
accuracy_score(y_val, voting_clf.predict(X_val))

  if diff:


0.9534

In [11]:
# Display the (name, estimator) pairs the ensemble was constructed with.
voting_clf.estimators

[('SVM', LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
       intercept_scaling=1, loss='squared_hinge', max_iter=1000,
       multi_class='ovr', penalty='l2', random_state=42, tol=0.0001,
       verbose=0)),
 ('RandomForest',
  RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
              max_depth=None, max_features='auto', max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=2,
              min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=-1,
              oob_score=False, random_state=42, verbose=0, warm_start=False)),
 ('ExtraTrees',
  ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini',
             max_depth=None, max_features='auto', max_leaf_nodes=None,
             min_impurity_decrease=0.0, min_impurity_split=None,
             min_samples_leaf=1, min_samples_split=2,
             min_weight_fraction_leaf=0.0, n_es

In [12]:
# Remove the fitted LinearSVC from the ensemble's fitted estimators before the
# switch to soft voting in the next cell.
# NOTE(review): soft voting averages predict_proba outputs, which LinearSVC is
# understood not to provide — confirm against the scikit-learn documentation.
#
# Filtering by type (instead of `del ...[0]`) keeps this cell idempotent: a
# second run is a no-op rather than silently dropping the RandomForest, and it
# does not depend on the SVM being first in the list.
# `voting_clf.estimators` (the constructor spec) is left untouched and still
# lists the SVM.
voting_clf.estimators_ = [
    fitted for fitted in voting_clf.estimators_
    if not isinstance(fitted, LinearSVC)
]

In [13]:
# Switch the already-fitted ensemble to soft voting without refitting.
# The trailing semicolon suppresses the estimator repr that set_params returns.
voting_clf.set_params(voting="soft");

In [14]:
# Validation score of the ensemble in its current configuration, computed
# explicitly from its predictions.
accuracy_score(y_val, voting_clf.predict(X_val))

  if diff:


0.9625

In [15]:
# Score of the ensemble on the held-out test split, computed explicitly
# from its predictions.
accuracy_score(y_test, voting_clf.predict(X_test))

  if diff:


0.9582

In [16]:
# Test-split score of each individual classifier, in the order of
# `estimators`, for comparison with the ensemble's test score.
[clf.score(X_test, y_test) for clf in estimators]

[0.8396, 0.9434, 0.9444]