In [31]:
from sklearn.datasets import fetch_openml
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier

In [2]:
# Load the 'mnist_784' dataset (version 1) from OpenML as a (data, target) pair.
X, y = fetch_openml(
    name='mnist_784',
    version=1,
    return_X_y=True,
)

In [5]:
# Hold out 20% of the data as the final test set, then carve 20% of the
# remainder off as a validation set for model comparison.
# A fixed random_state makes both shuffles deterministic, so the partition
# (and therefore every score below) is reproducible across kernel restarts.
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, test_size=0.2, random_state=42
)

In [32]:
# (name, estimator) pairs: evaluated individually first, then passed to the
# voting ensemble. Each estimator draws random numbers during fitting, so each
# one is seeded to keep the reported accuracies reproducible between runs.
models = [
    ('decision_tree', DecisionTreeClassifier(random_state=42)),
    ('extra_tree', ExtraTreeClassifier(random_state=42)),
    ('svm', LinearSVC(random_state=42)),
]

In [33]:
# Fit every base classifier on the training split and report its accuracy on
# both the training and the validation split, to compare the models and to
# see how far each one overfits.
for name, model in models:
    print(f'Analysing model: {name}')
    model.fit(X_train, y_train)
    y_train_pred = model.predict(X_train)
    y_val_pred = model.predict(X_val)
    # accuracy_score expects (y_true, y_pred): ground truth first. Accuracy is
    # symmetric, but keeping the documented order avoids a silent bug if this
    # metric is later swapped for an asymmetric one (precision, recall, ...).
    train_accuracy = accuracy_score(y_train, y_train_pred)
    val_accuracy = accuracy_score(y_val, y_val_pred)
    print(f'\tTrain accuracy: {train_accuracy:.2f}')
    print(f'\t  Val accuracy: {val_accuracy:.2f}')

Analysing model: decision_tree
	Train accuracy: 1.00
	  Val accuracy: 0.86
Analysing model: extra_tree
	Train accuracy: 1.00
	  Val accuracy: 0.81
Analysing model: svm
	Train accuracy: 0.89
	  Val accuracy: 0.87




In [34]:
# Combine the base classifiers into a single voting ensemble.
# voting='hard' (the scikit-learn default, spelled out here) predicts the
# majority class label; n_jobs=-1 fits the estimators using all processors.
ensemble_clf = VotingClassifier(
    estimators=models,
    voting='hard',
    n_jobs=-1,
)

In [35]:
# Train the ensemble on the training split and display its validation accuracy.
ensemble_clf.fit(X_train, y_train)
# accuracy_score expects (y_true, y_pred): ground truth first.
accuracy_score(y_val, ensemble_clf.predict(X_val))

0.90125

In [36]:
# Final check: accuracy of the fitted ensemble on the held-out test split.
# accuracy_score expects (y_true, y_pred): ground truth first.
accuracy_score(y_test, ensemble_clf.predict(X_test))

0.9032857142857142