## Train different classifiers and ensemble methods on the MNIST dataset

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_openml

# Fetch the 'mnist_784' dataset from OpenML as plain arrays (as_frame=False).
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X = mnist['data']
y = mnist['target']

# Hold out 1/7 of the samples as the test set, then 1/6 of what remains as the
# validation set. random_state is fixed so the partition -- and therefore every
# score computed from it -- is the same after a kernel restart.
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X, y, test_size=(1/7), random_state=42)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, test_size=(1/6), random_state=42)

In [8]:
from sklearn.ensemble import RandomForestClassifier

# Random forest using n_estimators=550 / max_leaf_nodes=18, the combination
# reported as best_params_ by the randomized search in this notebook.
# random_state is fixed so refitting yields the same forest.
rnd_clf = RandomForestClassifier(
    n_estimators=550,
    max_leaf_nodes=18,
    n_jobs=-1,
    random_state=42,
)
rnd_clf.fit(X_train, y_train)

In [31]:
from sklearn.model_selection import RandomizedSearchCV
# NOTE(review): avoid_run() is not defined in the visible part of the notebook;
# presumably it aborts this cell so the expensive search is not repeated on
# every run -- confirm, and consider an explicit flag or cached result instead.
avoid_run()

# Randomized search over forest size and leaf budget: 20 sampled candidates,
# each scored with 3-fold cross-validation. Both the candidate sampling and the
# forests themselves are seeded so the search is repeatable.
clf = RandomizedSearchCV(
    RandomForestClassifier(n_jobs=-1, random_state=42),
    param_distributions=dict(
        n_estimators=list(range(100, 1000, 50)),
        max_leaf_nodes=list(range(6, 20, 2)),
    ),
    n_iter=20,
    cv=3,
    random_state=42,
)
clf.fit(X_train, y_train)

In [42]:
# Display the hyperparameter combination selected by the random-forest search.
clf.best_params_

{'n_estimators': 550, 'max_leaf_nodes': 18}

In [38]:
# List every sampled candidate next to its mean cross-validated test score.
search_results = clf.cv_results_
for candidate, score in zip(search_results['params'], search_results['mean_test_score']):
    print(candidate, '=', score)

{'n_estimators': 500, 'max_leaf_nodes': 14} = 0.8113199958489731
{'n_estimators': 950, 'max_leaf_nodes': 16} = 0.8243800046595973
{'n_estimators': 800, 'max_leaf_nodes': 6} = 0.7190200021752595
{'n_estimators': 200, 'max_leaf_nodes': 12} = 0.7947199666351087
{'n_estimators': 350, 'max_leaf_nodes': 18} = 0.8319399746650453
{'n_estimators': 650, 'max_leaf_nodes': 16} = 0.8252399466591251
{'n_estimators': 400, 'max_leaf_nodes': 6} = 0.7211598909747474
{'n_estimators': 900, 'max_leaf_nodes': 6} = 0.7250799589792435
{'n_estimators': 250, 'max_leaf_nodes': 18} = 0.8314399726646053
{'n_estimators': 550, 'max_leaf_nodes': 18} = 0.8326400014661414
{'n_estimators': 750, 'max_leaf_nodes': 6} = 0.7258199873804037
{'n_estimators': 450, 'max_leaf_nodes': 8} = 0.760239858205356
{'n_estimators': 650, 'max_leaf_nodes': 8} = 0.7590399794068201
{'n_estimators': 200, 'max_leaf_nodes': 8} = 0.754639934202396
{'n_estimators': 850, 'max_leaf_nodes': 6} = 0.7234199333774035
{'n_estimators': 800, 'max_leaf_nod

In [10]:
from sklearn.metrics import accuracy_score

# Accuracy of the random forest on the validation split.
y_pred_rnd = rnd_clf.predict(X_val)
accuracy_score(y_true=y_val, y_pred=y_pred_rnd)

0.834

In [57]:
from sklearn.ensemble import ExtraTreesClassifier
# NOTE(review): avoid_run() is not defined in the visible part of the notebook;
# presumably it aborts this cell so the expensive search is not repeated on
# every run -- confirm, and consider an explicit flag or cached result instead.
avoid_run()

# Randomized search for the extra-trees model: 10 sampled candidates, each
# scored with 2-fold cross-validation. Both the candidate sampling and the
# estimator are seeded so the search is repeatable.
clf_2 = RandomizedSearchCV(
    ExtraTreesClassifier(n_jobs=-1, random_state=42),
    param_distributions=dict(
        n_estimators=list(range(400, 1000, 50)),
        max_leaf_nodes=list(range(14, 20, 1)),
    ),
    n_iter=10,
    cv=2,
    random_state=42,
)
clf_2.fit(X_train, y_train)

In [60]:
# Display the extra-trees estimator selected by the second randomized search.
clf_2.best_estimator_

In [11]:
from sklearn.ensemble import ExtraTreesClassifier

# Extra-trees classifier with fixed hyperparameters.
# NOTE(review): n_estimators=750 / max_leaf_nodes=19 presumably come from
# clf_2.best_estimator_, whose output is not shown -- confirm.
# random_state is fixed so refitting yields the same ensemble.
extree_clf = ExtraTreesClassifier(
    n_estimators=750,
    max_leaf_nodes=19,
    n_jobs=-1,
    random_state=42,
)
extree_clf.fit(X_train, y_train)

In [16]:
# Accuracy of the extra-trees classifier on the validation split.
y_pred_extree = extree_clf.predict(X_val)
accuracy_score(y_true=y_val, y_pred=y_pred_extree)

0.8241

In [13]:
from sklearn.svm import SVC

# Support-vector classifier with scikit-learn's default settings, fitted on the
# training split as-is (no feature scaling is applied in this notebook).
svc_clf = SVC()
svc_clf.fit(X=X_train, y=y_train)

In [14]:
# Accuracy of the SVC on the validation split.
y_pred_svc = svc_clf.predict(X_val)
accuracy_score(y_true=y_val, y_pred=y_pred_svc)

0.9762

In [50]:
# Accuracy of the SVC on the held-out test split. The predictions get their own
# name so the validation predictions stored in y_pred_svc are not overwritten.
y_pred_svc_test = svc_clf.predict(X_test)
accuracy_score(y_test, y_pred_svc_test)

0.9787

In [36]:
from sklearn.ensemble import VotingClassifier

# Hard-voting ensemble over the three classifiers defined in earlier cells.
base_estimators = [
    ('Randomforest', rnd_clf),
    ('Exxforest', extree_clf),
    ('svc', svc_clf),
]
voting_clf = VotingClassifier(estimators=base_estimators, voting='hard')
voting_clf.fit(X_train, y_train)

In [38]:
# Accuracy of the hard-voting ensemble on the validation split.
y_pred_voting = voting_clf.predict(X_val)
accuracy_score(y_true=y_val, y_pred=y_pred_voting)

0.8594

In [42]:
import numpy as np

# Predict the validation split with each individually trained classifier.
y_pred_rnd = rnd_clf.predict(X_val)
y_pred_extree = extree_clf.predict(X_val)
y_pred_svc = svc_clf.predict(X_val)

# Blender training matrix: one row per validation sample, one column per
# classifier (random forest, extra-trees, SVC -- in that order).
# np.column_stack replaces the manual zero-filled buffer and per-column copy;
# the float64 cast keeps the dtype that buffer had.
pred_train = np.column_stack(
    [y_pred_rnd, y_pred_extree, y_pred_svc]
).astype(np.float64)

In [44]:
# Refit voting_clf, this time on the 3-column matrix of base-classifier
# predictions with the validation labels as targets. This replaces the fit on
# (X_train, y_train) from the earlier cell, so from here on voting_clf expects
# prediction matrices such as pred_train rather than raw samples.
# NOTE(review): VotingClassifier is documented to fit clones of its estimators,
# so rnd_clf / extree_clf / svc_clf should remain as trained above -- confirm.
voting_clf.fit(pred_train, y_val)

In [48]:
# Predict the test split with each individually trained classifier. The results
# get *_test names so the validation predictions (y_pred_rnd, y_pred_extree,
# y_pred_svc) are not overwritten.
y_pred_rnd_test = rnd_clf.predict(X_test)
y_pred_extree_test = extree_clf.predict(X_test)
y_pred_svc_test = svc_clf.predict(X_test)

# Blender input matrix: one row per test sample, one column per classifier
# (random forest, extra-trees, SVC -- in that order).
# np.column_stack replaces the manual zero-filled buffer and per-column copy;
# the float64 cast keeps the dtype that buffer had.
pred_test = np.column_stack(
    [y_pred_rnd_test, y_pred_extree_test, y_pred_svc_test]
).astype(np.float64)

In [49]:
# Accuracy on the test split of voting_clf applied to the matrix of
# base-classifier test predictions.
y_pred_voting = voting_clf.predict(pred_test)
accuracy_score(y_true=y_test, y_pred=y_pred_voting)

0.971