In [1]:
# Wymagane środowisko Python ≥3.5
import sys
assert sys.version_info >= (3, 5)
import math, time, random, datetime

# Wymagana biblioteka Scikit-Learn ≥0.20
import sklearn
assert sklearn.__version__ >= "0.20"

# Importuje standardowe biblioteki
import numpy as np
import os
import pandas as pd

# Aby wyniki uzyskiwane w tym notatniku były odtwarzalne
np.random.seed(42)

# Do rysowania ładnych wykresów
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)



In [2]:
from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784', version=1)
mnist.keys()

dict_keys(['data', 'target', 'feature_names', 'DESCR', 'details', 'categories', 'url'])

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
X_train_test, X_val, y_train_test, y_val = train_test_split(mnist.data, mnist.target, test_size=10000, random_state=42)

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X_train_test, y_train_test, test_size=10000, random_state=42)

In [6]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score

In [7]:
rfc = RandomForestClassifier(random_state=42)
rfc.fit(X_train, y_train)



RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=42, verbose=0,
                       warm_start=False)

In [8]:
y_pred = rfc.predict(X_test)
accuracy_score(y_test, y_pred)

0.9469

In [9]:
XtreeC = ExtraTreesClassifier(random_state=42)
XtreeC.fit(X_train, y_train)



ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini',
                     max_depth=None, max_features='auto', max_leaf_nodes=None,
                     min_impurity_decrease=0.0, min_impurity_split=None,
                     min_samples_leaf=1, min_samples_split=2,
                     min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
                     oob_score=False, random_state=42, verbose=0,
                     warm_start=False)

In [10]:
y_pred = XtreeC.predict(X_test)
accuracy_score(y_test, y_pred)

0.9492

In [11]:
from sklearn.neural_network import MLPClassifier
mlp_clf = MLPClassifier(random_state=42)
mlp_clf.fit(X_train, y_train)
y_pred = mlp_clf.predict(X_test)
accuracy_score(y_test, y_pred)

0.9582

In [12]:
from sklearn.ensemble import VotingClassifier

voting_clf = VotingClassifier(estimators=[('randomforrest', rfc),('extratrees',XtreeC),('mlp',mlp_clf)],
                             voting='soft')
voting_clf.fit(X_train, y_train)
y_pred = voting_clf.predict(X_test)
accuracy_score(y_test, y_pred)

0.9658

In [13]:
hard_voting_clf = VotingClassifier(estimators=[('randomforrest', rfc),('extratrees',XtreeC),('mlp',mlp_clf)],
                             voting='hard')
voting_clf.fit(X_train, y_train)
y_pred = voting_clf.predict(X_test)
accuracy_score(y_test, y_pred)

0.9658

In [14]:
y_pred_rfc = rfc.predict(X_val)

In [15]:
y_pred_XtreeC = XtreeC.predict(X_val)

In [16]:
y_pred_MLP = mlp_clf.predict(X_val)

In [17]:
y_pred_rfc

array(['8', '4', '3', ..., '3', '8', '3'], dtype=object)

In [18]:
y_pred_XtreeC

array(['8', '4', '8', ..., '3', '8', '3'], dtype=object)

In [19]:
y_pred_MLP

array(['8', '4', '6', ..., '3', '8', '3'], dtype='<U1')

In [26]:
X_val_predictions = np.column_stack((y_pred_rfc, y_pred_MLP, y_pred_XtreeC))

In [27]:
rnd_forest_blender = RandomForestClassifier(n_estimators=200, oob_score=True, random_state=42)
rnd_forest_blender.fit(X_val_predictions, y_val)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=200,
                       n_jobs=None, oob_score=True, random_state=42, verbose=0,
                       warm_start=False)

In [28]:
rnd_forest_blender.oob_score_

0.962