In [1]:
import warnings

warnings.filterwarnings('ignore')

In [2]:
from sklearn.datasets import fetch_openml

# Fetch OpenML dataset 'mnist_784' (version 1) as arrays rather than a
# DataFrame: X holds the features, y the targets.
X, y = fetch_openml(
    name='mnist_784',
    version=1,
    return_X_y=True,
    as_frame=False,
)

Exercise 8.

In [3]:
# Total number of samples in the downloaded dataset, before it is split.
len(X)

70000

In [4]:
# Positional split: the first 50,000 samples go to training, the next 10,000
# to validation and everything from index 60,000 onward to testing.
train_rows = slice(None, 50000)
valid_rows = slice(50000, 60000)
test_rows = slice(60000, None)

X_train, y_train = X[train_rows], y[train_rows]
X_valid, y_valid = X[valid_rows], y[valid_rows]
X_test, y_test = X[test_rows], y[test_rows]

In [5]:
from lightgbm import LGBMClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

# Both tree ensembles share the same size, parallelism and seed.
forest_params = dict(n_estimators=200, n_jobs=-1, random_state=42)

et_clf = ExtraTreesClassifier(**forest_params)
rf_clf = RandomForestClassifier(**forest_params)
# probability=True lets the SVC produce class probabilities, which the
# soft-voting step later in the notebook relies on.
svm_clf = SVC(probability=True, max_iter=200, random_state=42)
lgbm_clf = LGBMClassifier(random_state=42, n_jobs=-1)

# (name, estimator) pairs in the format VotingClassifier expects.
estimators = list(zip(
    ('et', 'rf', 'svm', 'lgbm'),
    (et_clf, rf_clf, svm_clf, lgbm_clf),
))

# Start with hard (majority) voting; a later cell switches this to soft voting.
voting_clf = VotingClassifier(estimators=estimators, n_jobs=-1, voting='hard')

In [6]:
# Fit each model on the training split and print its validation score.
# NOTE(review): clfs also contains voting_clf, and this same list is iterated
# again later to build the blender inputs - confirm that the ensemble's own
# predictions are meant to be a blender feature.
clfs = [et_clf, rf_clf, svm_clf, lgbm_clf, voting_clf]

for model in clfs:
    model.fit(X_train, y_train)
    valid_score = model.score(X_valid, y_valid)
    print(type(model).__name__, valid_score)

ExtraTreesClassifier 0.975
RandomForestClassifier 0.9741
SVC 0.9742
LGBMClassifier 0.9778
VotingClassifier 0.9789


In [7]:
# Flip the already-fitted ensemble to soft voting (no refit) and score it on
# the validation split. The setting persists: every later use of voting_clf
# in this notebook is soft voting.
voting_clf.set_params(voting="soft").score(X_valid, y_valid)

0.9795

In [8]:
# Score of the ensemble (switched to soft voting in the previous cell) on the
# test split.
voting_clf.score(X=X_test, y=y_test)

0.9783

Exercise 9.

In [9]:
import numpy as np

# Stack the validation predictions side by side: one row per validation
# sample, one column per model in clfs (same order), stored as float32.
X_val_preds = np.column_stack(
    [clf.predict(X_valid) for clf in clfs]
).astype(np.float32)

In [10]:
# Sanity check: (number of validation samples, number of models in clfs).
X_val_preds.shape

(10000, 5)

In [11]:
# Peek at the stacked predictions; each row holds the models' predictions for one sample.
X_val_preds

array([[3., 3., 3., 3., 3.],
       [8., 8., 8., 8., 8.],
       [6., 6., 6., 6., 6.],
       ...,
       [5., 5., 5., 5., 5.],
       [6., 6., 6., 6., 6.],
       [8., 8., 8., 8., 8.]], dtype=float32)

In [12]:
# Blender: a random forest trained on the models' validation predictions
# (X_val_preds) against the true validation labels. oob_score=True makes the
# out-of-bag score available after fitting.
blender_params = dict(n_estimators=200, n_jobs=-1, random_state=42, oob_score=True)
rf_blender = RandomForestClassifier(**blender_params)
rf_blender.fit(X=X_val_preds, y=y_valid)

RandomForestClassifier(n_estimators=200, n_jobs=-1, oob_score=True,
                       random_state=42)

In [13]:
# Out-of-bag score computed while fitting the blender (enabled by oob_score=True).
rf_blender.oob_score_

0.9766

In [14]:
# Stack the test predictions side by side: one row per test sample, one
# column per model in clfs (same order), stored as float32.
X_test_preds = np.column_stack(
    [clf.predict(X_test) for clf in clfs]
).astype(np.float32)

In [15]:
# Final labels from the random-forest blender, computed from the stacked
# test-set predictions.
y_pred = rf_blender.predict(X=X_test_preds)

In [16]:
from sklearn.metrics import accuracy_score

# Test accuracy of the blender predictions currently held in y_pred.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.9758

In [17]:
# Alternative blender: a LightGBM classifier trained on the same stacked
# validation predictions and labels.
lgbm_blender = LGBMClassifier(random_state=42, n_jobs=-1)
lgbm_blender.fit(X=X_val_preds, y=y_valid)

LGBMClassifier(random_state=42)

In [18]:
# Predict with the LightGBM blender (this overwrites the earlier y_pred) and
# report its accuracy on the test split.
y_pred = lgbm_blender.predict(X_test_preds)
accuracy_score(y_true=y_test, y_pred=y_pred)

0.9761