In [1]:
# TODO: 1. Use MNIST dataset
# TODO: 2. Split the dataset into training/test/validation sets (50k/10k/10k)
# TODO: 3. Use RandomForestClassifier, ExtraTreesClassifier, DecisionTreeClassifier ( collect metrics )
# TODO: 4. Try to create ensemble of the above models ( collect metrics ) use hard/soft voting
# TODO: 5. After training use test set to evaluate the model and compare with original models

In [2]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score
import numpy as np

In [3]:
X, y = fetch_openml('mnist_784', version=1, return_X_y=True)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, train_size=50000, random_state=42, stratify=y)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, train_size=10000, test_size=10000, random_state=42, stratify=y_temp)

In [4]:
print(f"Training set size: {X_train.shape}, Validation set size: {X_val.shape}, Test set size: {X_test.shape}")

Training set size: (50000, 784), Validation set size: (10000, 784), Test set size: (10000, 784)


In [5]:
random_forest_clf = RandomForestClassifier(n_estimators=100, random_state=42)
extra_trees_clf = ExtraTreesClassifier(n_estimators=100, random_state=42)
svm_clf = LinearSVC(max_iter=100, tol=20, random_state=42)
mlp_clf = MLPClassifier(random_state=42)

In [6]:
estimators = [random_forest_clf, extra_trees_clf, svm_clf, mlp_clf]
for estimator in estimators:
    print("Training the", estimator)
    estimator.fit(X_train, y_train)

Training the RandomForestClassifier(random_state=42)
Training the ExtraTreesClassifier(random_state=42)
Training the LinearSVC(max_iter=100, random_state=42, tol=20)
Training the MLPClassifier(random_state=42)


In [7]:
[estimator.score(X_val, y_val) for estimator in estimators]

[0.9669, 0.9686, 0.0986, 0.9601]

In [8]:
named_estimators = [
    ("random_forest_clf", random_forest_clf),
    ("extra_trees_clf", extra_trees_clf),
    ("svm_clf", svm_clf),
    ("mlp_clf", mlp_clf),
]

In [9]:
voting_clf = VotingClassifier(named_estimators)

voting_clf.fit(X_train, y_train)
score = voting_clf.score(X_val, y_val)
score

0.9696

In [10]:
voting_clf.estimators_

[RandomForestClassifier(random_state=42),
 ExtraTreesClassifier(random_state=42),
 LinearSVC(max_iter=100, random_state=42, tol=20),
 MLPClassifier(random_state=42)]

In [11]:
# Let's remove the SVM to see if performance improves. It is possible to remove an estimator by setting it to None using set_params() like this:
voting_clf.set_params(svm_clf=None)

0,1,2
,estimators,"[('random_forest_clf', ...), ('extra_trees_clf', ...), ...]"
,voting,'hard'
,weights,
,n_jobs,
,flatten_transform,True
,verbose,False

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,False

0,1,2
,hidden_layer_sizes,"(100,)"
,activation,'relu'
,solver,'adam'
,alpha,0.0001
,batch_size,'auto'
,learning_rate,'constant'
,learning_rate_init,0.001
,power_t,0.5
,max_iter,200
,shuffle,True


In [12]:
voting_clf.estimators

[('random_forest_clf', RandomForestClassifier(random_state=42)),
 ('extra_trees_clf', ExtraTreesClassifier(random_state=42)),
 ('svm_clf', None),
 ('mlp_clf', MLPClassifier(random_state=42))]

In [13]:
del voting_clf.estimators_[2]

In [14]:
voting_clf.estimators_

[RandomForestClassifier(random_state=42),
 ExtraTreesClassifier(random_state=42),
 MLPClassifier(random_state=42)]

In [15]:
voting_clf.voting = "soft"
score = voting_clf.score(X_val, y_val)
score

0.9666

In [16]:
voting_clf.voting = "hard"
score = voting_clf.score(X_test, y_test)
score

0.9697

In [17]:
# [0.9669, 0.9686, 0.0986, 0.9601]
# LinearSVC models is poor -> removed (Score 0.0986)
# scores for best models is [0.9669, 0.9686, 0.9601] avg = 0.9652
# Soft Voting score = 0.9666 ( 0.9666 * 100 ) / 0.9652 = 1.0014 ~ 0,14% ( less than 1% )
# Hard Voting score = 0.9697 ( 0.9697 * 100 ) / 0.9652 = 1.0046 ~ 0,46% ( less than 1% )
# Conclusion: Ensemble model is slightly better than the best individual model

####  Stacking Ensemble

Exercise: Run the individual classifiers from the previous exercise to make predictions on the validation set, and create a new training set with the resulting predictions: each training instance is a vector containing the set of predictions from all your classifiers for an image, and the target is the image's class. Train a classifier on this new training set.

In [19]:


X_val_predictions = np.empty((len(X_val), len(estimators)), dtype=np.float32)

for index,estimator in enumerate(estimators):
    X_val_predictions[:, index] = estimator.predict(X_val)

In [20]:
X_val_predictions

array([[1., 1., 0., 1.],
       [8., 8., 0., 8.],
       [5., 3., 0., 5.],
       ...,
       [9., 5., 0., 5.],
       [6., 6., 0., 6.],
       [1., 1., 0., 3.]], shape=(10000, 4), dtype=float32)

In [21]:
rnd_forest_blender = RandomForestClassifier(n_estimators=200, oob_score=True, random_state=42)
rnd_forest_blender.fit(X_val_predictions, y_val)

0,1,2
,n_estimators,200
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [22]:
rnd_forest_blender.oob_score_

0.967

You could fine-tune this blender or try other types of blenders (e.g., an MLPClassifier), then select the best one using cross-validation, as always.

Exercise: Congratulations, you have just trained a blender, and together with the classifiers they form a stacking ensemble! Now let's evaluate the ensemble on the test set. For each image in the test set, make predictions with all your classifiers, then feed the predictions to the blender to get the ensemble's predictions. How does it compare to the voting classifier you trained earlier?

In [24]:
X_test_predictions = np.empty((len(X_test), len(estimators)), dtype=np.float32)

for index, estimator in enumerate(estimators):
    X_test_predictions[:, index] = estimator.predict(X_test)

In [25]:
y_pred = rnd_forest_blender.predict(X_test_predictions)

In [26]:
accuracy_score(y_test, y_pred)

0.9678

This stacking ensemble does not perform as well as the voting classifier we trained earlier, it's not quite as good as the best individual classifier.