In [4]:
# Load MNIST, preferring fetch_openml and falling back to the legacy
# fetch_mldata loader when an ImportError is raised.
try:
    from sklearn.datasets import fetch_openml
    mnist = fetch_openml('mnist_784', version=1, cache=True, as_frame=False)
except ImportError:
    from sklearn.datasets import fetch_mldata
    mnist = fetch_mldata('MNIST original')

X, y = mnist["data"], mnist["target"]

# Positional split: rows 0-49,999 for training, rows 50,000-59,999 for
# validation, and everything from row 60,000 onward for testing.
X_train, X_valid, X_test = X[:50000], X[50000:60000], X[60000:]
y_train, y_valid, y_test = y[:50000], y[50000:60000], y[60000:]

In [5]:
# Shuffle the training rows with a fixed seed so the ordering is reproducible.
# One permutation indexes both features and labels, keeping them aligned.
import numpy as np

np.random.seed(42)
rand_idx = np.random.permutation(50000)
X_train, y_train = X_train[rand_idx], y_train[rand_idx]

In [6]:
# Standardize the pixel features for the SVM.
# The scaler's mean/variance are learned from the training set only and then
# reused unchanged on the test set. Re-fitting on the test data (fit_transform)
# would scale the two sets with different statistics and leak test-set
# information into preprocessing.
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train.astype(np.float32))
X_test_scaled = scaler.transform(X_test.astype(np.float32))

In [7]:
# Train a random forest on the unscaled training features.
from sklearn.ensemble import RandomForestClassifier

rnd_clf = RandomForestClassifier(
    n_estimators=10,
    random_state=42,
    n_jobs=-1,
)
rnd_clf.fit(X_train, y_train)

In [8]:
# Accuracy of the random forest on the same data it was trained on
# (a training-set figure, not a held-out estimate).
from sklearn.metrics import accuracy_score

y_pred_rnd = rnd_clf.predict(X_train)
accuracy_score_rnd = accuracy_score(y_true=y_train, y_pred=y_pred_rnd)
accuracy_score_rnd

In [9]:
# Train an extra-trees ensemble on the unscaled training features.
from sklearn.ensemble import ExtraTreesClassifier

extra_clf = ExtraTreesClassifier(
    n_estimators=10,
    random_state=42,
    n_jobs=-1,
)
extra_clf.fit(X_train, y_train)

In [10]:
# Training-set accuracy of the extra-trees model.
y_pred_extra = extra_clf.predict(X_train)
accuracy_score_extra = accuracy_score(y_true=y_train, y_pred=y_pred_extra)
accuracy_score_extra

In [11]:
# SVM classifier, trained on standardized features.
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
import numpy as np

# Fit a fresh scaler on the training set and keep the scaled copy for the SVM.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train.astype(np.float32))

svm_clf = SVC(random_state=42)
svm_clf.fit(X=X_train_scaled, y=y_train)

In [12]:
# Training-set accuracy of the SVM; predictions use the standardized features
# the model was fit on.
y_pred_svc = svm_clf.predict(X_train_scaled)
accuracy_score_svm = accuracy_score(y_true=y_train, y_pred=y_pred_svc)

In [13]:
# Display the SVM's training-set accuracy.
accuracy_score_svm

In [14]:
# Hard-voting ensemble of the three models above.
# VotingClassifier fits clones of the estimators it is given on the data passed
# to fit(), so the SVM member would otherwise be trained on unscaled features.
# Wrapping it in a pipeline with its own StandardScaler gives the SVM
# standardized inputs (matching the standalone SVM) while the tree ensembles
# keep the unscaled features. The already-fitted svm_clf itself is not refit,
# because only a clone of the pipeline is trained.
from sklearn.ensemble import VotingClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

voting_clf = VotingClassifier(
    estimators=[
        ('rnd', rnd_clf),
        ('extra_tree', extra_clf),
        ('svc', make_pipeline(StandardScaler(), svm_clf)),
    ],
    voting='hard',
)
voting_clf.fit(X_train, y_train)

In [15]:
# Training-set accuracy of the voting ensemble.
y_pred_voting = voting_clf.predict(X_train)
accuracy_score_voting = accuracy_score(y_true=y_train, y_pred=y_pred_voting)
accuracy_score_voting

In [17]:
# Accuracy of the voting ensemble on the held-out validation split.
voting_clf.score(X=X_valid, y=y_valid)

In [18]:
# Accuracy of the voting ensemble on the test split.
voting_clf.score(X=X_test, y=y_test)

In [22]:
# Preallocate the blender's training matrix: one row per validation sample and
# one column per base model.
estimators = [rnd_clf, extra_clf, svm_clf]
x_val_predictions = np.empty(
    shape=(len(X_valid), len(estimators)),
    dtype=np.float32,
)

In [25]:
# Inspect the extra-trees predictions on the validation split.
extra_clf.predict(X=X_valid)

In [41]:
# Fill one column per base model with its predictions on the validation split.
# svm_clf was fit on standardized features, so it must see the validation data
# through the same train-fitted scaler; feeding it unscaled values would make
# its predictions meaningless. The tree ensembles were fit on unscaled features
# and keep receiving them.
X_valid_scaled = scaler.transform(X_valid.astype(np.float32))

for index, estimator in enumerate(estimators):
    model_inputs = X_valid_scaled if estimator is svm_clf else X_valid
    x_val_predictions[:, index] = estimator.predict(model_inputs)
x_val_predictions

In [42]:
# Blender: a random forest trained on the base models' validation-set
# predictions, with out-of-bag scoring enabled.
rnd_clf_stacked = RandomForestClassifier(
    n_estimators=200,
    random_state=42,
    oob_score=True,
)
rnd_clf_stacked.fit(x_val_predictions, y_valid)

In [33]:
# Out-of-bag score of the blender (available because it was constructed with
# oob_score=True).
rnd_clf_stacked.oob_score_

In [37]:
# Stack each base model's test-set predictions into one column per model.
# svm_clf was fit on standardized features, so the test data is passed through
# the train-fitted scaler (transform only, never re-fit) before the SVM sees
# it; the tree ensembles receive the unscaled features they were trained on.
x_test_predictions = np.empty((len(X_test), len(estimators)), dtype=np.float32)
X_test_scaled = scaler.transform(X_test.astype(np.float32))

for index, estimator in enumerate(estimators):
    model_inputs = X_test_scaled if estimator is svm_clf else X_test
    x_test_predictions[:, index] = estimator.predict(model_inputs)
x_test_predictions

In [36]:
# Display the test-set labels.
y_test

In [39]:
# Let the blender turn the stacked base-model predictions into final test labels.
y_pred_blender = rnd_clf_stacked.predict(X=x_test_predictions)

In [40]:
# Test-set accuracy of the blender.
accuracy_score(y_true=y_test, y_pred=y_pred_blender)

In [None]:
# Conclusion: the hard-voting ensemble performs better than the other methods tried in this notebook.