In [1]:
from __future__ import division, print_function, unicode_literals
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [2]:
%matplotlib inline

In [3]:
try:
    from sklearn.datasets import fetch_openml
    mnist = fetch_openml('mnist_784', version=1)
    mnist.target = mnist.target.astype(np.int64)
except ImportError:
    from sklearn.datasets import fetch_mldata
    mnist = fetch_mldata('MNIST original')

In [4]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test=train_test_split(mnist['data'],mnist['target'], test_size=0.2,random_state=42)

In [5]:
from sklearn.ensemble import RandomForestClassifier,ExtraTreesClassifier
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier

In [6]:
## Problem :#8

lin_clf=LinearSVC(random_state=42)
rf_clf=RandomForestClassifier(n_estimators=10,random_state=42)
extra_clf=ExtraTreesClassifier(n_estimators=10,random_state=42)
mlp_clf=MLPClassifier(random_state=42)


In [7]:
estimators = [lin_clf, rf_clf, extra_clf, mlp_clf]
for estimator in estimators:
    print("Training the", estimator)
    estimator.fit(X_train, y_train)

Training the LinearSVC(random_state=42)




Training the RandomForestClassifier(n_estimators=10, random_state=42)
Training the ExtraTreesClassifier(n_estimators=10, random_state=42)
Training the MLPClassifier(random_state=42)


In [8]:
[estimator.score(X_test, y_test) for estimator in estimators]


[0.8585714285714285,
 0.9455714285714286,
 0.9480714285714286,
 0.9617857142857142]

In [9]:
from sklearn.ensemble import VotingClassifier
named_estimator=[
      ('lr',lin_clf),
    ('rf',rf_clf),
    ('extra',extra_clf),
    ('mlp',mlp_clf)
]
voting_clf=VotingClassifier(estimators=named_estimator,voting='hard')
voting_clf.fit(X_train,y_train)



VotingClassifier(estimators=[('lr', LinearSVC(random_state=42)),
                             ('rf',
                              RandomForestClassifier(n_estimators=10,
                                                     random_state=42)),
                             ('extra',
                              ExtraTreesClassifier(n_estimators=10,
                                                   random_state=42)),
                             ('mlp', MLPClassifier(random_state=42))])

In [10]:
voting_clf.score(X_test,y_test)

0.9609285714285715

In [11]:
[estimator.score(X_test, y_test) for estimator in voting_clf.estimators_]


[0.8585714285714285,
 0.9455714285714286,
 0.9480714285714286,
 0.9617857142857142]

In [12]:
voting_clf.estimators

[('lr', LinearSVC(random_state=42)),
 ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),
 ('extra', ExtraTreesClassifier(n_estimators=10, random_state=42)),
 ('mlp', MLPClassifier(random_state=42))]

In [13]:
voting_clf.set_params(lr=None)

VotingClassifier(estimators=[('lr', None),
                             ('rf',
                              RandomForestClassifier(n_estimators=10,
                                                     random_state=42)),
                             ('extra',
                              ExtraTreesClassifier(n_estimators=10,
                                                   random_state=42)),
                             ('mlp', MLPClassifier(random_state=42))])

In [14]:
voting_clf.estimators_

[LinearSVC(random_state=42),
 RandomForestClassifier(n_estimators=10, random_state=42),
 ExtraTreesClassifier(n_estimators=10, random_state=42),
 MLPClassifier(random_state=42)]

In [15]:
del voting_clf.estimators_[0]

In [16]:
voting_clf.score(X_test,y_test)

0.9646428571428571

In [17]:
[estimator.score(X_test, y_test) for estimator in voting_clf.estimators_]


[0.9455714285714286, 0.9480714285714286, 0.9617857142857142]

In [18]:
voting_clf.voting='soft'
print(voting_clf.score(X_test,y_test))
[estimator.score(X_test, y_test) for estimator in voting_clf.estimators_]


0.9693571428571428


[0.9455714285714286, 0.9480714285714286, 0.9617857142857142]

# Problem 9

In [20]:
X_val_predictions = np.empty((len(X_val), len(estimators)), dtype=np.float32)

for index, estimator in enumerate(estimators):
    X_val_predictions[:, index] = estimator.predict(X_val)

In [21]:
X_val_predictions


array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       ...,
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]], dtype=float32)

In [23]:
rnd_forest_blender = RandomForestClassifier(n_estimators=200, oob_score=True, random_state=42)
rnd_forest_blender.fit(X_val_predictions, y_test)

RandomForestClassifier(n_estimators=200, oob_score=True, random_state=42)

In [24]:
rnd_forest_blender.oob_score_


0.11428571428571428

In [25]:
X_test_predictions = np.empty((len(X_test), len(estimators)), dtype=np.float32)

for index, estimator in enumerate(estimators):
    X_test_predictions[:, index] = estimator.predict(X_test)

In [26]:
y_pred = rnd_forest_blender.predict(X_test_predictions)


In [27]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)


0.11428571428571428