## Q.8 — Voting ensemble on MNIST

In [2]:
import numpy as np
import pandas as pd
from numpy.random import randn

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import GridSearchCV, cross_val_score, KFold, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, r2_score, mean_squared_error
from sklearn.linear_model import LinearRegression, LogisticRegression, RANSACRegressor, Ridge, Lasso, ElasticNet
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.svm import SVC

In [3]:
from sklearn.datasets import fetch_openml

# Fetch version 1 of the 'mnist_784' dataset from OpenML.
# as_frame=False requests array data rather than a pandas DataFrame.
mnist = fetch_openml(
    name='mnist_784',
    version=1,
    as_frame=False,
)

# Show which fields the returned object exposes.
mnist.keys()

dict_keys(['data', 'target', 'frame', 'feature_names', 'target_names', 'DESCR', 'details', 'categories', 'url'])

In [4]:
# Both splits hold out 10,000 samples and share the same seed, so the
# train / validation / test partition is reproducible.
holdout_opts = dict(test_size=10000, random_state=2021)

# Split 1: set the test set aside from the full dataset.
X_train_val, X_test, y_train_val, y_test = train_test_split(
    mnist.data, mnist.target, **holdout_opts)

# Split 2: carve the validation set out of what remains.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, **holdout_opts)

In [7]:
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier

# Four base classifiers, all seeded with 2021.
random_forest_clf = RandomForestClassifier(n_estimators=100, random_state=2021)
extra_trees_clf = ExtraTreesClassifier(n_estimators=100, random_state=2021)
# NOTE(review): max_iter=100 with tol=10 is a very loose stopping rule —
# presumably chosen to keep training short; confirm.
svm_clf = LinearSVC(max_iter=100, tol=10, random_state=2021)
mlp_clf = MLPClassifier(random_state=2021)

estimators = [
    random_forest_clf,
    extra_trees_clf,
    svm_clf,
    mlp_clf,
]

# Train every model on the training split and report its validation accuracy.
for estimator in estimators:
    val_accuracy = estimator.fit(X_train, y_train).score(X_val, y_val)
    print(f'{estimator} score: {val_accuracy}')

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=2021,
                       verbose=0, warm_start=False) score: 0.9669
ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None,
                     criterion='gini', max_depth=None, max_features='auto',
                     max_leaf_nodes=None, max_samples=None,
                     min_impurity_decrease=0.0, min_impurity_split=None,
                     min_samples_leaf=1, min_samples_split=2,
                     min_weight_fraction_leaf=0.0, n_estimators=100,
                     n_jobs

In [9]:
from sklearn.ensemble import VotingClassifier

# (name, estimator) pairs handed to the ensemble; the MLP is not among them.
models = [
    ("random_forest_clf", random_forest_clf),
    ("extra_trees_clf", extra_trees_clf),
    ("svm_clf", svm_clf),
]

# `voting` is left at its default.
voting_clf = VotingClassifier(estimators=models)

# fit() is the cell's last expression, so the fitted ensemble is echoed.
voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('random_forest_clf',
                              RandomForestClassifier(bootstrap=True,
                                                     ccp_alpha=0.0,
                                                     class_weight=None,
                                                     criterion='gini',
                                                     max_depth=None,
                                                     max_features='auto',
                                                     max_leaf_nodes=None,
                                                     max_samples=None,
                                                     min_impurity_decrease=0.0,
                                                     min_impurity_split=None,
                                                     min_samples_leaf=1,
                                                     min_samples_split=2,
                                                     min_weight_fraction_lea

In [10]:
# Validation accuracy of the fitted voting ensemble.
accuracy_score(y_val, voting_clf.predict(X_val))

0.9668

In [11]:
# Remove the fitted LinearSVC from the ensemble's fitted members (no refit).
# Filtering by type rather than deleting index 2 keeps this cell idempotent:
# a second run finds no LinearSVC left and changes nothing, whereas a positional
# `del ...[2]` on the already-shortened list would raise IndexError.
voting_clf.estimators_ = [
    fitted for fitted in voting_clf.estimators_
    if not isinstance(fitted, LinearSVC)
]

# Validation accuracy with the SVM's vote excluded.
voting_clf.score(X_val, y_val)

0.9682

In [12]:
# Switch the already-fitted ensemble to soft voting (no refit) and
# re-check its validation accuracy.
voting_clf.set_params(voting='soft')
voting_clf.score(X_val, y_val)

0.9702

In [13]:
# Switch back to hard voting and evaluate on the held-out test set.
# NOTE(review): the recorded validation scores are 0.9702 for soft voting and
# 0.9682 for hard voting — confirm hard voting is the intended final setting.
voting_clf.set_params(voting='hard')
voting_clf.score(X_test, y_test)

0.9695

In [18]:
# Test-set accuracy of each individually fitted base classifier, printed in the
# order: random forest, extra-trees, linear SVM.
# These are the standalone models fitted earlier in the notebook, not the
# members held in voting_clf.estimators_.
for base_clf in (random_forest_clf, extra_trees_clf, svm_clf):
    print(base_clf.score(X_test, y_test))

0.9676
0.9715
0.851


## Q.9 — Stacking: a blender trained on the base estimators' validation predictions

In [15]:
# One row per validation sample, one column per base estimator.
X_val_predictions = np.empty(
    shape=(len(X_val), len(estimators)),
    dtype=np.float32,
)

# Column `col` holds the labels predicted by the col-th estimator; the
# assignment casts them to float32.
for col, clf in enumerate(estimators):
    X_val_predictions[:, col] = clf.predict(X_val)

X_val_predictions

array([[6., 6., 6., 6.],
       [9., 9., 9., 9.],
       [4., 4., 4., 4.],
       ...,
       [9., 9., 4., 9.],
       [8., 8., 8., 8.],
       [3., 3., 3., 3.]], dtype=float32)

In [16]:
# Two candidate blenders trained on the base estimators' validation predictions.
# Both are built with oob_score=True so an out-of-bag score is available after
# fitting; the extra-trees blender sets bootstrap=True explicitly.
new_rf = RandomForestClassifier(
    n_estimators=100, oob_score=True, random_state=2021)
new_et = ExtraTreesClassifier(
    n_estimators=100, oob_score=True, bootstrap=True, random_state=2021)

# Fit each blender and print its out-of-bag score (random forest first).
for blender in (new_rf, new_et):
    blender.fit(X_val_predictions, y_val)
    print(blender.oob_score_)

0.9684
0.9687


In [17]:
# Blender inputs for the test set: one row per test sample, one column of
# predicted labels per base estimator.
X_test_predictions = np.empty(
    shape=(len(X_test), len(estimators)),
    dtype=np.float32,
)
for col, clf in enumerate(estimators):
    X_test_predictions[:, col] = clf.predict(X_test)

# Final prediction comes from the random-forest blender.
y_pred = new_rf.predict(X_test_predictions)

# Test-set accuracy of the stacked model.
accuracy_score(y_test, y_pred)

0.9705