# 匯入套件

In [1]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import BaggingClassifier

# MNIST

In [2]:
# Download the MNIST digits dataset (OpenML id 'mnist_784', version 1) as plain
# NumPy arrays rather than a pandas DataFrame (as_frame=False).
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
# Display the fields available on the returned dataset object.
mnist.keys()

dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])

In [3]:
# Split into 50,000 training / 10,000 validation / 10,000 test samples.
#
# The labels are cast to integers here, once, before any model sees them.
# OpenML serves these class labels as strings; leaving them that way means the
# stacking step further down (which stores predictions in a float32 matrix)
# only works through implicit string -> float parsing. Casting up front makes
# the label dtype explicit and leaves every reported accuracy unchanged.
X_train_val, X_test, y_train_val, y_test = train_test_split(
    mnist.data, mnist.target.astype(np.uint8), test_size=10000, random_state=42
)
# Carve the validation set out of the remaining 60,000 samples.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, test_size=10000, random_state=42
)

In [4]:
# Sanity-check the split sizes: training, test, then validation features.
for split in (X_train, X_test, X_val):
    print(split.shape)

(50000, 784)
(10000, 784)
(10000, 784)


In [5]:
# Base classifiers that are trained individually and then combined into
# ensembles. Every model uses the same fixed seed so reruns are repeatable.
random_forest_clf = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
extra_trees_clf = ExtraTreesClassifier(
    n_estimators=100,
    random_state=42,
)
# probability=True lets the SVM provide class probabilities, which the
# soft-voting ensemble built later requires.
# NOTE(review): max_iter=100 caps the solver iterations, presumably to keep
# training time short; the SVM may stop before it has converged.
svm_clf = SVC(
    probability=True,
    random_state=42,
    max_iter=100,
    tol=0.001,
)

In [6]:
# Fit each base classifier on the training split, announcing it first so the
# progress of the slow fits is visible.
estimators = [random_forest_clf, extra_trees_clf, svm_clf]
for clf in estimators:
    print("Training the", clf)
    clf.fit(X_train, y_train)

Training the RandomForestClassifier(random_state=42)
Training the ExtraTreesClassifier(random_state=42)
Training the SVC(max_iter=100, probability=True, random_state=42)




In [7]:
# Validation accuracy of each individually trained base classifier,
# listed in the same order as `estimators`.
[estimator.score(X_val, y_val) for estimator in estimators]

[0.9692, 0.9715, 0.9336]

In [8]:
# (name, estimator) pairs in the form VotingClassifier expects.
named_estimators = list(zip(
    ("random_forest_clf", "extra_trees_clf", "svm_clf"),
    (random_forest_clf, extra_trees_clf, svm_clf),
))

In [9]:
# Soft-voting ensemble over the three named base classifiers.
voting_clf = VotingClassifier(
    estimators=named_estimators,
    voting="soft",
)

In [10]:
# Train the voting ensemble on the training split.
# NOTE(review): per the scikit-learn docs, VotingClassifier.fit trains its own
# clones of the base estimators, so this repeats the training cost paid above.
voting_clf.fit(X_train, y_train)



In [11]:
# Validation accuracy of the soft-voting ensemble.
voting_clf.score(X_val, y_val)

0.9691

## Test

In [12]:
# Test accuracy of each individual base classifier, in `estimators` order.
[estimator.score(X_test, y_test) for estimator in estimators]

[0.9645, 0.9691, 0.9348]

In [13]:
# Test accuracy of the soft-voting ensemble.
voting_clf.score(X_test, y_test)

0.9668

## Bonus: stacking (a blender trained on the base classifiers' validation-set predictions)

In [14]:
# Feature matrix for the blender: one row per validation sample and one column
# per base classifier, holding that classifier's predicted class as float32.
X_val_predictions = np.column_stack(
    [clf.predict(X_val) for clf in estimators]
).astype(np.float32)

In [15]:
# Blender: a bagging ensemble of 100 learners (BaggingClassifier's default base
# estimator). oob_score=True makes an out-of-bag accuracy estimate available
# after fitting.
Bagging_clf = BaggingClassifier(
    n_estimators=100,
    random_state=42,
    oob_score=True,
)

In [16]:
# Train the blender on the base classifiers' validation-set predictions, so it
# learns from data the base classifiers were not fitted on.
Bagging_clf.fit(X_val_predictions,y_val)

In [17]:
# Out-of-bag score of the blender (available because oob_score=True was set).
Bagging_clf.oob_score_

0.9699

In [18]:
# Same layout as the blender's training features, built from the test set:
# column i holds the i-th base classifier's predicted class as float32.
X_test_predictions = np.column_stack(
    [clf.predict(X_test) for clf in estimators]
).astype(np.float32)

In [19]:
# Final stacked prediction: the blender maps the base classifiers' test-set
# predictions to a class label.
y_pred = Bagging_clf.predict(X_test_predictions)

In [20]:
# Test accuracy of the stacked (base classifiers + blender) model.
accuracy_score(y_test, y_pred)

0.9672

# Fashion MNIST

In [21]:
# Download Fashion-MNIST (OpenML name 'Fashion-MNIST', version 1) as NumPy arrays.
# NOTE: the variable name `mnist` is reused from the digits section, so from
# here on it refers to the Fashion-MNIST dataset, not the original MNIST digits.
mnist = fetch_openml('Fashion-MNIST', version=1, as_frame=False)
# Display the fields available on the returned dataset object.
mnist.keys()

dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])

In [22]:
# Split Fashion-MNIST into 50,000 training / 10,000 validation / 10,000 test
# samples (`mnist` holds the Fashion-MNIST dataset at this point).
#
# The labels are cast to integers here, once, before any model sees them.
# OpenML serves these class labels as strings; leaving them that way means the
# stacking step further down (which stores predictions in a float32 matrix)
# only works through implicit string -> float parsing. Casting up front makes
# the label dtype explicit and leaves every reported accuracy unchanged.
X_train_val, X_test, y_train_val, y_test = train_test_split(
    mnist.data, mnist.target.astype(np.uint8), test_size=10000, random_state=42
)
# Carve the validation set out of the remaining 60,000 samples.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, test_size=10000, random_state=42
)

In [23]:
# Sanity-check the split sizes: training, test, then validation features.
for split in (X_train, X_test, X_val):
    print(split.shape)

(50000, 784)
(10000, 784)
(10000, 784)


In [24]:
# Fresh, unfitted base classifiers for the Fashion-MNIST experiment; these
# rebind the names used in the digits section. Same fixed seed throughout.
random_forest_clf = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
extra_trees_clf = ExtraTreesClassifier(
    n_estimators=100,
    random_state=42,
)
# probability=True lets the SVM provide class probabilities, which the
# soft-voting ensemble built later requires.
# NOTE(review): max_iter=100 caps the solver iterations, presumably to keep
# training time short; the SVM may stop before it has converged.
svm_clf = SVC(
    probability=True,
    random_state=42,
    max_iter=100,
    tol=0.001,
)

In [25]:
# Fit each base classifier on the Fashion-MNIST training split, announcing it
# first so the progress of the slow fits is visible.
estimators = [random_forest_clf, extra_trees_clf, svm_clf]
for clf in estimators:
    print("Training the", clf)
    clf.fit(X_train, y_train)

Training the RandomForestClassifier(random_state=42)
Training the ExtraTreesClassifier(random_state=42)
Training the SVC(max_iter=100, probability=True, random_state=42)




In [26]:
# Validation accuracy of each individually trained base classifier,
# listed in the same order as `estimators`.
[estimator.score(X_val, y_val) for estimator in estimators]

[0.8845, 0.8844, 0.6085]

In [27]:
# (name, estimator) pairs in the form VotingClassifier expects.
named_estimators = list(zip(
    ("random_forest_clf", "extra_trees_clf", "svm_clf"),
    (random_forest_clf, extra_trees_clf, svm_clf),
))

In [28]:
# Soft-voting ensemble over the three named base classifiers.
voting_clf = VotingClassifier(
    estimators=named_estimators,
    voting="soft",
)

In [29]:
# Train the voting ensemble on the Fashion-MNIST training split.
# NOTE(review): per the scikit-learn docs, VotingClassifier.fit trains its own
# clones of the base estimators, so this repeats the training cost paid above.
voting_clf.fit(X_train, y_train)



In [30]:
# Validation accuracy of the soft-voting ensemble.
voting_clf.score(X_val, y_val)

0.8749

## Test

In [31]:
# Test accuracy of each individual base classifier, in `estimators` order.
[estimator.score(X_test, y_test) for estimator in estimators]

[0.8843, 0.8801, 0.61]

In [32]:
# Test accuracy of the soft-voting ensemble.
voting_clf.score(X_test, y_test)

0.8729

## Bonus: stacking (a blender trained on the base classifiers' validation-set predictions)

In [33]:
# Feature matrix for the blender: one row per validation sample and one column
# per base classifier, holding that classifier's predicted class as float32.
X_val_predictions = np.column_stack(
    [clf.predict(X_val) for clf in estimators]
).astype(np.float32)

In [34]:
# Blender: a bagging ensemble of 100 learners (BaggingClassifier's default base
# estimator). oob_score=True makes an out-of-bag accuracy estimate available
# after fitting.
Bagging_clf = BaggingClassifier(
    n_estimators=100,
    random_state=42,
    oob_score=True,
)

In [35]:
# Train the blender on the base classifiers' validation-set predictions, so it
# learns from data the base classifiers were not fitted on.
Bagging_clf.fit(X_val_predictions,y_val)

In [36]:
# Out-of-bag score of the blender (available because oob_score=True was set).
Bagging_clf.oob_score_

0.8873

In [37]:
# Same layout as the blender's training features, built from the test set:
# column i holds the i-th base classifier's predicted class as float32.
X_test_predictions = np.column_stack(
    [clf.predict(X_test) for clf in estimators]
).astype(np.float32)

In [38]:
# Final stacked prediction: the blender maps the base classifiers' test-set
# predictions to a class label.
y_pred = Bagging_clf.predict(X_test_predictions)

In [39]:
# Test accuracy of the stacked (base classifiers + blender) model.
accuracy_score(y_test, y_pred)

0.8843