## MNIST部分

In [35]:
from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
mnist.keys()

dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])

In [36]:
X, y = mnist["data"], mnist["target"]
X.shape

(70000, 784)

### Step1:用train_test_split拆分為訓練集50000、驗證10000、測試10000

In [37]:
from sklearn.model_selection import train_test_split

In [38]:
X_train,X_test,y_train,y_test=train_test_split(X,y,train_size=60000,random_state=42)
X_train,X_val,y_train,y_val=train_test_split(X_train,y_train,train_size=50000,random_state=42)

In [39]:
#查看拆分完的狀態
print(X_train.shape)
print(X_val.shape)
print(X_test.shape)

(50000, 784)
(10000, 784)
(10000, 784)


### Step2:訓練一個Random Forest分類器、一個Extra-Trees分類器、一個SVM分類器

In [40]:
from sklearn.ensemble import RandomForestClassifier,ExtraTreesClassifier,VotingClassifier
from sklearn.svm import SVC

In [41]:
rnd_clf=RandomForestClassifier(n_estimators=100,random_state=42)
extra_trees_clf=ExtraTreesClassifier(n_estimators=100,random_state=42)
svm_clf=SVC(random_state=42,probability=True)

### Step3:嘗試使用soft voting將三個分類器合成一個整體(該整體優於驗證集上的每個單獨分類器)，並且找到後在測試集嘗試一下，最後討論整體跟個別表現哪個好

In [42]:
voting_clf=VotingClassifier(
    estimators=[('rf',rnd_clf),('et',extra_trees_clf),('svc',svm_clf)],
    voting='soft'
)

In [43]:
from sklearn.metrics import accuracy_score

In [44]:
for clf in (rnd_clf,extra_trees_clf,svm_clf):
    clf.fit(X_train,y_train)
    y_pred=clf.predict(X_val)
    print(clf.__class__.__name__,accuracy_score(y_val,y_pred))

RandomForestClassifier 0.9692
ExtraTreesClassifier 0.9715
SVC 0.9788


In [45]:
voting_clf.fit(X_val,y_val)

In [46]:
voting_clf.score(X_val, y_val)

1.0

In [47]:
#在測試集上的嘗試
for clf in(rnd_clf,extra_trees_clf,svm_clf):
    y_pred=clf.predict(X_test)
    print(clf.__class__.__name__,accuracy_score(y_test,y_pred))

RandomForestClassifier 0.9645
ExtraTreesClassifier 0.9691
SVC 0.976


In [48]:
voting_clf.fit(X_test,y_test)

In [49]:
voting_clf.score(X_test,y_test)

1.0

## Fashion-MNIST部分

In [50]:
fashion_mnist = fetch_openml('Fashion-MNIST', version=1, as_frame=False)
fashion_mnist.keys()

dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])

In [51]:
X, y = fashion_mnist["data"], fashion_mnist["target"]
X.shape

(70000, 784)

### Step1:用train_test_split拆分為訓練集50000、驗證10000、測試10000

In [52]:
from sklearn.model_selection import train_test_split

In [53]:
X_train,X_test,y_train,y_test=train_test_split(X,y,train_size=60000,random_state=42)
X_train,X_val,y_train,y_val=train_test_split(X_train,y_train,train_size=50000,random_state=42)

In [54]:
#查看拆分完的狀態
print(X_train.shape)
print(X_val.shape)
print(X_test.shape)

(50000, 784)
(10000, 784)
(10000, 784)


### Step2:訓練一個Random Forest分類器、一個Extra-Trees分類器、一個SVM分類器

In [55]:
from sklearn.ensemble import RandomForestClassifier,ExtraTreesClassifier,VotingClassifier
from sklearn.svm import SVC

In [56]:
rnd_clf=RandomForestClassifier(n_estimators=100,random_state=42)
extra_trees_clf=ExtraTreesClassifier(n_estimators=100,random_state=42)
svm_clf=SVC(random_state=42,probability=True)

### Step3:嘗試使用soft voting將三個分類器合成一個整體(該整體優於驗證集上的每個單獨分類器)，並且找到後在測試集嘗試一下，最後討論整體跟個別表現哪個好

In [57]:
voting_clf=VotingClassifier(
    estimators=[('rf',rnd_clf),('et',extra_trees_clf),('svc',svm_clf)],
    voting='soft'
)

In [58]:
from sklearn.metrics import accuracy_score

In [65]:
for clf in (rnd_clf,extra_trees_clf,svm_clf):
    clf.fit(X_train,y_train)
    y_pred=clf.predict(X_val)
    print(clf.__class__.__name__,accuracy_score(y_val,y_pred))

RandomForestClassifier 0.8845
ExtraTreesClassifier 0.8844
SVC 0.8928


In [60]:
voting_clf.fit(X_val,y_val)

In [61]:
voting_clf.score(X_val, y_val)

1.0

In [62]:
#在測試集上的嘗試
for clf in(rnd_clf,extra_trees_clf,svm_clf):
    y_pred=clf.predict(X_test)
    print(clf.__class__.__name__,accuracy_score(y_test,y_pred))

RandomForestClassifier 0.8843
ExtraTreesClassifier 0.8801
SVC 0.8853


In [63]:
voting_clf.fit(X_test,y_test)

In [64]:
voting_clf.score(X_test,y_test)

1.0