# 作業三: 分類器的原理與評比實驗
## 資料來源: 
## 來自 AT&T 10 個人的人臉影像共 400 張，每張大小 64×64
## 目標: 
## 計畫執行這篇講義描述的分類器比較，即採用三種分類器分別對三組資料進行分類學習與測試。其中分類器包括： 
## 1.多元羅吉斯回歸 2.支援向量機 3.神經網路
## 影像資料處理: 
## 1.原始資料 2.進行PCA主成分分析
## 分類方法: 
- ## Logistic Regression
- ## SVM
- ## Neural Network
### 姓名: 鄭欣莉
### 學號: 410877039

## 標準化後原始資料 + 羅吉斯迴歸

In [3]:
import numpy as np
import pandas as pd 
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

df = pd.read_csv('face_data.csv')
X = df.drop('target', axis=1)
y = df['target']
face_data = np.array(X) #400X4096
test_size = 0.30 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
#標準化
scalar = StandardScaler()
X_train_ = scalar.fit_transform(X_train)
X_test_ = scalar.fit_transform(X_test)

- solver = 'lbfgs'

In [4]:
def clf_LR(solver):
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import classification_report
    opts = dict(tol = 1e-6, max_iter = int(1e6), verbose = 1)
    clf_LR = LogisticRegression(solver = solver, **opts)
    clf_LR.fit(X_train_,y_train)
    y_pred = clf_LR.predict(X_test_)
    print(f"{clf_LR.score(X_test_, y_test):.2%}\n")
    print(classification_report(y_test,y_pred))

In [5]:
clf_LR(solver = 'lbfgs')

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


95.83%

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         2
           2       1.00      1.00      1.00         3
           3       1.00      1.00      1.00         4
           4       1.00      1.00      1.00         2
           5       1.00      1.00      1.00         4
           6       1.00      1.00      1.00         3
           7       1.00      0.60      0.75         5
           8       1.00      1.00      1.00         2
           9       0.50      1.00      0.67         2
          10       1.00      1.00      1.00         4
          11       1.00      1.00      1.00         3
          12       1.00      1.00      1.00         1
          13       1.00      1.00      1.00         2
          14       1.00      1.00      1.00         3
          15       1.00      1.00      1.00         3
          16       1.00      0.60      0.75         5
          17       

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   33.5s finished
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


- solver = 'liblinear'

In [6]:
clf_LR(solver = 'liblinear')

[LibLinear]95.83%

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         2
           2       1.00      1.00      1.00         3
           3       1.00      1.00      1.00         4
           4       1.00      1.00      1.00         2
           5       1.00      1.00      1.00         4
           6       1.00      1.00      1.00         3
           7       1.00      1.00      1.00         5
           8       0.50      1.00      0.67         2
           9       1.00      1.00      1.00         2
          10       1.00      1.00      1.00         4
          11       1.00      1.00      1.00         3
          12       0.00      0.00      0.00         1
          13       1.00      1.00      1.00         2
          14       1.00      1.00      1.00         3
          15       1.00      1.00      1.00         3
          16       1.00      0.80      0.89         5
        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


- solver = 'newton-cg'

In [7]:
clf_LR(solver = 'newton-cg')

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


95.83%

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         2
           2       1.00      1.00      1.00         3
           3       1.00      1.00      1.00         4
           4       1.00      1.00      1.00         2
           5       1.00      1.00      1.00         4
           6       1.00      1.00      1.00         3
           7       1.00      0.60      0.75         5
           8       1.00      1.00      1.00         2
           9       0.50      1.00      0.67         2
          10       1.00      1.00      1.00         4
          11       1.00      1.00      1.00         3
          12       1.00      1.00      1.00         1
          13       1.00      1.00      1.00         2
          14       1.00      1.00      1.00         3
          15       1.00      1.00      1.00         3
          16       1.00      0.60      0.75         5
          17       

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   29.7s finished
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## 結論:
### 1. Classification Report 的衡量指標個別代表的意思:
- ### 精確率(Precision) 為預測為真的樣本有幾個預測正確
- ### 召回率(Recall) 為事實為真的樣本中有幾個是預測正確的
- ### F1 score 為精確率和召回率的調和平均數
### 2. 以不同solver的第一筆資料為例:
- ### 每個 solver 的精確率、召回率、F1 score 為百分之百
### 3. 標準化過後的AT&T資料，各個solver = 'lbfgs' 或 'liblinear' 或 'newton-cg' 表現差不多，準確率均為96%，對於40筆資料來說表現絕對不差。

## PCA主成分分析(成分比例採0.8) + 羅吉斯迴歸

- solver = 'lbfgs'

In [8]:
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

def PCA_LR(n_components, solver):
    pca = PCA(n_components=n_components).fit(X_train_)
    Z_train = pca.transform(X_train_)
    Z_test = pca.transform(X_test_)
    opts = dict(tol = 1e-6, max_iter = int(1e6), verbose = 1)
    clf_PCA = LogisticRegression(solver = solver, **opts)
    clf_PCA.fit(Z_train, y_train)
    y_pred = clf_PCA.predict(Z_test)
    print(f"{clf_PCA.score(Z_test, y_test):.2%}\n")
    print(classification_report(y_test,y_pred))

In [9]:
PCA_LR(n_components = 0.8, solver = 'lbfgs')

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


92.50%

              precision    recall  f1-score   support

           0       1.00      0.67      0.80         3
           1       1.00      1.00      1.00         2
           2       0.75      1.00      0.86         3
           3       1.00      1.00      1.00         4
           4       1.00      1.00      1.00         2
           5       1.00      1.00      1.00         4
           6       1.00      1.00      1.00         3
           7       1.00      0.40      0.57         5
           8       1.00      1.00      1.00         2
           9       0.50      1.00      0.67         2
          10       1.00      1.00      1.00         4
          11       1.00      1.00      1.00         3
          12       1.00      1.00      1.00         1
          13       1.00      1.00      1.00         2
          14       1.00      1.00      1.00         3
          15       1.00      1.00      1.00         3
          16       0.75      0.60      0.67         5
          17       

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.8s finished
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


- solver = 'liblinear'

In [10]:
PCA_LR(n_components = 0.8, solver = 'liblinear')

[LibLinear]93.33%

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       0.67      1.00      0.80         2
           2       1.00      1.00      1.00         3
           3       0.75      0.75      0.75         4
           4       0.67      1.00      0.80         2
           5       1.00      1.00      1.00         4
           6       1.00      0.67      0.80         3
           7       1.00      0.60      0.75         5
           8       1.00      1.00      1.00         2
           9       0.50      1.00      0.67         2
          10       1.00      1.00      1.00         4
          11       1.00      1.00      1.00         3
          12       0.00      0.00      0.00         1
          13       1.00      1.00      1.00         2
          14       1.00      1.00      1.00         3
          15       1.00      1.00      1.00         3
          16       1.00      0.80      0.89         5
        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


- solver = 'newton-cg'

In [11]:
PCA_LR(n_components = 0.8, solver = 'newton-cg')

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


92.50%

              precision    recall  f1-score   support

           0       1.00      0.67      0.80         3
           1       1.00      1.00      1.00         2
           2       0.75      1.00      0.86         3
           3       1.00      1.00      1.00         4
           4       1.00      1.00      1.00         2
           5       1.00      1.00      1.00         4
           6       1.00      1.00      1.00         3
           7       1.00      0.40      0.57         5
           8       1.00      1.00      1.00         2
           9       0.50      1.00      0.67         2
          10       1.00      1.00      1.00         4
          11       1.00      1.00      1.00         3
          12       1.00      1.00      1.00         1
          13       1.00      1.00      1.00         2
          14       1.00      1.00      1.00         3
          15       1.00      1.00      1.00         3
          16       0.75      0.60      0.67         5
          17       

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.4s finished
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## 結論:
### PCA的AT&T資料，各個solver('lbfgs','liblinear','newton-cg')表現均差不多，準確率均在90%以上，也對於40筆資料來說表現絕對不差。

## 若成分比例採0.6

In [45]:
PCA_LR(n_components = 0.6, solver = 'lbfgs')

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


75.83%

              precision    recall  f1-score   support

           0       1.00      0.67      0.80         3
           1       1.00      1.00      1.00         2
           2       1.00      0.67      0.80         3
           3       0.75      0.75      0.75         4
           4       0.67      1.00      0.80         2
           5       1.00      1.00      1.00         4
           6       0.50      0.33      0.40         3
           7       1.00      0.20      0.33         5
           8       1.00      1.00      1.00         2
           9       0.67      1.00      0.80         2
          10       1.00      1.00      1.00         4
          11       1.00      1.00      1.00         3
          12       0.00      0.00      0.00         1
          13       1.00      1.00      1.00         2
          14       0.60      1.00      0.75         3
          15       1.00      1.00      1.00         3
          16       0.75      0.60      0.67         5
          17       

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    2.6s finished
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## 結論:
### 可以發現當成分比例採0.8時，準確率都還維持在90%以上，當成分比例採0.6時，準確率已經下滑至75%，所以可以推測出當成分比例在0.8左右時，在降低維度的同時保留原本資料的重要資訊，所以接下來的成分比例都將採0.8

## 標準化後原始資料+SVC

### one vs one

- kernel = 'linear'

In [12]:
from sklearn.svm import SVC,LinearSVC

def clf_SVC(C, opts, clf_svm):
    clf_svm.fit(X_train_,y_train)
    predictions = clf_svm.predict(X_test_)
    print(classification_report(y_test, predictions))

In [20]:
C = 1
opts = dict(C = C, decision_function_shape = 'ovo', \
             tol = 1e-6, max_iter = int(1e6))
clf_svm = SVC(kernel = 'linear', **opts)
clf_SVC(C = C, opts = opts, clf_svm = clf_svm)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         2
           2       1.00      1.00      1.00         3
           3       1.00      1.00      1.00         4
           4       1.00      1.00      1.00         2
           5       1.00      1.00      1.00         4
           6       1.00      1.00      1.00         3
           7       1.00      0.80      0.89         5
           8       1.00      1.00      1.00         2
           9       0.67      1.00      0.80         2
          10       1.00      1.00      1.00         4
          11       1.00      1.00      1.00         3
          12       1.00      1.00      1.00         1
          13       1.00      1.00      1.00         2
          14       1.00      1.00      1.00         3
          15       1.00      1.00      1.00         3
          16       1.00      0.60      0.75         5
          17       1.00    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


- kernel = 'rbf'

In [21]:
C = 1
opts = dict(C = C, decision_function_shape = 'ovo', \
             tol = 1e-6, max_iter = int(1e6))
clf_svm = SVC(kernel = 'rbf', **opts)
clf_SVC(C = C, opts = opts, clf_svm = clf_svm)

              precision    recall  f1-score   support

           0       0.75      1.00      0.86         3
           1       1.00      1.00      1.00         2
           2       1.00      1.00      1.00         3
           3       0.75      0.75      0.75         4
           4       1.00      1.00      1.00         2
           5       1.00      1.00      1.00         4
           6       1.00      1.00      1.00         3
           7       1.00      0.60      0.75         5
           8       0.67      1.00      0.80         2
           9       0.50      1.00      0.67         2
          10       1.00      1.00      1.00         4
          11       1.00      1.00      1.00         3
          12       0.33      1.00      0.50         1
          13       1.00      1.00      1.00         2
          14       1.00      1.00      1.00         3
          15       1.00      1.00      1.00         3
          16       1.00      0.60      0.75         5
          17       1.00    

- kernel = 'poly'

In [22]:
C = 1
opts = dict(C = C, decision_function_shape = 'ovo', \
             tol = 1e-6, max_iter = int(1e6))
clf_svm = SVC(kernel = 'poly', **opts)
clf_SVC(C = C, opts = opts, clf_svm = clf_svm)

              precision    recall  f1-score   support

           0       0.50      0.33      0.40         3
           1       1.00      1.00      1.00         2
           2       0.00      0.00      0.00         3
           3       0.00      0.00      0.00         4
           4       1.00      0.50      0.67         2
           5       1.00      1.00      1.00         4
           6       1.00      0.33      0.50         3
           7       1.00      0.40      0.57         5
           8       0.00      0.00      0.00         2
           9       0.50      0.50      0.50         2
          10       1.00      1.00      1.00         4
          11       1.00      0.67      0.80         3
          12       0.00      0.00      0.00         1
          13       0.00      0.00      0.00         2
          14       0.00      0.00      0.00         3
          15       1.00      0.67      0.80         3
          16       0.75      0.60      0.67         5
          17       1.00    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## 結論:
### 在決策函數中選擇一對一(one vs one)的模式，可以看到當 kernel 選擇 'linear' 準確率有最高97%，kernel 選擇 'rbf' 準確率有91%，而 kernel 選擇 'poly' 準確率只有48%，表現得不好

### one vs the rest

In [13]:
C = 1
opts = dict(C = C,tol = 1e-6,max_iter = int(1e6))
clf_svm = LinearSVC(**opts)
clf_SVC(C = C, opts = opts, clf_svm = clf_svm)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         2
           2       1.00      1.00      1.00         3
           3       1.00      1.00      1.00         4
           4       1.00      1.00      1.00         2
           5       1.00      1.00      1.00         4
           6       1.00      1.00      1.00         3
           7       1.00      1.00      1.00         5
           8       0.50      1.00      0.67         2
           9       1.00      1.00      1.00         2
          10       1.00      1.00      1.00         4
          11       1.00      1.00      1.00         3
          12       0.00      0.00      0.00         1
          13       1.00      1.00      1.00         2
          14       1.00      1.00      1.00         3
          15       1.00      1.00      1.00         3
          16       1.00      0.80      0.89         5
          17       1.00    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## 結論:
### 1. 當模式為一對其他（one vs the rest），準確率為94%
### 2. 模式為 一對一 跟 一對其他，可以看到準確率皆在90%以上，相較於一對其他，一對一的模式的準確率較高

## PCA主成分分析(成分比例採0.8) + SVM

### one vs one

- kernel = 'linear'

In [23]:
from sklearn.decomposition import PCA

pca = PCA(n_components=0.8).fit(X_train_)
Z_train = pca.transform(X_train_) #降維
Z_test = pca.transform(X_test_)
C = 1
opts = dict(C = C, decision_function_shape = 'ovo', \
             tol = 1e-6, max_iter = int(1e6))
clf_svm = SVC(kernel = 'linear', **opts)
clf_svm.fit(Z_train,y_train)
predictions = clf_svm.predict(Z_test)
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       1.00      0.67      0.80         3
           1       1.00      1.00      1.00         2
           2       1.00      1.00      1.00         3
           3       0.67      1.00      0.80         4
           4       1.00      1.00      1.00         2
           5       1.00      1.00      1.00         4
           6       1.00      1.00      1.00         3
           7       1.00      0.40      0.57         5
           8       0.67      1.00      0.80         2
           9       0.50      1.00      0.67         2
          10       1.00      1.00      1.00         4
          11       1.00      1.00      1.00         3
          12       0.50      1.00      0.67         1
          13       1.00      1.00      1.00         2
          14       1.00      1.00      1.00         3
          15       1.00      1.00      1.00         3
          16       0.75      0.60      0.67         5
          17       1.00    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### one vs the rest

In [24]:
from sklearn.decomposition import PCA

pca = PCA(n_components=0.8).fit(X_train_)
Z_train = pca.transform(X_train_) #降維
Z_test = pca.transform(X_test_)
C = 1
opts = dict(C=C,tol = 1e-6,max_iter = int(1e6))
clf_svm = SVC(kernel = 'linear',**opts)
clf_svm.fit(Z_train,y_train)
predictions = clf_svm.predict(Z_test)
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       1.00      0.67      0.80         3
           1       1.00      1.00      1.00         2
           2       1.00      1.00      1.00         3
           3       0.67      1.00      0.80         4
           4       1.00      1.00      1.00         2
           5       1.00      1.00      1.00         4
           6       1.00      1.00      1.00         3
           7       1.00      0.40      0.57         5
           8       0.67      1.00      0.80         2
           9       0.50      1.00      0.67         2
          10       1.00      1.00      1.00         4
          11       1.00      1.00      1.00         3
          12       0.50      1.00      0.67         1
          13       1.00      1.00      1.00         2
          14       1.00      1.00      1.00         3
          15       1.00      1.00      1.00         3
          16       0.75      0.60      0.67         5
          17       1.00    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## 結論:
### 當成分比例採0.8時，不管選擇一對一或一對其他都能維持準確率在90%

## 標準化後原始資料 + 神經網路NN

- hidden_layers = (30,) , activation = 'logistic', solver = 'adam'

In [25]:
from sklearn.neural_network import MLPClassifier

hidden_layers = (30,)
activation = 'logistic'
opts = dict(hidden_layer_sizes = hidden_layers,verbose = True,activation = activation,tol = 1e-6,max_iter = int(1e6))
solver = 'adam'
clf_MLP = MLPClassifier(solver = solver, **opts)
clf_MLP.fit(X_train_,y_train)
predictions = clf_MLP.predict(X_test_)
print(classification_report(y_test,predictions))

Iteration 1, loss = 3.71675275
Iteration 2, loss = 3.42003555
Iteration 3, loss = 3.28374889
Iteration 4, loss = 3.19359201
Iteration 5, loss = 3.12137016
Iteration 6, loss = 3.06066795
Iteration 7, loss = 3.00514139
Iteration 8, loss = 2.95749885
Iteration 9, loss = 2.91391547
Iteration 10, loss = 2.87297481
Iteration 11, loss = 2.83537261
Iteration 12, loss = 2.79767338
Iteration 13, loss = 2.76317905
Iteration 14, loss = 2.73207044
Iteration 15, loss = 2.70069477
Iteration 16, loss = 2.67061858
Iteration 17, loss = 2.64200325
Iteration 18, loss = 2.61306638
Iteration 19, loss = 2.58360261
Iteration 20, loss = 2.55615918
Iteration 21, loss = 2.52968527
Iteration 22, loss = 2.50283960
Iteration 23, loss = 2.47616920
Iteration 24, loss = 2.45008440
Iteration 25, loss = 2.42436367
Iteration 26, loss = 2.39912591
Iteration 27, loss = 2.37407067
Iteration 28, loss = 2.34926779
Iteration 29, loss = 2.32445058
Iteration 30, loss = 2.30035931
Iteration 31, loss = 2.27677253
Iteration 32, los

- hidden_layers = (30,) , activation = 'relu', solver = 'adam'

In [26]:
from sklearn.neural_network import MLPClassifier

hidden_layers = (30,)
activation = 'relu'
opts = dict(hidden_layer_sizes = hidden_layers,verbose = True,activation = activation,tol = 1e-6,max_iter = int(1e6))
solver = 'adam'
clf_MLP = MLPClassifier(solver = solver, **opts)
clf_MLP.fit(X_train_,y_train)
predictions = clf_MLP.predict(X_test_)
print(classification_report(y_test,predictions))

Iteration 1, loss = 3.92284120
Iteration 2, loss = 2.70205202
Iteration 3, loss = 2.03790412
Iteration 4, loss = 1.54289766
Iteration 5, loss = 1.19753175
Iteration 6, loss = 0.93280993
Iteration 7, loss = 0.73391345
Iteration 8, loss = 0.58356599
Iteration 9, loss = 0.47199727
Iteration 10, loss = 0.38114793
Iteration 11, loss = 0.31257131
Iteration 12, loss = 0.26098446
Iteration 13, loss = 0.22026617
Iteration 14, loss = 0.18549616
Iteration 15, loss = 0.15715687
Iteration 16, loss = 0.13392923
Iteration 17, loss = 0.11164844
Iteration 18, loss = 0.09406502
Iteration 19, loss = 0.07661063
Iteration 20, loss = 0.06265050
Iteration 21, loss = 0.05098174
Iteration 22, loss = 0.04196640
Iteration 23, loss = 0.03555432
Iteration 24, loss = 0.03034489
Iteration 25, loss = 0.02692195
Iteration 26, loss = 0.02385549
Iteration 27, loss = 0.02146664
Iteration 28, loss = 0.01950457
Iteration 29, loss = 0.01790892
Iteration 30, loss = 0.01653100
Iteration 31, loss = 0.01530961
Iteration 32, los

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


- hidden_layers = (30,) , solver = 'lbfgs'

In [27]:
from sklearn.neural_network import MLPClassifier

hidden_layers = (30,)
activation = 'logistic'
opts = dict(hidden_layer_sizes = hidden_layers,verbose = True,activation = activation,tol = 1e-6,max_iter = int(1e6))
solver = 'lbfgs'
clf_MLP = MLPClassifier(solver = solver, **opts)
clf_MLP.fit(X_train_,y_train)
predictions = clf_MLP.predict(X_test_)
print(classification_report(y_test,predictions))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         2
           2       1.00      1.00      1.00         3
           3       1.00      1.00      1.00         4
           4       1.00      1.00      1.00         2
           5       0.80      1.00      0.89         4
           6       1.00      1.00      1.00         3
           7       1.00      0.80      0.89         5
           8       1.00      1.00      1.00         2
           9       0.67      1.00      0.80         2
          10       1.00      1.00      1.00         4
          11       0.60      1.00      0.75         3
          12       0.00      0.00      0.00         1
          13       1.00      1.00      1.00         2
          14       1.00      1.00      1.00         3
          15       1.00      1.00      1.00         3
          16       1.00      0.60      0.75         5
          17       1.00    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


- hidden_layers = (512,)

In [28]:
from sklearn.neural_network import MLPClassifier

hidden_layers = (512,)
activation = 'logistic'
opts = dict(hidden_layer_sizes = hidden_layers,verbose = True,activation = activation,tol = 1e-6,max_iter = int(1e6))
solver = 'adam'
clf_MLP = MLPClassifier(solver = solver, **opts)
clf_MLP.fit(X_train_,y_train)
predictions = clf_MLP.predict(X_test_)
print(classification_report(y_test,predictions))

Iteration 1, loss = 3.59783294
Iteration 2, loss = 2.01007045
Iteration 3, loss = 1.38300530
Iteration 4, loss = 0.99612100
Iteration 5, loss = 0.74502940
Iteration 6, loss = 0.56160761
Iteration 7, loss = 0.42544535
Iteration 8, loss = 0.32880152
Iteration 9, loss = 0.25910717
Iteration 10, loss = 0.20695003
Iteration 11, loss = 0.16794196
Iteration 12, loss = 0.13874900
Iteration 13, loss = 0.11522481
Iteration 14, loss = 0.09745428
Iteration 15, loss = 0.08377847
Iteration 16, loss = 0.07266671
Iteration 17, loss = 0.06362277
Iteration 18, loss = 0.05643220
Iteration 19, loss = 0.05051096
Iteration 20, loss = 0.04554897
Iteration 21, loss = 0.04149164
Iteration 22, loss = 0.03807717
Iteration 23, loss = 0.03516921
Iteration 24, loss = 0.03267920
Iteration 25, loss = 0.03054056
Iteration 26, loss = 0.02866553
Iteration 27, loss = 0.02700638
Iteration 28, loss = 0.02549777
Iteration 29, loss = 0.02422493
Iteration 30, loss = 0.02308095
Iteration 31, loss = 0.02202897
Iteration 32, los

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


訓練資料的classification report 

In [39]:
from sklearn.neural_network import MLPClassifier

hidden_layers = (512,)
activation = 'logistic'
opts = dict(hidden_layer_sizes = hidden_layers,verbose = True,activation = activation,tol = 1e-6,max_iter = int(1e6))
solver = 'adam'
clf_MLP = MLPClassifier(solver = solver, **opts)
clf_MLP.fit(X_train_,y_train)
pred = clf_MLP.predict(X_train_)
print(classification_report(y_train,pred))

Iteration 1, loss = 3.58026335
Iteration 2, loss = 1.98147730
Iteration 3, loss = 1.37545512
Iteration 4, loss = 0.98457755
Iteration 5, loss = 0.72261990
Iteration 6, loss = 0.54409384
Iteration 7, loss = 0.41794984
Iteration 8, loss = 0.32115604
Iteration 9, loss = 0.25327760
Iteration 10, loss = 0.20235390
Iteration 11, loss = 0.16494933
Iteration 12, loss = 0.13660005
Iteration 13, loss = 0.11390252
Iteration 14, loss = 0.09672891
Iteration 15, loss = 0.08284534
Iteration 16, loss = 0.07165431
Iteration 17, loss = 0.06236537
Iteration 18, loss = 0.05529642
Iteration 19, loss = 0.04937488
Iteration 20, loss = 0.04464491
Iteration 21, loss = 0.04068999
Iteration 22, loss = 0.03734484
Iteration 23, loss = 0.03448664
Iteration 24, loss = 0.03201388
Iteration 25, loss = 0.02991032
Iteration 26, loss = 0.02814038
Iteration 27, loss = 0.02651698
Iteration 28, loss = 0.02510839
Iteration 29, loss = 0.02386637
Iteration 30, loss = 0.02272688
Iteration 31, loss = 0.02170027
Iteration 32, los

- hidden_layers = (60, 60, 60)

In [29]:
from sklearn.neural_network import MLPClassifier

hidden_layers = (60,60,60)
activation = 'logistic'
opts = dict(hidden_layer_sizes = hidden_layers,verbose = True,activation = activation,tol = 1e-6,max_iter = int(1e6))
solver = 'adam'
clf_MLP = MLPClassifier(solver = solver, **opts)
clf_MLP.fit(X_train_,y_train)
predictions = clf_MLP.predict(X_test_)
print(classification_report(y_test,predictions))

Iteration 1, loss = 3.75694497
Iteration 2, loss = 3.72770521
Iteration 3, loss = 3.70857108
Iteration 4, loss = 3.69493432
Iteration 5, loss = 3.68350455
Iteration 6, loss = 3.67276668
Iteration 7, loss = 3.66606760
Iteration 8, loss = 3.65904794
Iteration 9, loss = 3.65402024
Iteration 10, loss = 3.64854411
Iteration 11, loss = 3.64433902
Iteration 12, loss = 3.64074859
Iteration 13, loss = 3.63639017
Iteration 14, loss = 3.63320657
Iteration 15, loss = 3.62965533
Iteration 16, loss = 3.62604179
Iteration 17, loss = 3.62277868
Iteration 18, loss = 3.61969745
Iteration 19, loss = 3.61567279
Iteration 20, loss = 3.61201167
Iteration 21, loss = 3.60817236
Iteration 22, loss = 3.60452827
Iteration 23, loss = 3.60009472
Iteration 24, loss = 3.59541978
Iteration 25, loss = 3.59079532
Iteration 26, loss = 3.58592309
Iteration 27, loss = 3.58074838
Iteration 28, loss = 3.57550263
Iteration 29, loss = 3.56931398
Iteration 30, loss = 3.56362338
Iteration 31, loss = 3.55693451
Iteration 32, los

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


- hidden_layers = (512, 512, 512)

In [34]:
from sklearn.neural_network import MLPClassifier

hidden_layers = (512,512,512)
activation = 'logistic'
opts = dict(hidden_layer_sizes = hidden_layers,verbose = True,activation = activation,tol = 1e-6,max_iter = int(1e6))
solver = 'adam'
clf_MLP = MLPClassifier(solver = solver, **opts)
clf_MLP.fit(X_train_,y_train)
predictions = clf_MLP.predict(X_test_)
print(classification_report(y_test,predictions))

Iteration 1, loss = 3.75579787
Iteration 2, loss = 3.66156903
Iteration 3, loss = 3.63444604
Iteration 4, loss = 3.59642082
Iteration 5, loss = 3.54088638
Iteration 6, loss = 3.47528850
Iteration 7, loss = 3.41007488
Iteration 8, loss = 3.33491817
Iteration 9, loss = 3.24625292
Iteration 10, loss = 3.13987158
Iteration 11, loss = 3.03355789
Iteration 12, loss = 2.92350691
Iteration 13, loss = 2.80637735
Iteration 14, loss = 2.68491349
Iteration 15, loss = 2.56299280
Iteration 16, loss = 2.43585464
Iteration 17, loss = 2.30935641
Iteration 18, loss = 2.18977938
Iteration 19, loss = 2.06583782
Iteration 20, loss = 1.93991985
Iteration 21, loss = 1.81868251
Iteration 22, loss = 1.70053212
Iteration 23, loss = 1.58641755
Iteration 24, loss = 1.47427676
Iteration 25, loss = 1.36885817
Iteration 26, loss = 1.26850831
Iteration 27, loss = 1.17248843
Iteration 28, loss = 1.08108804
Iteration 29, loss = 0.99112100
Iteration 30, loss = 0.90939707
Iteration 31, loss = 0.83183677
Iteration 32, los

訓練資料的classification report

In [35]:
pred = clf_MLP.predict(X_train_)
print(classification_report(y_train,pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       1.00      1.00      1.00         8
           2       1.00      1.00      1.00         7
           3       1.00      1.00      1.00         6
           4       1.00      1.00      1.00         8
           5       1.00      1.00      1.00         6
           6       1.00      1.00      1.00         7
           7       1.00      1.00      1.00         5
           8       1.00      1.00      1.00         8
           9       1.00      1.00      1.00         8
          10       1.00      1.00      1.00         6
          11       1.00      1.00      1.00         7
          12       1.00      1.00      1.00         9
          13       1.00      1.00      1.00         8
          14       1.00      1.00      1.00         7
          15       1.00      1.00      1.00         7
          16       1.00      1.00      1.00         5
          17       1.00    

## 結論:
### 1. hidden layers = (30,)，選擇不同的activation 跟 solver ，效果並沒有差太多，準確率皆在90%以上
### 2. 當hidden layers = (512,) 跟 hidden layers = (60,60,60) 或 hidden layers = (512,512,512)，差別並沒有太大，只用一層512個神經元就可以達到跟三層60個神經元或是三層512個神經元的效果，所以下面採hidden layers = (512,)
### 3. 由2.可以知道，低維度的資料如果擴展到過多的神經元，其實資料無法提供足夠多的細節，也因此造成了許多無用、重複的數據。有時候不是模型大、模型深就是會比較好，要根據不同的狀況做適當的設計才是最好的選擇
### 4. 訓練準確率提升後，實際上測試的準確度沒下降多少，可能因為模型過度訓練到適合訓練資料集，反而不適合其他資料

## PCA主成分分析(成分比例採0.8) + 神經網路

In [36]:
from sklearn.decomposition import PCA

hidden_layers = (512,)
activation = 'logistic'
opts = dict(hidden_layer_sizes = hidden_layers,verbose = True,activation = activation,tol = 1e-6,max_iter = int(1e6))
solver = 'adam'
clf_MLP = MLPClassifier(solver = solver, **opts)
clf_MLP.fit(Z_train,y_train)
predictions = clf_MLP.predict(Z_test)
print(classification_report(y_test,predictions))

Iteration 1, loss = 3.79965357
Iteration 2, loss = 3.46697809
Iteration 3, loss = 3.19959574
Iteration 4, loss = 2.95940895
Iteration 5, loss = 2.73788417
Iteration 6, loss = 2.52542154
Iteration 7, loss = 2.32427314
Iteration 8, loss = 2.13590139
Iteration 9, loss = 1.96169883
Iteration 10, loss = 1.80158187
Iteration 11, loss = 1.65560148
Iteration 12, loss = 1.51771600
Iteration 13, loss = 1.39213266
Iteration 14, loss = 1.27952599
Iteration 15, loss = 1.17495957
Iteration 16, loss = 1.08226297
Iteration 17, loss = 0.99465287
Iteration 18, loss = 0.91619497
Iteration 19, loss = 0.84367556
Iteration 20, loss = 0.77786020
Iteration 21, loss = 0.71790559
Iteration 22, loss = 0.66244893
Iteration 23, loss = 0.61389715
Iteration 24, loss = 0.56813914
Iteration 25, loss = 0.52813199
Iteration 26, loss = 0.49118107
Iteration 27, loss = 0.45783128
Iteration 28, loss = 0.42687692
Iteration 29, loss = 0.39877348
Iteration 30, loss = 0.37288859
Iteration 31, loss = 0.34913107
Iteration 32, los

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [37]:
pred = clf_MLP.predict(Z_train)
print(classification_report(y_train,pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       1.00      1.00      1.00         8
           2       1.00      1.00      1.00         7
           3       1.00      1.00      1.00         6
           4       1.00      1.00      1.00         8
           5       1.00      1.00      1.00         6
           6       1.00      1.00      1.00         7
           7       1.00      1.00      1.00         5
           8       1.00      1.00      1.00         8
           9       1.00      1.00      1.00         8
          10       1.00      1.00      1.00         6
          11       1.00      1.00      1.00         7
          12       1.00      1.00      1.00         9
          13       1.00      1.00      1.00         8
          14       1.00      1.00      1.00         7
          15       1.00      1.00      1.00         7
          16       1.00      1.00      1.00         5
          17       1.00    

## 結論:
### 當成分比例採0.8時，準確率還不錯，為91%