### 版本：scikit-learn 0.22.1

# 一、資料匯入

In [1]:
import pandas as pd
df = pd.read_csv("iris.csv",encoding = "big5") #也可load sklearn內建iris datasets
df.head()

Unnamed: 0,花萼長度,花萼寬度,花瓣長度,花瓣寬度,屬種
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


### 切分資料

In [2]:
from sklearn.model_selection import train_test_split
X = df[['花萼長度','花萼寬度','花瓣長度','花瓣寬度']]
y = df[['屬種']]
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.3)

### 標準化

In [3]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

# 二、KNN with Grid Search
可調整k, weight (uniform, distance)

### Grid Search

In [4]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

n_neighbors = [i for i in range(1,11,1)]
weights = ['uniform','distance']

hyperparameters = dict(n_neighbors=n_neighbors, weights=weights)

model = KNeighborsClassifier()
knn = GridSearchCV(model, hyperparameters, cv=5, verbose=0) 
# FutureWarning: The parameter 'iid' is deprecated in 0.22 and will be removed in 0.24. 
 
best_model = knn.fit(X_train_std, y_train.values.ravel())

# 查看最好的超參數
print('n_neighbors: ', best_model.best_estimator_.get_params()['n_neighbors'])
print('weights: ', best_model.best_estimator_.get_params()['weights'])
print('所有超參數: ', best_model.best_estimator_.get_params())

n_neighbors:  1
weights:  uniform
所有超參數:  {'algorithm': 'auto', 'leaf_size': 30, 'metric': 'minkowski', 'metric_params': None, 'n_jobs': None, 'n_neighbors': 1, 'p': 2, 'weights': 'uniform'}


### 使用 Grid Search 的結果來建模

In [5]:
knn = KNeighborsClassifier(n_neighbors=1, weights='uniform')
knn.fit(X_train_std, y_train.values.ravel())

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=1, p=2,
                     weights='uniform')

### 準確度 & 混淆矩陣 (測試資料集)

In [6]:
from sklearn import metrics
print(metrics.classification_report(y_test, knn.predict(X_test_std)))
print(metrics.confusion_matrix(y_test, knn.predict(X_test_std),labels=['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']))

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        12
Iris-versicolor       0.89      0.89      0.89        19
 Iris-virginica       0.86      0.86      0.86        14

       accuracy                           0.91        45
      macro avg       0.92      0.92      0.92        45
   weighted avg       0.91      0.91      0.91        45

[[12  0  0]
 [ 0 17  2]
 [ 0  2 12]]


# 三、SVM with RandomizedSearchCV
可調整 kernel、C值

### RandomizedSearchCV（隨機搜尋加速運算）

In [7]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVC
import numpy as np

C = np.linspace(0.1,10,50)
kernel = ['linear', 'poly', 'rbf', 'sigmoid']

hyperparameters = dict(C=C, kernel=kernel)

model = SVC(gamma='scale')
svc = RandomizedSearchCV(model, hyperparameters, cv=5)
# FutureWarning: The parameter 'iid' is deprecated in 0.22 and will be removed in 0.24.
best_model = svc.fit(X_train_std, y_train.values.ravel())

# 查看最好的超參數
print('C: ', best_model.best_estimator_.get_params()['C'])
print('kernel: ', best_model.best_estimator_.get_params()['kernel'])
print('所有超參數: ', best_model.best_estimator_.get_params())

C:  8.181632653061225
kernel:  rbf
所有超參數:  {'C': 8.181632653061225, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 'scale', 'kernel': 'rbf', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}


### 使用 RandomizedSearchCV 的結果來建模

In [8]:
svc = SVC(C=8.181632653061225, kernel="rbf")
svc.fit(X_train_std, y_train.values.ravel())

SVC(C=8.181632653061225, break_ties=False, cache_size=200, class_weight=None,
    coef0=0.0, decision_function_shape='ovr', degree=3, gamma='scale',
    kernel='rbf', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

### 準確度 & 混淆矩陣 (測試資料集)

In [9]:
from sklearn import metrics
print(metrics.classification_report(y_test, knn.predict(X_test_std)))
print(metrics.confusion_matrix(y_test, knn.predict(X_test_std),labels=['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']))

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        12
Iris-versicolor       0.89      0.89      0.89        19
 Iris-virginica       0.86      0.86      0.86        14

       accuracy                           0.91        45
      macro avg       0.92      0.92      0.92        45
   weighted avg       0.91      0.91      0.91        45

[[12  0  0]
 [ 0 17  2]
 [ 0  2 12]]


### it's your turn...