# 乳癌資料庫預測SVM分類
>使用scikit-learn 機器學習套件裡的SVC（支持向量機分類）演算法

* (一)引入函式庫及內建乳癌資料集<br>
引入之函式庫如下<br>
sklearn.datasets: 用來匯入內建之乳癌資料集`datasets.load_breast_cancer()`<br>
sklearn.svm.SVC: 支持向量機分類之演算法<br>
matplotlib.pyplot: 用來繪製影像

In [1]:
from sklearn import svm
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

## Step1. 載入資料

In [2]:
# Load scikit-learn's built-in breast cancer dataset.
breast_cancer = datasets.load_breast_cancer()

In [3]:
# Feature matrix of the loaded dataset (same object as the `.data` attribute).
features = breast_cancer["data"]

In [4]:
# Class labels of the loaded dataset (same object as the `.target` attribute).
target = breast_cancer["target"]

## Step2. 區分訓練集與測試集

In [14]:
# Hold out 30% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.3,
    random_state=0,
)

## Step3. 建模

In [15]:
# Support vector classifier with a linear kernel and C=1, fitted on the
# training split.
clf = svm.SVC(C=1, kernel="linear")
clf.fit(X_train, y_train)

SVC(C=1, kernel='linear')

In [19]:
# Support vector classifier with the RBF kernel, fitted on the training split.
# Rebinds `clf`, replacing the previously fitted model.
clf = svm.SVC(kernel="rbf")
clf.fit(X_train, y_train)

SVC()

In [23]:
# Support vector classifier with the sigmoid kernel, fitted on the training
# split. Rebinds `clf`, replacing the previously fitted model.
clf = svm.SVC(kernel="sigmoid")
clf.fit(X_train, y_train)

SVC(kernel='sigmoid')

In [27]:
# Support vector classifier with the polynomial kernel, fitted on the training
# split. Rebinds `clf`, replacing the previously fitted model.
clf = svm.SVC(kernel="poly")
clf.fit(X_train, y_train)

SVC(kernel='poly')

## Step4. 預測

```

```


In [28]:
# Predict class labels for the held-out test samples with the current `clf`.
y_pred = clf.predict(X=X_test)

## Step5. 準確度分析

In [29]:
# Mean accuracy of the current classifier on the training split, then on the
# test split, printed in that order.
train_accuracy = clf.score(X_train, y_train)
test_accuracy = clf.score(X_test, y_test)
print(train_accuracy)
print(test_accuracy)

0.907035175879397
0.9181286549707602


In [30]:
# Accuracy computed from the stored predictions rather than via clf.score.
prediction_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(prediction_accuracy)

0.9181286549707602


In [13]:
# Sweep the train/test split seed (random_state 0..999) and the SVC kernel,
# and record which seed + kernel combination yields the highest and the lowest
# accuracy on the training split and on the test split.
#
# The running extremes start at -inf / +inf so that the very first model always
# populates every *_random_state / *_kernel name; with 0 / 1 sentinels and
# strict comparisons those names could stay unbound (e.g. if every score were
# exactly 1.0) and the final prints would raise NameError.
max_predict_train = float("-inf")
max_predict_test = float("-inf")
min_predict_train = float("inf")
min_predict_test = float("inf")
kernel = ['linear', 'rbf', 'sigmoid', 'poly']

for i in range(1000):
    X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.3, random_state=i)
    for j in kernel:
        clf = svm.SVC(kernel=j)
        clf.fit(X_train, y_train)
        # Score each split exactly once per model; the comparisons below reuse
        # these values instead of re-running the classifier for every check.
        train_score = clf.score(X_train, y_train)
        test_score = clf.score(X_test, y_test)
        # Strict comparisons keep the first seed/kernel that reached an extreme.
        if max_predict_train < train_score:
            max_predict_train = train_score
            max_predict_train_random_state = i
            max_predict_train_kernel = j
        if max_predict_test < test_score:
            max_predict_test = test_score
            max_predict_test_random_state = i
            max_predict_test_kernel = j
        if min_predict_train > train_score:
            min_predict_train = train_score
            min_predict_train_random_state = i
            min_predict_train_kernel = j
        if min_predict_test > test_score:
            min_predict_test = test_score
            min_predict_test_random_state = i
            min_predict_test_kernel = j
print("max_predict_train_random_state={0}, max_predict_train={1}, max_predict_train_kernel={2}".format(max_predict_train_random_state, max_predict_train, max_predict_train_kernel))
print("max_predict_test_random_state={0}, max_predict_test={1}, max_predict_test_kernel={2}".format(max_predict_test_random_state, max_predict_test, max_predict_test_kernel))
print("min_predict_train_random_state={0}, min_predict_train={1}, min_predict_train_kernel={2}".format(min_predict_train_random_state, min_predict_train, min_predict_train_kernel))
print("min_predict_test_random_state={0}, min_predict_test={1}, min_predict_test_kernel={2}".format(min_predict_test_random_state, min_predict_test, min_predict_test_kernel))

max_predict_train_random_state=674, max_predict_train=0.9849246231155779, max_predict_train_kernel=linear
max_predict_test_random_state=265, max_predict_test=0.9883040935672515, max_predict_test_kernel=linear
min_predict_train_random_state=170, min_predict_train=0.3944723618090452, min_predict_train_kernel=sigmoid
min_predict_test_random_state=490, min_predict_test=0.3333333333333333, min_predict_test_kernel=sigmoid
