# 第三题：支持向量机的分类任务

实验内容：
1. 使用支持向量机完成Breast_Cancer_Wisconsin数据集分类任务
2. 使用不同核函数和惩罚系数C在训练集上训练模型，并分别计算对应模型在测试集的精度，查准率，查全率，F1值，将结果填写到下表中。

| 核函数 | C | 精度 | 查准率 | 查全率 | F1 |
| - | - | - | - | - | - |
| rbf | 0.1 | 0.883 | 0.979 | 0.712 | 0.825 |
| rbf | 1 | 0.883 | 0.926 | 0.758 | 0.833 |
| linear | 0.1 | 0.924 | 0.921 | 0.879 | 0.899 |
| linear | 1 | 0.93 | 0.922 | 0.894 | 0.908 |
| sigmoid | 0.1 | 0.614 | 0.0 | 0.0 | 0.0 |
| sigmoid | 1 | 0.444 | 0.226 | 0.182 | 0.202 |

In [None]:
# Load the dataset and turn the diagnosis label into an integer.
import numpy as np
import pandas as pd

# Forward slashes resolve on every OS. The previous backslash-separated
# literal only worked on Windows and depended on "\B" and "\d" being left
# alone as invalid escape sequences, which newer Python versions warn about.
data = pd.read_csv('data/Breast_Cancer_Wisconsin/data')
# Encode the label column: "M" -> 1, every other value -> 0.
data['diagnosis'] = data['diagnosis'].apply(lambda x: 1 if x == "M" else 0)

In [None]:
# Separate the table into a feature matrix and a label vector.
data = data.values
# Features: columns from index 2 up to (but not including) the last column.
x = data[:, 2:-1]
# Labels: column 1 as a flat 1-D vector.
y = data[:, 1]

In [None]:
# Split into training and test sets: 30% of the rows go to the test set,
# and the random state is pinned so the split is repeatable.
from sklearn.model_selection import train_test_split

trainX, testX, trainY, testY = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=32,
)
# Cell output: shapes in the order train X, train Y, test X, test Y.
tuple(part.shape for part in (trainX, trainY, testX, testY))

**注意：计算线性核的时候，要使用 LinearSVC 这个类，不要使用SVC(kernel = 'linear')。LinearSVC不需要设置kernel参数！**

In [None]:
# 引入模型
from sklearn.svm import SVC
from sklearn.svm import LinearSVC

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

In [None]:
# RBF-kernel SVM with C=0.1: fit on the training split, score on the test split.
clf = SVC(C=0.1, kernel="rbf").fit(trainX, trainY)
predY = clf.predict(testX)
# Evaluate the four metrics in the order they are reported below.
acc, prec, rec, f1 = (
    metric(testY, predY)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)
print(f'Kernel: rbf, C: 0.1, Accuracy: {acc:.3}, Precision: {prec:.3}, Recall: {rec:.3}, F1: {f1:.3}')

In [None]:
# RBF-kernel SVM with C=1: fit on the training split, score on the test split.
clf = SVC(C=1, kernel="rbf").fit(trainX, trainY)
predY = clf.predict(testX)
# Evaluate the four metrics in the order they are reported below.
acc, prec, rec, f1 = (
    metric(testY, predY)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)
print(f'Kernel: rbf, C: 1, Accuracy: {acc:.3}, Precision: {prec:.3}, Recall: {rec:.3}, F1: {f1:.3}')

In [None]:
# Linear SVM (LinearSVC, primal formulation via dual=False) with C=0.1:
# fit on the training split, score on the test split.
clf = LinearSVC(dual=False, C=0.1).fit(trainX, trainY)
predY = clf.predict(testX)
# Evaluate the four metrics in the order they are reported below.
acc, prec, rec, f1 = (
    metric(testY, predY)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)
print(f'Kernel: linear, C: 0.1, Accuracy: {acc:.3}, Precision: {prec:.3}, Recall: {rec:.3}, F1: {f1:.3}')

In [None]:
# Linear SVM (LinearSVC, primal formulation via dual=False) with C=1:
# fit on the training split, score on the test split.
clf = LinearSVC(dual=False, C=1).fit(trainX, trainY)
predY = clf.predict(testX)
# Evaluate the four metrics in the order they are reported below.
acc, prec, rec, f1 = (
    metric(testY, predY)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)
print(f'Kernel: linear, C: 1, Accuracy: {acc:.3}, Precision: {prec:.3}, Recall: {rec:.3}, F1: {f1:.3}')

In [None]:
# Sigmoid-kernel SVM with C=0.1: fit on the training split, score on the
# test split.
clf = SVC(kernel="sigmoid", C=0.1)
clf.fit(trainX, trainY)
predY = clf.predict(testX)
acc = accuracy_score(testY, predY)
# The recorded run of this configuration scored 0.0 on precision, recall and
# F1, i.e. it produced no true positives, so these scores can hit a zero
# denominator. zero_division=0 states the fallback value explicitly (the same
# 0.0 the default yields) instead of relying on the default, which emits an
# UndefinedMetricWarning.
prec = precision_score(testY, predY, zero_division=0)
rec = recall_score(testY, predY, zero_division=0)
f1 = f1_score(testY, predY, zero_division=0)
print(f'Kernel: sigmoid, C: 0.1, Accuracy: {acc:.3}, Precision: {prec:.3}, Recall: {rec:.3}, F1: {f1:.3}')

In [None]:
# Sigmoid-kernel SVM with C=1: fit on the training split, score on the test split.
clf = SVC(C=1, kernel="sigmoid").fit(trainX, trainY)
predY = clf.predict(testX)
# Evaluate the four metrics in the order they are reported below.
acc, prec, rec, f1 = (
    metric(testY, predY)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)
print(f'Kernel: sigmoid, C: 1, Accuracy: {acc:.3}, Precision: {prec:.3}, Recall: {rec:.3}, F1: {f1:.3}')