In [1]:
## Import the required libraries and load the breast cancer dataset
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    cohen_kappa_score,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
# Load the dataset once, then pull out the three pieces used by later cells:
# the feature matrix, the class labels, and the feature names.
cancer = load_breast_cancer()
cancer_data, cancer_target, cancer_names = (
    cancer['data'],
    cancer['target'],
    cancer['feature_names'],
)

In [2]:
# Inspect the raw feature matrix (one row per sample).
cancer_data

array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])

In [3]:
# Inspect the raw feature vector of the first sample.
cancer_data[0]

array([1.799e+01, 1.038e+01, 1.228e+02, 1.001e+03, 1.184e-01, 2.776e-01,
       3.001e-01, 1.471e-01, 2.419e-01, 7.871e-02, 1.095e+00, 9.053e-01,
       8.589e+00, 1.534e+02, 6.399e-03, 4.904e-02, 5.373e-02, 1.587e-02,
       3.003e-02, 6.193e-03, 2.538e+01, 1.733e+01, 1.846e+02, 2.019e+03,
       1.622e-01, 6.656e-01, 7.119e-01, 2.654e-01, 4.601e-01, 1.189e-01])

In [4]:
# Inspect the class labels (the displayed values are encoded as 0 / 1).
cancer_target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,

In [5]:
# Inspect the names of the features.
cancer_names

array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error',
       'fractal dimension error', 'worst radius', 'worst texture',
       'worst perimeter', 'worst area', 'worst smoothness',
       'worst compactness', 'worst concavity', 'worst concave points',
       'worst symmetry', 'worst fractal dimension'], dtype='<U23')

In [6]:
## Split the data into a training set and a test set
# 20% of the samples are held out for testing; the fixed random_state makes
# the split repeatable across runs.
(
    cancer_data_train,
    cancer_data_test,
    cancer_target_train,
    cancer_target_test,
) = train_test_split(
    cancer_data,
    cancer_target,
    test_size=0.2,
    random_state=22,
)

In [7]:
## Standardize the features
# Fit the scaler (z-score standardization) on the training split only, then
# apply that same fitted scaler to both the training and the test split.
stdScaler = StandardScaler()
stdScaler.fit(cancer_data_train)
cancer_trainStd, cancer_testStd = (
    stdScaler.transform(cancer_data_train),
    stdScaler.transform(cancer_data_test),
)

In [8]:
# Inspect the standardized training features.
cancer_trainStd

array([[-0.65189103, -0.15680254, -0.58122701, ...,  0.61806352,
         3.25309114,  3.02311263],
       [ 1.12090127, -0.7607802 ,  1.17259239, ...,  1.47984285,
         1.50609584,  1.43256206],
       [-0.19513295,  0.52533693, -0.23892727, ..., -0.05462035,
         0.19671422, -0.69574608],
       ...,
       [-1.49746443, -0.91710383, -1.46041872, ..., -1.04008672,
        -0.77191684, -0.54156005],
       [-0.30646773, -0.18285648, -0.28415233, ...,  0.17167422,
         0.40254832, -0.06223087],
       [ 0.58135579,  0.51586277,  0.5917201 , ...,  0.25537222,
         1.0373476 , -0.43985138]])

In [9]:
# Inspect the standardized feature vector of the first training sample.
cancer_trainStd[0]

array([-0.65189103, -0.15680254, -0.58122701, -0.60685353,  1.0688307 ,
        0.90764819,  0.43813717,  0.08487301,  1.8169707 ,  2.12302781,
        0.27272439, -0.33148675,  0.28964385,  0.01148279, -0.49922137,
        0.47605033,  0.31943569, -0.2313993 ,  0.29919857,  0.63434202,
        0.11699875,  0.39881416,  0.36495532,  0.01555988,  1.39876933,
        2.05806621,  2.0497137 ,  0.61806352,  3.25309114,  3.02311263])

In [10]:
## Build the SVM model
# Train a support vector classifier, with its default settings, on the
# standardized training features and their labels.
svm = SVC().fit(
    X=cancer_trainStd,
    y=cancer_target_train,
)

In [11]:
# Predict a label for every standardized test sample, then show the first 20
# predictions as a quick sanity check.
cancer_target_pred = svm.predict(X=cancer_testStd)
print('预测前20个结果为：\n', cancer_target_pred[0:20])

预测前20个结果为：
 [1 0 0 0 1 1 1 1 1 1 1 1 0 1 1 1 0 0 1 1]


In [12]:
## Evaluate the test-set predictions with scalar classification metrics
# The metric functions are imported in the notebook's top import cell.
# Each call compares the true test labels with the predicted labels.
# NOTE(review): precision, recall and F1 are reported for a single positive
# class - presumably scikit-learn's default label 1; confirm that label 1 is
# the class of interest for this dataset before interpreting these numbers.
print('准确率为：', accuracy_score(cancer_target_test, cancer_target_pred))
print('精确率为：', precision_score(cancer_target_test, cancer_target_pred))
print('召回率为：', recall_score(cancer_target_test, cancer_target_pred))
print('F1值为：', f1_score(cancer_target_test, cancer_target_pred))
print('Cohen\'s Kappa系数为：', cohen_kappa_score(cancer_target_test, cancer_target_pred))

准确率为： 0.9736842105263158
精确率为： 0.9594594594594594
召回率为： 1.0
F1值为： 0.9793103448275862
Cohen's Kappa系数为： 0.9432082364662903


In [13]:
## Per-class precision / recall / F1 summary of the test-set predictions
# classification_report is imported in the notebook's top import cell.
# sep='\n' puts the report on its own line. With print's default space
# separator the report text was preceded by a stray space, which shifted the
# header row one column to the right of the rows beneath it.
print(
    '分类评价报告为：',
    classification_report(cancer_target_test, cancer_target_pred),
    sep='\n',
)

分类评价报告为： 
               precision    recall  f1-score   support

           0       1.00      0.93      0.96        43
           1       0.96      1.00      0.98        71

    accuracy                           0.97       114
   macro avg       0.98      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114

