# 支持向量机
> SVM
>
> * sklearn 实现（numpy 仅用于数据预处理）
>
> * 继续采用a9a.txt中的数据
>
> * 本次使用sklearn的工具包来完成

In [1]:
import numpy as np
from matplotlib import pyplot as plt
from sklearn.datasets import load_svmlight_file
from sklearn.metrics import classification_report, accuracy_score
from sklearn import svm

## 读取数据 + 简单预处理

In [2]:
# Read both LIBSVM-format files, forcing a feature width of 123 on each.
x_val, y_val = load_svmlight_file('./a9a.txt', n_features=123)  # validation set
x_train, y_train = load_svmlight_file('./a9a_train.txt', n_features=123)  # training set

# Recode the -1 label as 0 (the original note says this is to simplify a
# loss-function computation).
y_train = np.where(y_train == -1, 0, y_train)
y_val = np.where(y_val == -1, 0, y_val)

# Densify the sparse feature matrices and keep the labels as flat 1-D arrays.
# ``x_test``/``y_test`` are independent dense copies of the validation data.
x_train = x_train.toarray()
x_test = x_val.toarray()
y_train = y_train.reshape(-1)
y_test = y_val.copy().reshape(-1)

# Append a constant column of ones (bias feature) as the last column of each
# feature matrix; with 123 features this is column index 123.
b = np.ones(x_train.shape[0])
x_train = np.column_stack((x_train, b))

b = np.ones(x_test.shape[0])
x_test = np.column_stack((x_test, b))

# Random initial parameter vector, one entry per column (bias included).
# NOTE(review): not referenced by any other code visible in this notebook.
theta = np.random.normal(size=x_train.shape[1])

## 功能函数

In [3]:
def plot_hyperplane(clf, X, y,
                    h=0.02,
                    draw_sv=True,
                    title='hyperplan'):
    """Draw the decision regions of a fitted classifier over 2-D samples.

    A mesh with step ``h`` is laid over the range of the first two columns of
    ``X`` (padded by 1 on every side), ``clf.predict`` is evaluated on every
    mesh point, and the predictions are drawn as filled contours on the
    current matplotlib figure. The samples are then scattered on top, one
    marker/colour pair per class.

    Args:
        clf: Fitted classifier exposing ``predict`` (and ``support_vectors_``
            when ``draw_sv`` is true). It is called with two-column input, so
            it must have been trained on exactly two features.
        X: 2-D array of samples; only columns 0 and 1 are used.
        y: Class label of each row of ``X``.
        h: Mesh step size.
        draw_sv: Whether to mark the support vectors with yellow crosses.
        title: Title of the plot.
    """
    # Create a mesh to plot in.
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    plt.title(title)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.xticks(())
    plt.yticks(())

    # Classify every mesh point and put the result into a colour plot.
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, cmap='hot', alpha=0.5)

    markers = ['o', 's', '^']
    colors = ['b', 'r', 'c']
    y = np.asarray(y)
    # Pick the style by the class's rank among the sorted unique labels, not
    # by the label value itself: labels may be floats (a list cannot be
    # indexed by a float) or otherwise unusable as list indices. The modulo
    # lets the styles cycle when there are more classes than styles.
    for rank, label in enumerate(np.unique(y)):
        members = y == label
        plt.scatter(X[members, 0],
                    X[members, 1],
                    c=colors[rank % len(colors)],
                    marker=markers[rank % len(markers)])
    if draw_sv:
        sv = clf.support_vectors_
        plt.scatter(sv[:, 0], sv[:, 1], c='y', marker='x')



## 训练函数

In [4]:
def training_svm(x_train, x_test, y_train, y_test):
    """Train an RBF-kernel SVC and report how it performs on the test data.

    Fits ``svm.SVC(C=1.0, kernel='rbf', gamma=0.5)`` on the training data,
    prints the learned coefficients and intercept, then prints the accuracy
    and a classification report computed on the test data. The decision
    regions are plotted only when the data has exactly two features.

    Args:
        x_train: Training feature matrix.
        x_test: Test feature matrix with the same number of columns as
            ``x_train``.
        y_train: Training labels.
        y_test: Test labels.
    """
    clf = svm.SVC(C=1.0, kernel='rbf', gamma=0.5)  # Gaussian (RBF) kernel
    clf.fit(x_train, y_train)

    # ``coef_`` is only defined for the linear kernel and raises
    # AttributeError otherwise; for other kernels report the dual
    # coefficients of the support vectors instead. Checking the fitted
    # model keeps this correct if the kernel above is ever changed.
    if clf.kernel == 'linear':
        weights_name, weights = '系数', clf.coef_
    else:
        weights_name, weights = '对偶系数', clf.dual_coef_
    print('%s:%s \n\n偏置量 %s' % (weights_name, weights, clf.intercept_))

    # Predict once and reuse the result: ``clf.score`` is the mean accuracy
    # of ``clf.predict``, so calling both would run inference twice.
    predictions = clf.predict(x_test)
    accuracy = accuracy_score(y_test, predictions)
    print('\n\n评分: %.2f' % accuracy)
    print(classification_report(y_test, predictions))
    print("准确度", accuracy)

    # plot_hyperplane evaluates the model on a two-column mesh, which only
    # works for a model trained on exactly two features.
    n_features = x_test.shape[1]
    if n_features == 2:
        plt.figure(figsize=(10, 10), dpi=144)
        plot_hyperplane(clf, x_test, y_test, title="高斯核函数")
    else:
        print('特征维度为 %d，无法绘制二维决策边界，跳过画图' % n_features)


## 开始训练

In [None]:
# Evaluate on x_test, the dense validation matrix that carries the same
# appended bias column as x_train; the raw x_val is sparse and one column
# narrower than the data the model is trained on.
training_svm(x_train, x_test, y_train, y_test)