## 一、线性SVM分类

**1.1 软间隔分类**

In [1]:
import numpy as np
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

iris = datasets.load_iris()
X = iris["data"][:, (2, 3)] # 取petal length和petal width两个特征
y = (iris["target"] == 2).astype(np.float64) # 是否为Iris-Virginica

svm_clf = Pipeline((
        ("scaler", StandardScaler()),
        ("linear_svc", LinearSVC(C=1, loss="hinge")),
    ))

svm_clf.fit(X, y)

Pipeline(memory=None,
     steps=[('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('linear_svc', LinearSVC(C=1, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='hinge', max_iter=1000, multi_class='ovr',
     penalty='l2', random_state=None, tol=0.0001, verbose=0))])

In [2]:
svm_clf.predict([[5.5, 1.7]])

array([1.])

## 二、非线性SVM分类

In [3]:
# 解决非线性数据集的一个方法：增加更多的特征，如多项式特征
from sklearn.datasets import make_moons
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures

X, y = make_moons(n_samples=100, noise=0.15, random_state=42)
polynomial_svm_clf = Pipeline((
        ("poly_features", PolynomialFeatures(degree=3)),
        ("scaler", StandardScaler()),
        ("svm_clf", LinearSVC(C=10, loss="hinge"))
    ))

polynomial_svm_clf.fit(X, y)

Pipeline(memory=None,
     steps=[('poly_features', PolynomialFeatures(degree=3, include_bias=True, interaction_only=False)), ('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('svm_clf', LinearSVC(C=10, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='hinge', max_iter=1000, multi_class='ovr',
     penalty='l2', random_state=None, tol=0.0001, verbose=0))])

**2.1 多项式核函数**

In [4]:
from sklearn.svm import SVC
poly_kernel_svm_clf = Pipeline((
        ("scaler", StandardScaler()),
        ("svm_clf", SVC(kernel="poly", degree=3, coef0=1, C=5)) # coef0为核函数中的系数,若等于0，核函数为齐次的
    ))                                                          # degree越大，越容易过拟合
poly_kernel_svm_clf.fit(X, y)

Pipeline(memory=None,
     steps=[('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('svm_clf', SVC(C=5, cache_size=200, class_weight=None, coef0=1,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='poly',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False))])

**2.2 添加相似特征**  
用高斯函数定义相似函数  
缺点：和多项式特征方法一样，相似特征对任何机器学习算法都有用，但计算成本也许很高，尤其在巨大的训练集上

**2.3 高斯核函数**

In [5]:
# 运用RBF核函数，可以达到和增加许多相似特征一样的效果，实际上却没有增加它们
rbf_kernel_svm_clf = Pipeline((
        ("scaler", StandardScaler()),
        ("svm_clf", SVC(kernel="rbf", gamma=5, C=0.001)) # gamma值越大越容易过拟合
    ))
rbf_kernel_svm_clf.fit(X, y)

Pipeline(memory=None,
     steps=[('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('svm_clf', SVC(C=0.001, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=5, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False))])

**2.4 计算复杂性**  
liblinear库中的LinearSVC类可以执行线性SVM的优化算法，但不支持核技巧，时间复杂度为$O\left ( m\times n \right )$  
libsvm库中的SVC类可以执行支持核技巧的算法，时间复杂度在$O\left ( m^{2}\times n \right )$与$O\left ( m^{3}\times n \right )$之间


## 三、SVM回归

优化目标：  
$min \frac{1}{2}\left \| w \right \|^{2}+C\sum_{i=1}^{n}(\xi _{i}+\xi_{i}^{\ast })$  
$s.t.\left\{\begin{matrix}
y_{i}-f(\boldsymbol x_{i},\boldsymbol \omega )\leqslant \varepsilon  _{i}+\xi _{i}^{\ast }\\ 
f(\boldsymbol x_{i},\boldsymbol \omega )-y_{i}\leqslant \varepsilon  _{i}+\xi _{i}\\ 
\xi_{i},\xi _{i}^{\ast }\geqslant 0,i=1,\cdots ,n
\end{matrix}\right.$  
转换成对偶问题：  
$f(\boldsymbol x)=\sum_{i=1}^{n_{sv}}(\alpha _{i}-\alpha _{i}^{\ast })K(\boldsymbol x_{i},\boldsymbol x)$  
$s.t.\left\{\begin{matrix}
0\leq \alpha _{i}\leq C\\ 
0\leq \alpha _{i}^{\ast }\leq C
\end{matrix}\right.$

In [6]:
from sklearn.svm import LinearSVR

svm_reg = LinearSVR(epsilon=1.5)
svm_reg.fit(X, y)

LinearSVR(C=1.0, dual=True, epsilon=1.5, fit_intercept=True,
     intercept_scaling=1.0, loss='epsilon_insensitive', max_iter=1000,
     random_state=None, tol=0.0001, verbose=0)

In [7]:
from sklearn.svm import SVR

svm_poly_reg = SVR(kernel="poly", degree=2, C=100, epsilon=0.1)
svm_poly_reg.fit(X, y)

SVR(C=100, cache_size=200, coef0=0.0, degree=2, epsilon=0.1, gamma='auto',
  kernel='poly', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

## 四、底层  
下面探讨SVM的一些细节，这里只列出目录，详见于书中。

**4.1 决策函数和预测**

**4.2 训练目标**

**4.3 二次规划**

**4.4 对偶问题**

**4.5 核SVM**

**4.6 在线SVM**