In [1]:
import numpy as np

def createTrainDatas(W, b, start, end, size = 10):
    """
    创建线性可分的训练数据集
    args:
        W - 目标权重系数
        b - 偏移量
        start - 最小值
        end - 最大值
        size - 训练数据集大小
    return:
        X - 训练集特征值
        y - 训练集目标值
    """
    np.random.seed(42)
    X = np.random.uniform(start, end, (size, W.shape[0]))
    y = np.sign(X.dot(W) + b)
    return X, y

def buildLine(W, b, start, end, size = 100):
    """
    构建一条指定的直线
    args:
        W - 权重系数
        b - 偏移量
        start - 最小值
        end - 最大值
        size - 组成线的点的数量
        w0 * x0 + w1 * x1 + b = 0
    """
    x0 = np.linspace(start, end, size)
    if W[1] == 0:
        x0 = np.ones(size) * (-b / W[0])
        x1 = np.zeros(size)
    else:
        x1 = -(b + W[0] * x0) / W[1]
    return x0, x1

In [2]:
# 坐标轴起始点
start = -10
# 坐标轴结束点
end = 10
# 目标权重系数
W = np.array([5, 4])
b = 0
# 创建线性可分的训练数据集
X, y = createTrainDatas(W, b, start, end, size = 20)

In [3]:
import numpy as np
import matplotlib.pyplot as plt

%matplotlib notebook

plt.rcParams['font.sans-serif'] = ['PingFang HK']  # 选择一个本地的支持中文的字体
fig, ax = plt.subplots()
ax.set_facecolor('#f8f9fa')

x1 = X[y==-1][:, 0]
y1 = X[y==-1][:, 1]
x2 = X[y==1][:, 0]
y2 = X[y==1][:, 1]
p1 = plt.scatter(x1, y1, c='#e63946', marker='o', s=20)
p2 = plt.scatter(x2, y2, c='#457b9d', marker='x', s=20)

x3, y3 = buildLine([0.39, 0.26], -0.13, start, end)
x4, y4 = buildLine([0.19, 0.18], -0.12, start, end)
p3, = plt.plot(x3, y3, '#457b9d')
p4, = plt.plot(x4, y4, '#e63946')

ax.set_title('硬间隔支持向量机', color='#264653')
ax.set_xlabel('X1', color='#264653')
ax.set_ylabel('X2', color='#264653')
ax.tick_params(labelcolor='#264653')
plt.legend([p1, p2, p3, p4], ["-1", "1", "A", "B"], loc="upper right")
plt.show()

<IPython.core.display.Javascript object>

In [4]:
import numpy as np
import matplotlib.pyplot as plt

%matplotlib notebook

plt.rcParams['font.sans-serif'] = ['PingFang HK']  # 选择一个本地的支持中文的字体
fig, ax = plt.subplots()
ax.set_facecolor('#f8f9fa')

x1 = X[y==-1][:, 0]
y1 = X[y==-1][:, 1]
x2 = X[y==1][:, 0]
y2 = X[y==1][:, 1]
p1 = plt.scatter(x1, y1, c='#e63946', marker='o', s=20)
p2 = plt.scatter(x2, y2, c='#457b9d', marker='x', s=20)

x3, y3 = buildLine([0.39609699, 0.26030793], -1.1364341, start, end)
x4, y4 = buildLine([0.39609699, 0.26030793], -0.1364341, start, end)
x5, y5 = buildLine([0.39609699, 0.26030793], 0.8635659, start, end)
p3, = plt.plot(x3, y3, '#457b9d', linestyle='--')
p4, = plt.plot(x4, y4, '#666666')
p5, = plt.plot(x5, y5, '#e63946', linestyle='--')

ax.set_title('硬间隔支持向量机', color='#264653')
ax.set_xlabel('X1', color='#264653')
ax.set_ylabel('X2', color='#264653')
ax.tick_params(labelcolor='#264653')
plt.legend([p1, p2, p3, p4, p5], ["-1", "1", "A", "B", "C"], loc="upper right")
plt.show()

<IPython.core.display.Javascript object>

创建线性可分的训练数据集：

In [5]:
# 坐标轴起始点
start = -10
# 坐标轴结束点
end = 10
# 目标权重系数
W = np.array([5, 4])
b = 0
# 创建线性可分的训练数据集
X, y = createTrainDatas(W, b, start, end, size = 50)

可视化：

In [6]:
import numpy as np
import matplotlib.pyplot as plt

%matplotlib notebook

plt.rcParams['font.sans-serif'] = ['PingFang HK']  # 选择一个本地的支持中文的字体
fig, ax = plt.subplots()
ax.set_facecolor('#f8f9fa')

x1 = X[y==-1][:, 0]
y1 = X[y==-1][:, 1]
x2 = X[y==1][:, 0]
y2 = X[y==1][:, 1]
p1 = plt.scatter(x1, y1, c='#e63946', marker='o', s=20)
p2 = plt.scatter(x2, y2, c='#457b9d', marker='x', s=20)

ax.set_title('硬间隔支持向量机', color='#264653')
ax.set_xlabel('X1', color='#264653')
ax.set_ylabel('X2', color='#264653')
ax.tick_params(labelcolor='#264653')
plt.legend([p1, p2], ["-1", "1"], loc="upper right")
plt.show()

<IPython.core.display.Javascript object>

拟合数据：

In [7]:
from sklearn.svm import SVC

svc = SVC(kernel = "linear")
# 拟合数据
svc.fit(X, y)
# 权重系数
w = svc.coef_
# 截距
b = svc.intercept_
print("w", w, "b", b)

w [[1.12858695 0.79360049]] b [-0.36279147]


可视化：

In [8]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

%matplotlib notebook

plt.rcParams['font.sans-serif'] = ['PingFang HK']  # 选择一个本地的支持中文的字体
fig, ax = plt.subplots()
ax.set_facecolor('#f8f9fa')

x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
xx, yy = np.meshgrid(np.arange(x_min, x_max, .05), np.arange(y_min, y_max, .05))
Z = svc.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
clist=['#ffadad', '#8ecae6']
newcmp = LinearSegmentedColormap.from_list('point_color', clist)
plt.pcolormesh(xx, yy, Z, cmap = newcmp)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())

x1 = X[y==-1][:, 0]
y1 = X[y==-1][:, 1]
x2 = X[y==1][:, 0]
y2 = X[y==1][:, 1]
p1 = plt.scatter(x1, y1, c='#e63946', marker='o', s=20)
p2 = plt.scatter(x2, y2, c='#457b9d', marker='x', s=20)

x3, y3 = buildLine(w[0], b - 1, start, end)
x4, y4 = buildLine(w[0], b + 1, start, end)
plt.plot(x3, y3, '#457b9d', linestyle='--')
plt.plot(x4, y4, '#e63946', linestyle='--')

ax.set_title('硬间隔支持向量机', color='#264653')
ax.set_xlabel('X1', color='#264653')
ax.set_ylabel('X2', color='#264653')
ax.tick_params(labelcolor='#264653')
plt.legend([p1, p2], ["-1", "1"], loc="upper right")
plt.show()

<IPython.core.display.Javascript object>



In [9]:
import numpy as np

class SMO:
    """
    硬间隔支持向量机
    序列最小优化算法实现（Sequential minimal optimization/SMO）
    """

    def __init__(self, X, y):
        # 训练样本特征矩阵（N * p）
        self.X = X
        # 训练样本标签向量（N * 1）
        self.y = y
        # 拉格朗日乘子向量（N * 1）
        self.alpha = np.zeros(X.shape[0])
        # 误差向量，默认为负的标签向量（N * 1）
        self.errors = -y
        # 偏移量 
        self.b = 0
        # 权重向量（p * 1）
        self.w = np.zeros(X.shape[1])
        # 代价值
        self.cost = -np.inf

    def fit(self, tol = 1e-6):
        """
        算法来自 John C. Platt 的论文
        https://www.microsoft.com/en-us/research/uploads/prod/1998/04/sequential-minimal-optimization.pdf
        """
        # 更新变化次数
        numChanged = 0
        # 是否检查全部
        examineAll = True
        while numChanged > 0 or examineAll:
            numChanged = 0
            if examineAll:
                for idx in range(X.shape[0]):
                    numChanged += self.update(idx)
            else:
                for idx in range(X.shape[0]):
                    if self.alpha[idx] <= 0:
                        continue
                    numChanged += self.update(idx)
            if examineAll:
                examineAll = False
            elif numChanged == 0:
                examineAll = True
            # 计算代价值
            cost = self.calcCost()
            # 当代价值的变化小于容许的范围时结束算法
            if cost - self.cost <= tol:
                break
            self.cost = cost

    def update(self, idx):
        """
        对下标为 idx 的拉格朗日乘子进行更新
        """
        X = self.X
        y = self.y
        alpha = self.alpha
        # 检查当前拉格朗日乘子是否满足KKT条件，满足条件则直接返回 0
        if self.checkKKT(idx):
            return 0
        if len(alpha[(alpha != 0)]) > 1:
            # 按照｜E1 - E2｜最大的原则寻找第二个待优化的拉格朗日乘子的下标
            jdx = self.selectJdx(idx)
            # 对下标为 idx、jdx 的拉格朗日乘子进行更新，当成功更新时直接返回 1
            if self.updateAlpha(idx, jdx):
                return 1
        # 当未更新成功时，遍历不为零的拉格朗日乘子进行更新
        for jdx in range(X.shape[0]):
            if alpha[jdx] == 0:
                continue
            # 对下标为 idx、jdx 的拉格朗日乘子进行更新，当成功更新时直接返回 1
            if self.updateAlpha(idx, jdx):
                return 1
        # 当依然没有没有更新成功时，遍历为零的拉格朗日乘子进行更新
        for jdx in range(X.shape[0]):
            if alpha[jdx] != 0:
                continue
            # 对下标为 idx、jdx 的拉格朗日乘子进行更新，当成功更新时直接返回 1
            if self.updateAlpha(idx, jdx):
                return 1
        # 依然没有更新时返回 0
        return 0

    def selectJdx(self, idx):
        """
        寻找第二个待优化的拉格朗日乘子的下标
        """
        errors = self.errors
        if errors[idx] > 0:
            # 当误差项大于零时，选择误差向量中最小值的下标
            return np.argmin(errors)
        elif errors[idx] < 0:
            # 当误差项小于零时，选择误差向量中最大值的下标
            return np.argmax(errors)
        else:
            # 当误差项等于零时，选择误差向量中最大值和最小值的绝对值最大的下标
            minJdx = np.argmin(errors)
            maxJdx = np.argmax(errors)
            if max(np.abs(errors[minJdx]), np.abs(errors[maxJdx])) - errors[minJdx]:
                return minJdx
            else:
                return maxJdx

    def calcB(self):
        """
        计算偏移量
        分别计算每一个拉格朗日乘子不为零对应的偏移量后取其平均值
        """
        X = self.X
        y = self.y
        alpha = self.alpha
        alpha_gt = alpha[alpha > 0]
        # 拉格朗日乘子向量中不为零的数量
        alpha_gt_len = len(alpha_gt)
        # 全部为零时直接返回 0
        if alpha_gt_len == 0:
            return 0
        # b = y - Wx，具体算法请参考文章中的说明
        X_gt = X[alpha > 0]
        y_gt = y[alpha > 0]
        alpha_gt_y = np.array(np.multiply(alpha_gt, y_gt)).reshape(-1, 1)
        s = np.sum(np.multiply(alpha_gt_y, X_gt), axis=0)
        return np.sum(y_gt - X_gt.dot(s)) / alpha_gt_len

    def calcCost(self):
        """
        计算代价值
        按照文章中的算法计算即可
        """
        X = self.X
        y = self.y
        alpha = self.alpha
        cost = 0
        for idx in range(X.shape[0]):
            for jdx in range(X.shape[0]):
                cost = cost + (y[idx] * y[jdx] * X[idx].dot(X[jdx]) * alpha[idx] * alpha[jdx])
        return np.sum(alpha) - cost / 2

    def checkKKT(self, idx):
        """
        检查下标为 idx 的拉格朗日乘子是否满足 KKT 条件
        1. alpha >= 0
        2. y * f(x) - 1 >= 0
        3. alpha * (y * f(x) - 1) = 0
        """
        y = self.y
        errors = self.errors
        alpha = self.alpha
        r = errors[idx] * y[idx]
        if (alpha[idx] > 0 and r == 0) or (alpha[idx] == 0 and r >= 0):
            return True
        return False

    def calcE(self):
        """
        计算误差向量
        E = f(x) - y
        """
        X = self.X
        y = self.y
        alpha = self.alpha
        alpha_y = np.array(np.multiply(alpha, y)).reshape(-1, 1)
        errors = X.dot(X.T).dot(alpha_y).T[0] + self.b - y
        return errors

    def calcU(self, idx, jdx):
        """
        计算拉格朗日乘子上界，使两个待优化的拉格朗日乘子同时大于等于0
        按照文章中的算法计算即可
        """
        y = self.y
        alpha = self.alpha
        if y[idx] * y[jdx] == 1:
            return 0
        else:
            return max(0.0, alpha[jdx] - alpha[idx])

    def calcV(self, idx, jdx):
        """
        计算拉格朗日乘子下界，使两个待优化的拉格朗日乘子同时大于等于0
        按照文章中的算法计算即可
        """
        y = self.y
        alpha = self.alpha
        if y[idx] * y[jdx] == 1:
            return alpha[jdx] + alpha[idx]
        else:
            return np.inf

    def updateAlpha(self, idx, jdx):
        """
        对下标为 idx、jdx 的拉格朗日乘子进行更新
        按照文章中的算法计算即可
        """
        if idx == jdx:
            return False
        X = self.X
        y = self.y
        alpha = self.alpha
        errors = self.errors
        # idx 的误差项
        Ei = errors[idx]
        # jdx 的误差项
        Ej = errors[jdx]
        Kii = X[idx].dot(X[idx])
        Kjj = X[jdx].dot(X[jdx])
        Kij = X[idx].dot(X[jdx])
        # 计算 K
        K = Kii + Kjj - 2 * Kij
        oldAlphaIdx = alpha[idx]
        oldAlphaJdx = alpha[jdx]
        # 计算 jdx 的新拉格朗日乘子
        newAlphaJdx = oldAlphaJdx + y[jdx] * (Ei - Ej) / K
        U = self.calcU(idx, jdx)
        V = self.calcV(idx, jdx)
        if newAlphaJdx < U:
            # 当新值超过上界时，修改其为上界
            newAlphaJdx = U
        if newAlphaJdx > V:
            # 当新值低于下界时，修改其为下界
            newAlphaJdx = V
        if oldAlphaJdx == newAlphaJdx:
            # 当新值与旧值相等时，判断为未更新，直接返回
            return False
        # 计算 idx 的新拉格朗日乘子
        newAlphaIdx = oldAlphaIdx + y[idx] * y[jdx] * (oldAlphaJdx - newAlphaJdx)
        # 重新计算偏移量
        self.b = self.calcB()
        # 更新权重向量
        self.w = self.w + y[idx] * (newAlphaIdx - oldAlphaIdx) * X[idx] + y[jdx] * (newAlphaJdx - oldAlphaJdx) * X[jdx]
        # 更新拉格朗日乘子向量
        alpha[idx] = newAlphaIdx
        alpha[jdx] = newAlphaJdx
        # 重新计算误差向量
        self.errors = self.calcE()
        return True

In [10]:
smo = SMO(X, y)
smo.fit()
print("w", smo.w, "b", smo.b)

w [1.12784119 0.79278739] b -0.359467023348705


In [11]:
def hypothesis(X, W, b):
    return np.sign(X.dot(W) + b)

In [12]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

%matplotlib notebook

plt.rcParams['font.sans-serif'] = ['PingFang HK']  # 选择一个本地的支持中文的字体
fig, ax = plt.subplots()
ax.set_facecolor('#f8f9fa')

x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
xx, yy = np.meshgrid(np.arange(x_min, x_max, .05), np.arange(y_min, y_max, .05))
Z = hypothesis(np.c_[xx.ravel(), yy.ravel()], smo.w, smo.b)
Z = Z.reshape(xx.shape)
clist=['#ffadad', '#8ecae6']
newcmp = LinearSegmentedColormap.from_list('point_color', clist)
plt.pcolormesh(xx, yy, Z, cmap = newcmp)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())

x1 = X[y==-1][:, 0]
y1 = X[y==-1][:, 1]
x2 = X[y==1][:, 0]
y2 = X[y==1][:, 1]
p1 = plt.scatter(x1, y1, c='#e63946', marker='o', s=20)
p2 = plt.scatter(x2, y2, c='#457b9d', marker='x', s=20)

x3, y3 = buildLine(smo.w, smo.b - 1, start, end)
x4, y4 = buildLine(smo.w, smo.b + 1, start, end)
plt.plot(x3, y3, '#457b9d', linestyle='--')
plt.plot(x4, y4, '#e63946', linestyle='--')

ax.set_title('硬间隔支持向量机', color='#264653')
ax.set_xlabel('X1', color='#264653')
ax.set_ylabel('X2', color='#264653')
ax.tick_params(labelcolor='#264653')
plt.legend([p1, p2], ["-1", "1"], loc="upper right")
plt.show()

<IPython.core.display.Javascript object>

