### 6.3.2 应用简化版的SMO算法处理小规模数据集
所谓简化，是指对于第二个$\alpha$的选择是完全随机的，没有采用启发式选择
#### 程序清单6-1 SMO算法的辅助函数

In [4]:
import numpy as np


def loadDataSet(fileName):
    """读取数据集,使用逻辑回归那一张的马数据集"""
    dataMat = []
    labelMat = []
    fr = open(fileName)
    for line in fr.readlines():
        lineArr = line.strip().split("\t")
        dataMat.append([float(lineArr[0]), float(lineArr[1])])
        labelMat.append(float(lineArr[2]))
    return dataMat, labelMat


def selectJrand(i, m):
    """
    i:第一个alpha的下标,
    m:alpha的数目
    随机选择出于i不同的j
    """
    j = i
    while j == i:
        j = int(np.random.uniform(0, m))
    return j


def clipAlpha(aj, H, L):
    """
    防止选择到过大或者过小的aj,合并用得函数,相当于np.clip()
    aj:输入的数
    H:上边界
    L:下边界
    """
    if aj > H:
        return H
    if aj < L:
        return L
    return aj


filePath = 'D:\\机器学习实战代码\\machinelearninginaction\\Ch06\\testSet.txt'
dataArr, labelArr = loadDataSet(fileName=filePath)
labelArr[:10]


[-1.0, -1.0, 1.0, -1.0, 1.0, 1.0, 1.0, -1.0, -1.0, -1.0]

#### 程序清单6-2 简化版的SMO算法

In [8]:
def smoSimple(dataMatIn,classLabels,C,toler,maxIter):
    """toler:容错率"""
    dataMatrix=np.mat(dataMatIn)
    labelMat=np.mat(classLabels).transpose()    # 列向量
    m,n=np.shape(dataMatrix)    # 样本数、特征数
    # 初始化
    b=0
    alphas=np.mat(np.zeros((m,1)))
    iter=0
    
    # 外层迭代
    while iter<maxIter:
        alphaPairsChanged=0
        for i in range(m):
            fXi=float(np.multiply(alphas,labelMat).T*\
                (dataMatrix*dataMatrix[i,:].T))+b
            Ei=fXi-float(labelMat[i])    # 计算误差
            if ((labelMat[i]*Ei < -toler) and (alphas[i] < C)) or ((labelMat[i]*Ei > -toler) and (alphas[i] > 0)):
                j=selectJrand(i,m)
                fXj=float(np.multiply(alphas,labelMat).T*\
                    (dataMatrix*dataMatrix[j,:].T)) + b
                Ej=fXj-float(labelMat[j])    # alpha_j 对应的样本对应的误差
                
                alphaIold=alphas[i].copy()
                alphaJold=alphas[j].copy()
                
                # 获得不同情况下的上下界
                if (labelMat[i]!=labelMat[j]):
                    L=np.max((0,alphas[j]-alphas[i]))
                    H=np.min((C,C+alphas[j]-alphas[i]))
                else:
                    L=np.max((0,alphas[i]+alphas[j]-C))
                    H=np.min((C,alphas[i]+alphas[j]))
                    
                if L==H:
                    print('L==H!')
                    continue
                
                eta=2*dataMatrix[i,:]*dataMatrix[j,:].T-\
                    dataMatrix[i,:]*dataMatrix[i,:].T-\
                        dataMatrix[j,:]*dataMatrix[j,:].T
                
                if eta>=0:
                    print('eta>=0!')
                    continue
                
                # 当ets<0时,更新alpha_j
                alphas[j]-=labelMat[j]*(Ei-Ej)/eta
                alphas[j]=clipAlpha(alphas[j],H,L)
                
                # alpha_j的更新幅度过于小也不行：
                if np.abs(alphas[j]-alphaJold)<0.00001:
                    print('j is not moving enough!')
                    continue
                
                # 更新alpha_i
                alphas[i]+=labelMat[j]*labelMat[i]*(alphaJold-alphas[j])
                
                # 计算常数项b
                b1=b-Ei-labelMat[i]*(alphas[i]-alphaIold)*dataMatrix[i,:]*dataMatrix[i,:].T*\
                    labelMat[j]*(alphas[j]-alphaJold)*dataMatrix[i,:]*dataMatrix[j,:].T
                b2=b-Ej-labelMat[i]*(alphas[i]-alphaIold)*dataMatrix[i,:]*dataMatrix[j,:].T*\
                    labelMat[j]*(alphas[j]-alphaJold)*dataMatrix[j,:]*dataMatrix[j,:].T
                    
                if (0<alphas[i]) and (C>alphas[j]):
                    b=b1
                elif (0<alphas[j]) and (C>alphas[j]):
                    b=b2
                else:
                    b=(b1+b2)/2
                
                alphaPairsChanged+=1
                print('iter:%d,下标i:%d,alpha pairs changed:%d'%(iter,i,alphaPairsChanged))
        
        # 检查alpha是否做了更新，如果有更新，就一直更新，直到alpha不更新为止
        if alphaPairsChanged==0:
            iter+=1
        else:
            iter=0
        print('iteration number: %d'%iter)
    
    return b,alphas


smoSimple(dataArr,labelArr,0.6,0.001,40)

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


L==H!
iter:0,下标i:1,alpha pairs changed:1
iter:0,下标i:2,alpha pairs changed:2
L==H!
j is not moving enough!
L==H!
j is not moving enough!
j is not moving enough!
j is not moving enough!
L==H!
j is not moving enough!
L==H!
L==H!
iter:0,下标i:20,alpha pairs changed:3
L==H!
L==H!
j is not moving enough!
j is not moving enough!
L==H!
j is not moving enough!
L==H!
j is not moving enough!
L==H!
j is not moving enough!
iter:0,下标i:46,alpha pairs changed:4
L==H!
L==H!
iter:0,下标i:53,alpha pairs changed:5
L==H!
L==H!
j is not moving enough!
j is not moving enough!
L==H!
j is not moving enough!
j is not moving enough!
j is not moving enough!
L==H!
L==H!
iteration number: 0
L==H!
j is not moving enough!
j is not moving enough!
j is not moving enough!
iter:0,下标i:10,alpha pairs changed:1
L==H!
j is not moving enough!
j is not moving enough!
iter:0,下标i:23,alpha pairs changed:2
j is not moving enough!
j is not moving enough!
j is not moving enough!
j is not moving enough!
j is not moving enough!
L==H!
j is

(matrix([[-3.83804274]]),
 matrix([[0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.12738676],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.24137521],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
         [0.        ],
        