In [None]:
# author: xiaolin_daisy
# date: 2017/09/08
# site: XAJTU
from numpy import *
# from numpy import zeros

def loadDataset(filename):
    """Load a comma-separated data set with four features and one label per row.

    Every non-blank line must contain at least five comma-separated numeric
    fields: the first four become the feature vector and the fifth becomes
    the target value. Additional fields are ignored. Blank lines are skipped.

    Args:
        filename: path of the text file to read.

    Returns:
        A tuple ``(dataMat, yMat)``: ``dataMat`` is a list of four-element
        float lists and ``yMat`` is the list of float labels, both in file
        order.

    Raises:
        ValueError: if a field cannot be parsed as a float.
        IndexError: if a non-blank line has fewer than five fields.
    """
    dataMat = []
    yMat = []
    # The context manager closes the file even when parsing fails part-way.
    with open(filename) as fr:
        for line in fr:
            stripped = line.strip()
            if not stripped:
                # Tolerate empty lines, e.g. a trailing newline at end of file.
                continue
            fields = stripped.split(",")
            dataMat.append([float(value) for value in fields[:4]])
            yMat.append(float(fields[4]))
    return dataMat, yMat

class diyStruct:
    """Mutable state shared by all steps of one SMO training run.

    Attributes set by the constructor:
        dataMat:  training samples, stored as given; must support
                  ``dataMat[i, :]`` row indexing.
        yMat:     training labels, stored as given.
        C:        upper bound for every multiplier.
        toler:    tolerance used when checking the KKT conditions.
        m:        number of training samples (``shape(dataMat)[0]``).
        E:        m x 2 matrix, initially zero. Row i is written as
                  ``[1, error_i]`` by the optimiser, so column 0 acts as a
                  "this entry has been computed" flag.
        alphas:   m x 1 matrix of multipliers, initially zero.
        b:        threshold of the decision function, initially 0.
        K:        m x m kernel matrix; column i holds the kernel values of
                  every sample against sample i.
    """

    def __init__(self, dataMat, yMat, C, toler, kernelParam):
        sampleCount = shape(dataMat)[0]

        # Training inputs and hyper-parameters.
        self.dataMat = dataMat
        self.yMat = yMat
        self.C = C
        self.toler = toler
        self.m = sampleCount

        # Optimiser state. `asmatrix` is used instead of the `mat` alias
        # because `mat` is no longer exported by NumPy 2.x.
        self.E = asmatrix(zeros((sampleCount, 2)))
        self.alphas = asmatrix(zeros((sampleCount, 1)))
        self.b = 0

        # Precompute the full kernel matrix once, one column per sample, so
        # the optimiser never has to evaluate the kernel again.
        self.K = asmatrix(zeros((sampleCount, sampleCount)))
        for col in range(sampleCount):
            self.K[:, col] = transfer2Kernel(dataMat, dataMat[col, :], kernelParam)

def transfer2Kernel(X, Xi, kernelParam):
    """Evaluate the kernel between every row of ``X`` and the sample ``Xi``.

    Only the Gaussian kernel is implemented:
    ``K(x, xi) = exp(-||x - xi||^2 / sigma^2)`` with ``sigma = kernelParam[1]``.

    Args:
        X: m x n matrix (or anything ``asmatrix`` accepts) of samples.
        Xi: a single sample, broadcastable against the rows of ``X``
            (e.g. a 1 x n matrix row).
        kernelParam: tuple ``(name, sigma)``; ``name`` must be ``"rbf"``.

    Returns:
        An m x 1 matrix whose i-th entry is the kernel value of row i of
        ``X`` against ``Xi``.

    Raises:
        NameError: if ``kernelParam[0]`` is not ``"rbf"``.
    """
    if kernelParam[0] != "rbf":
        raise NameError("undefined kernel name!")
    # `asmatrix` replaces the `mat` alias, which NumPy 2.x no longer exports.
    # Subtracting Xi broadcasts over all rows, so the squared distances are
    # obtained in one vectorised step instead of a Python loop over rows.
    diff = asmatrix(X) - Xi
    squaredDistance = multiply(diff, diff).sum(axis=1)
    return exp(-squaredDistance / kernelParam[1] ** 2)

def clipAlpha(alphaJ, L, H):
    """Clamp ``alphaJ`` to the interval bounded by ``L`` and ``H``.

    The lower bound is applied first and the upper bound second, so when
    ``L > H`` the upper bound wins.

    Args:
        alphaJ: value to clamp.
        L: lower bound.
        H: upper bound.

    Returns:
        ``L`` if the value is below ``L``, ``H`` if the (possibly raised)
        value is above ``H``, otherwise ``alphaJ`` unchanged.
    """
    raised = L if alphaJ < L else alphaJ
    return H if raised > H else raised

def calcE(alphaI, diyObj):
    """Return the prediction error of training sample ``alphaI``.

    The decision value is
    ``g(x_i) = sum_j alphas[j] * yMat[j] * K[j, i] + b`` and the error is
    ``g(x_i) - yMat[i]``.

    Args:
        alphaI: index of the training sample.
        diyObj: object exposing ``yMat``, ``alphas``, ``K`` and ``b``;
            ``yMat``/``alphas`` are expected to be m x 1 NumPy matrices and
            ``K`` an m x m NumPy matrix.

    Returns:
        The error as a Python float.
    """
    # `.item()` extracts the single element explicitly; calling float() on a
    # 1x1 matrix relies on an implicit conversion NumPy has deprecated.
    yI = float(diyObj.yMat[alphaI].item())
    weights = multiply(diyObj.alphas, diyObj.yMat)
    gxI = float((weights.T * diyObj.K[:, alphaI] + diyObj.b).item())
    return gxI - yI

def selectJ(EI, alphaI, diyObj):
    """Choose the second multiplier to optimise together with ``alphaI``.

    If more than one sample has a computed error (flag in column 0 of
    ``diyObj.E``), the sample whose freshly computed error differs most
    from ``EI`` is chosen. Otherwise a random index different from
    ``alphaI`` is drawn.

    Args:
        EI: error of the first sample.
        alphaI: index of the first sample.
        diyObj: shared SMO state (uses ``E``, ``m`` and whatever ``calcE``
            reads).

    Returns:
        A tuple ``(alphaJ, EJ)``: the chosen index and its error.

    Raises:
        ValueError: if a random partner is needed but the data set has
            fewer than two samples.
    """
    validIndices = nonzero(diyObj.E[:, 0].A)[0]
    if len(validIndices) > 1:
        alphaJ = 0
        EJ = 0
        # Start below any possible |EI - EJ| so that the first candidate is
        # always accepted. Starting at 0 with a strict '>' returned the
        # placeholder (0, 0) whenever every candidate's error equalled EI.
        maxDelta = -1
        for j in validIndices:
            if j == alphaI:
                continue
            EJtemp = calcE(j, diyObj)
            deltaE = abs(EI - EJtemp)
            if deltaE > maxDelta:
                maxDelta = deltaE
                alphaJ = j
                EJ = EJtemp
        return alphaJ, EJ

    # No useful cached errors yet: fall back to a random partner.
    if diyObj.m < 2:
        # With a single sample the loop below could never terminate.
        raise ValueError("selectJ needs at least two samples to pick a partner")
    alphaJ = alphaI
    while alphaJ == alphaI:
        alphaJ = int(random.uniform(0, diyObj.m))
    return alphaJ, calcE(alphaJ, diyObj)

def calcb(b1new, b2new):
    """Combine two candidate thresholds into one.

    Args:
        b1new: threshold derived from the first multiplier.
        b2new: threshold derived from the second multiplier.

    Returns:
        ``b1new`` when both candidates are equal, otherwise their midpoint.
    """
    if b1new == b2new:
        return b1new
    return (b1new + b2new) / 2

def iterL(alphaI, diyObj):
    """Attempt one SMO step with ``alphaI`` as the first multiplier.

    The error of sample ``alphaI`` is computed and cached. If the sample
    violates the KKT conditions (within ``diyObj.toler``), a partner is
    picked with ``selectJ`` and both multipliers, the threshold ``b`` and
    the two cached errors are updated in ``diyObj``.

    Args:
        alphaI: index of the first multiplier.
        diyObj: shared SMO state; modified in place.

    Returns:
        1 if the pair of multipliers was changed, 0 otherwise.
    """
    yI = diyObj.yMat[alphaI]
    EI = calcE(alphaI, diyObj)
    diyObj.E[alphaI] = [1, EI]  # flag 1 marks the cached error as computed

    # Only multipliers that violate the KKT conditions are optimised.
    violatesKKT = ((yI * EI > diyObj.toler and diyObj.alphas[alphaI] > 0) or
                   (yI * EI < -diyObj.toler and diyObj.alphas[alphaI] < diyObj.C))
    if not violatesKKT:
        return 0

    alphaJ, EJ = selectJ(EI, alphaI, diyObj)
    yJ = diyObj.yMat[alphaJ]
    alpha1old = diyObj.alphas[alphaI].copy()
    alpha2old = diyObj.alphas[alphaJ].copy()
    eta = diyObj.K[alphaI, alphaI] + diyObj.K[alphaJ, alphaJ] \
          - 2 * diyObj.K[alphaI, alphaJ]
    if eta <= 0:
        return 0
    alpha2newUnclip = alpha2old + yJ * (EI - EJ) / eta

    # Feasible interval [L, H] for the second multiplier. Written with
    # explicit comparisons rather than max()/min(): depending on the NumPy
    # version, the file's star import may rebind those names to NumPy
    # reductions, which do not accept this (scalar, value) call form.
    if yI == yJ:
        lower = alpha1old + alpha2old - diyObj.C
        upper = alpha1old + alpha2old
    else:
        lower = alpha2old - alpha1old
        upper = diyObj.C - alpha1old + alpha2old
    L = lower if lower > 0 else 0
    H = upper if upper < diyObj.C else diyObj.C
    if L == H:
        return 0

    alpha2new = clipAlpha(alpha2newUnclip, L, H)
    if abs(alpha2new - alpha2old) < 0.00001:
        return 0  # step too small to count as progress
    alpha1new = alpha1old + yI * yJ * (alpha2old - alpha2new)

    b1new = - EI - yI * diyObj.K[alphaI, alphaI] * (alpha1new - alpha1old) \
            - yJ * diyObj.K[alphaJ, alphaI] * (alpha2new - alpha2old) \
            + diyObj.b
    b2new = - EJ - yI * diyObj.K[alphaI, alphaJ] * (alpha1new - alpha1old) \
            - yJ * diyObj.K[alphaJ, alphaJ] * (alpha2new - alpha2old) \
            + diyObj.b
    # Threshold rule from Platt's SMO: a multiplier strictly inside (0, C)
    # pins the threshold exactly, so its candidate must be used. Only when
    # both multipliers sit on a bound is any value between b1new and b2new
    # admissible, and the midpoint is taken.
    if 0 < alpha1new < diyObj.C:
        b = b1new
    elif 0 < alpha2new < diyObj.C:
        b = b2new
    else:
        b = calcb(b1new, b2new)

    diyObj.alphas[alphaI] = alpha1new
    diyObj.alphas[alphaJ] = alpha2new
    diyObj.b = b
    # Refresh the cached errors with the new multipliers and threshold.
    diyObj.E[alphaI] = [1, calcE(alphaI, diyObj)]
    diyObj.E[alphaJ] = [1, calcE(alphaJ, diyObj)]
    return 1

def smo(dataMat, yMat, C, toler, iterNum, kernelParam):
    """Train a kernel SVM with the SMO algorithm.

    Sweeps alternate between all samples and the samples whose multiplier
    lies strictly between 0 and ``C``. A sweep over all samples is always
    followed by restricted sweeps while they keep changing multipliers;
    once a restricted sweep changes nothing, a full sweep is scheduled.
    Training stops when a full sweep changes nothing, or when ``iterNum``
    sweeps have run and no full sweep is pending.

    Progress is printed after every examined sample and every sweep.

    Args:
        dataMat: m x n nested list / array of training samples.
        yMat: flat sequence of m labels (transposed into an m x 1 matrix).
        C: upper bound for the multipliers.
        toler: tolerance for the KKT check.
        iterNum: cap on the number of sweeps (a pending full sweep is still
            executed after the cap is reached).
        kernelParam: kernel description passed on to ``transfer2Kernel``.

    Returns:
        A tuple ``(alphas, b)``: the m x 1 matrix of multipliers and the
        threshold stored in the training state.
    """
    # `asmatrix` replaces the `mat` alias, which NumPy 2.x no longer exports.
    diyObj = diyStruct(asmatrix(dataMat), asmatrix(yMat).transpose(),
                       C, toler, kernelParam)
    currentToler = 0   # number of sweeps completed so far
    changedAlphas = 0
    allSet = True      # True -> next sweep visits every sample
    while (currentToler < iterNum and changedAlphas > 0) or allSet:
        changedAlphas = 0
        if allSet:
            candidates = range(diyObj.m)
        else:
            # Multipliers strictly inside (0, C).
            candidates = nonzero((diyObj.alphas.A > 0) * (diyObj.alphas.A < C))[0]
        for i in candidates:
            changedAlphas += iterL(i, diyObj)
            print("iter:%d i:%d,pairs changed %d"
                  % (currentToler, i, changedAlphas))
        if allSet:
            allSet = False
        elif changedAlphas == 0:
            # The restricted sweep made no progress: re-check every sample.
            allSet = True
        currentToler += 1
        print("iteration number: %d" % currentToler)
    return diyObj.alphas, diyObj.b

def testSVM():
    """Train on the blood-transfusion file and print predictions for ten samples.

    Reads ``bloodTransfusion_noduplicated.txt`` from the working directory,
    trains with an RBF kernel, then prints the sign of the decision value
    for each hard-coded test sample.
    """
    kernelParam = ("rbf", 20)  # shared by training and prediction
    dataMat, yMat = loadDataset("bloodTransfusion_noduplicated.txt")
    alphas, b = smo(dataMat, yMat, 200, 0.0001, 100, kernelParam)

    # yi of testData: 1,1,1,-1,-1,-1,-1,1,1,-1
    testData = [[2,50,12500,98],[0,13,3250,28],[1,16,4000,35],[1,24,6000,77],[4,4,1000,4]
        ,[1,12,3000,35],[4,23,5750,58],[2,7,1750,14],[2,10,2500,28],[1,13,3250,47]]

    # `asmatrix` replaces the `mat` alias, which NumPy 2.x no longer exports.
    # These values do not depend on the test sample, so build them once.
    trainMat = asmatrix(dataMat)
    testmat = asmatrix(testData)
    weights = multiply(asmatrix(yMat).transpose(), alphas)
    for i in range(shape(testmat)[0]):
        kernelEval = transfer2Kernel(trainMat, testmat[i, :], kernelParam)
        predict = kernelEval.T * weights + b
        print(sign(predict))

# Script entry point: print a greeting, then run the SVM demo.
if __name__=="__main__":
    print("hello svm")
    testSVM()