In [1]:
import numpy as np

In [2]:
def loadSimpData():
    datMat = np.matrix([[1.,2.1],
                        [2.,1.1],
                        [1.3,1.],
                        [1.,1.],
                        [2.,1.]])
    classLabels = [1.0,1.0,-1.0,-1.0,1.0]
    return datMat,classLabels

In [20]:
def stumpClassify(dataMatrix,dimen,threshVal,threshIneq):
    retArray = np.ones((np.shape(dataMatrix)[0],1))
    if threshIneq == 'It':
        retArray[dataMatrix[:,dimen] <= threshVal] = -1.0
    else:
        retArray[dataMatrix[:,dimen] > threshVal] = -1.0
    return retArray

In [32]:
def buildStump(dataArr,classLabels,D):
    dataMatrix = np.mat(dataArr);labelMat = np.mat(classLabels).T
    m,n = np.shape(dataMatrix)
    numSetps = 10.0
    bestStump  = {}
    bestClasEst = np.mat(np.zeros((m,1)))
    minError = np.inf
    for i in range(n):
        rangeMin = dataMatrix[:,i].min();rangeMax = dataMatrix[:,i].max();
        stepSize = (rangeMax - rangeMin)/numSetps
        for j in range(-1,int(numSetps) + 1):
            for inequal in ['lt','gt']:
                threshVal = (rangeMin + float(j) * stepSize)
                predictedVals = stumpClassify(dataMatrix,i,threshVal,inequal)
                errArr = np.mat(np.ones((m,1)))
                errArr[predictedVals == labelMat] = 0
                weightedError = D.T*errArr
                #print("split: dim %d,thresh %.2f,thresh ineqal: %s,the weighted error is %.3f" % (i,threshVal,inequal,weightedError))
                if weightedError < minError:
                    minError = weightedError
                    bestClasEst = predictedVals.copy()
                    bestStump['dim'] = i
                    bestStump['thresh'] = threshVal
                    bestStump['ineq'] = inequal
    return bestStump,minError,bestClasEst

In [22]:
dataMat,classLabel = loadSimpData()

In [23]:
D = np.mat(np.ones((5,1))/5);D

matrix([[0.2],
        [0.2],
        [0.2],
        [0.2],
        [0.2]])

In [24]:
buildStump(dataMat,classLabel,D)

split: dim 0,thresh 0.90,thresh ineqal: lt,the weighted error is 0.600
split: dim 0,thresh 0.90,thresh ineqal: gt,the weighted error is 0.600
split: dim 0,thresh 1.00,thresh ineqal: lt,the weighted error is 0.600
split: dim 0,thresh 1.00,thresh ineqal: gt,the weighted error is 0.600
split: dim 0,thresh 1.10,thresh ineqal: lt,the weighted error is 0.600
split: dim 0,thresh 1.10,thresh ineqal: gt,the weighted error is 0.600
split: dim 0,thresh 1.20,thresh ineqal: lt,the weighted error is 0.600
split: dim 0,thresh 1.20,thresh ineqal: gt,the weighted error is 0.600
split: dim 0,thresh 1.30,thresh ineqal: lt,the weighted error is 0.800
split: dim 0,thresh 1.30,thresh ineqal: gt,the weighted error is 0.800
split: dim 0,thresh 1.40,thresh ineqal: lt,the weighted error is 0.800
split: dim 0,thresh 1.40,thresh ineqal: gt,the weighted error is 0.800
split: dim 0,thresh 1.50,thresh ineqal: lt,the weighted error is 0.800
split: dim 0,thresh 1.50,thresh ineqal: gt,the weighted error is 0.800
split:

({'dim': 0, 'thresh': 2.0, 'ineq': 'lt'}, matrix([[0.4]]), array([[1.],
        [1.],
        [1.],
        [1.],
        [1.]]))

In [54]:
def adaBoostTrainDS(dataArr,classLabels,numIt = 40):
    weakClassArr = []
    m = np.shape(dataArr)[0]
    D = np.mat(np.ones((m,1))/m)
    aggClassEst = np.mat(np.zeros((m,1)))
    for i in range(numIt):
        bestStump,error,classEst = buildStump(dataArr,classLabels,D)
        print("D:",D.T)      
        alpha = float(0.5*np.log((1.0-error)/max(error,1e-16)))
        bestStump['alpha'] = alpha
        weakClassArr.append(bestStump)
        print("classEst: ",classEst.T)
        expon = np.multiply(-1*alpha*np.mat(classLabels).T,classEst)
        D = np.multiply(D,np.exp(expon))
        D = D/D.sum()
        aggClassEst += alpha * classEst
        print("aggClassEst: ",aggClassEst.T)
        aggErrors = np.multiply(np.sign(aggClassEst) != np.mat(classLabels).T,np.ones((m,1)))
        errorRate = aggErrors.sum()/m
        print("total error: ",errorRate,"\n")
        if errorRate == 0.0: break
    return weakClassArr

In [55]:
classifierArray = adaBoostTrainDS(dataMat,classLabel,9)

D: [[0.2 0.2 0.2 0.2 0.2]]
classEst:  [[1. 1. 1. 1. 1.]]
aggClassEst:  [[0.20273255 0.20273255 0.20273255 0.20273255 0.20273255]]
total error:  0.4 

D: [[0.16666667 0.16666667 0.25       0.25       0.16666667]]
classEst:  [[1. 1. 1. 1. 1.]]
aggClassEst:  [[0.20273255 0.20273255 0.20273255 0.20273255 0.20273255]]
total error:  0.4 

D: [[0.16666667 0.16666667 0.25       0.25       0.16666667]]
classEst:  [[-1. -1. -1. -1. -1.]]
aggClassEst:  [[0.20273255 0.20273255 0.20273255 0.20273255 0.20273255]]
total error:  0.4 

D: [[0.16666667 0.16666667 0.25       0.25       0.16666667]]
classEst:  [[-1. -1. -1. -1. -1.]]
aggClassEst:  [[0.20273255 0.20273255 0.20273255 0.20273255 0.20273255]]
total error:  0.4 

D: [[0.16666667 0.16666667 0.25       0.25       0.16666667]]
classEst:  [[-1. -1. -1. -1. -1.]]
aggClassEst:  [[0.20273255 0.20273255 0.20273255 0.20273255 0.20273255]]
total error:  0.4 

D: [[0.16666667 0.16666667 0.25       0.25       0.16666667]]
classEst:  [[-1. -1. -1. -1. -1.]