In [1]:
# Load libraries
from sklearn.ensemble import AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn import datasets
# Import train_test_split function
from sklearn.model_selection import train_test_split
#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics
import operator
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt

In [2]:
# Load data
iris = datasets.load_iris()
train_idx,test_idx=train_test_split(range(iris.data.shape[0]),train_size=0.8)
X = iris.data[train_idx]
y = iris.target[train_idx]
test_X=iris.data[test_idx]
test_y=iris.target[test_idx]

In [3]:
test_y

array([0, 2, 1, 1, 2, 0, 2, 1, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 1, 0, 1,
       1, 1, 0, 1, 0, 2, 0, 2])

In [4]:
#train weak learner
def trainfxi(reg, fx, i, x, y, weight):
    error = 0
    tempWeight=[]
    min_index, min_number = min(enumerate(weight), key=operator.itemgetter(1))
    for n in range(len(weight)):
        tempWeight.append(weight[n]*100)
    reg.fit(x,y,sample_weight=tempWeight)
    fx[i]['reg'] = reg
    error=calcErrorNum(reg, x, y, weight)
    alpha = calcAlpha(error)
    fx[i]['alpha'] = alpha
    newWeight = calcNewWeight(alpha, weight, reg, x, y)
    return newWeight

In [5]:
#calculate error rate
def calcErrorNum(reg, x, y, weight):
    error=0
    for i in range(len(x)):
        if reg.predict(x[i].reshape(1,x.shape[1]))!=y[i]:
            error=error+weight[i]
    return error

In [6]:
#calculate weak learner weight
def calcAlpha(error):
    if error!=0:
        alpha = 1/2 * math.log((1-error)/error)
    else:
        alpha=1000000000
    return alpha

In [7]:
#calculate new sample weight
def calcNewWeight(alpha, weight, reg, x, y):
    newWeight = np.zeros(len(weight))
    sumWeight = 0
    for i in range(len(weight)):
        flag = 1
        if reg.predict(x[i].reshape(1,x.shape[1]))!=y[i]: flag=-1
        weighti = weight[i]*math.exp(-alpha*flag)
        newWeight[i]=weighti
        sumWeight += weighti
    newWeight = newWeight / sumWeight
    newWeight=list(newWeight)
    return newWeight

In [8]:
#train adaboost model
def trainAdaBoost(x, y, errorThreshold, maxIterNum):
    fx = {}
    weight = []
    xNum = len(x)
    times=0
    for i in range(xNum):
        w = float(1/xNum)
        weight.append(w)
 
    for i in range(maxIterNum):
        fx[i] = {}
        newWeight = trainfxi(LogisticRegression(solver='liblinear',multi_class='auto'), fx, i, x, y, weight)
        fx[i]['weight']=newWeight
        weight = newWeight
        fxError = calcFxError(fx, x, y)
        fx[i]['error']=fxError
        times+=1
        if fxError<errorThreshold: break
    
    return fx,times

In [9]:
#calculate adaboost error rate
def calcFxError(fx, x, y):
    errorNum = 0
    for i in range(len(x)):
        fi = 0
        prey=np.zeros(len(np.unique(y)))
        for j in range(len(fx)):
            fxiAlpha = fx[j]['alpha']
            preyi=fx[j]['reg'].predict_proba(x[i].reshape(1,x.shape[1]))
            prey=prey+preyi[0]*fxiAlpha
        max_index, max_number = max(enumerate(prey), key=operator.itemgetter(1))
        if(max_index!=y[i]):errorNum += 1
    return errorNum/len(x)

In [10]:
#prediction function
def pre(fx,x):
    resulty=np.zeros(x.shape[0])
    for i in range(len(x)):
        fi = 0
        prey=np.zeros(len(fx[0]['reg'].coef_))
        for j in range(len(fx)):
            fxiAlpha = fx[j]['alpha']
            preyi=fx[j]['reg'].predict_proba(x[i].reshape(1,x.shape[1]))
            prey=prey+preyi[0]*fxiAlpha
        max_index, max_number = max(enumerate(prey), key=operator.itemgetter(1))
        resulty[i]=max_index
    return resulty

In [11]:

fx,times=trainAdaBoost(X,y,0.0001,50)
fx

{0: {'reg': LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                     intercept_scaling=1, l1_ratio=None, max_iter=100,
                     multi_class='auto', n_jobs=None, penalty='l2',
                     random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                     warm_start=False),
  'alpha': 1.4722194895832201,
  'weight': [0.004385964912280699,
   0.004385964912280699,
   0.004385964912280699,
   0.004385964912280699,
   0.004385964912280699,
   0.004385964912280699,
   0.08333333333333326,
   0.004385964912280699,
   0.004385964912280699,
   0.004385964912280699,
   0.004385964912280699,
   0.004385964912280699,
   0.004385964912280699,
   0.004385964912280699,
   0.004385964912280699,
   0.08333333333333326,
   0.004385964912280699,
   0.004385964912280699,
   0.004385964912280699,
   0.004385964912280699,
   0.004385964912280699,
   0.004385964912280699,
   0.004385964912280699,
   0.004385964912280699,
   0.00438

In [12]:
print("training score:",metrics.accuracy_score(y, pre(fx,X)),"test score:",metrics.accuracy_score(test_y,pre(fx,test_X) ))

training score: 0.9833333333333333 test score: 0.9666666666666667


In [13]:
#调sklearn库测试
log=LogisticRegression(solver='liblinear',multi_class='auto')
abc = AdaBoostClassifier(algorithm="SAMME")
abc.fit(X,y)
y_pred=abc.predict(X)
print("training score:",metrics.accuracy_score(y, y_pred))

training score: 1.0


In [14]:
y_pred=abc.predict(test_X)
print("test score:",metrics.accuracy_score(test_y, y_pred))

test score: 0.9333333333333333
