In [1]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier

In [2]:
X = np.array([i for i in range(10)]).reshape(-1,1)
Y = np.array([1,1,1,-1,-1,-1,1,1,1,-1]).reshape(-1,1)

In [3]:
class AdaBoost:
    
    def __init__(self,n_estimators=10):
        self.n_estimators = n_estimators
        
    def fit(self,X,Y):
        self.clf_list = []
        self.alpha_list = []
        
        weight = [1/len(Y) for _ in range(len(Y))]
        df = pd.concat([pd.DataFrame(X),pd.DataFrame(Y,columns=['y']),\
                        pd.DataFrame({'weight':weight})],axis=1)
        
        for m in range(self.n_estimators):
            X = df.iloc[:,:-2].values
            Y = df.iloc[:,-2].values
            weight = df.iloc[:,-1].values.reshape(-1)
            
            clf = DecisionTreeClassifier(max_depth=1)
            clf.fit(X,Y,sample_weight = weight)
            
            df['predict'] = list(clf.predict(X))
            err = np.sum(df['predict']!=df['y'])/len(df)
            alpha = np.log((1-err)/err)/2
            
            df['weight'] = df['weight']*np.exp(-alpha*df['y']*df['predict'])
            df['weight'] = df['weight']/np.sum(df['weight'])
            del df['predict']
            
            self.clf_list.append(clf)
            self.alpha_list.append(alpha)
            
    def predict(self,new_X):
        tmp = np.array([clf.predict(new_X) for clf in self.clf_list])
        tmp = tmp*np.array(self.alpha_list).reshape(-1,1)
        return np.sign(np.sum(tmp,axis=0))
  

In [4]:
clf = [AdaBoost(n_estimators=i+1) for i in range(10)]
for c in clf:
    c.fit(X,Y)
    print('基分类器个数：',clf.index(c)+1,'准确率：',np.sum(c.predict(X)==Y.reshape(-1))/len(Y))

基分类器个数： 1 准确率： 0.7
基分类器个数： 2 准确率： 0.4
基分类器个数： 3 准确率： 1.0
基分类器个数： 4 准确率： 0.7
基分类器个数： 5 准确率： 0.7
基分类器个数： 6 准确率： 1.0
基分类器个数： 7 准确率： 1.0
基分类器个数： 8 准确率： 1.0
基分类器个数： 9 准确率： 1.0
基分类器个数： 10 准确率： 1.0
