In [4]:
import random
import matplotlib.pyplot as plt 
import numpy as np

# Cross-entropy
def cross_entropy(y, y_hat, D):
    """Return the weighted cross-entropy between targets and predictions.

    Args:
        y: Target array (one-hot in this notebook).
        y_hat: Predicted scores/probabilities, broadcastable against ``y``.
        D: Weights multiplied element-wise into every term.

    Returns:
        The scalar ``-sum(D * y * log(y_hat + 1e-16))``.
    """
    # The 1e-16 offset keeps log() finite when a prediction is exactly 0.
    weighted_log_terms = D * y * np.log(y_hat + 1e-16)
    return -np.sum(weighted_log_terms)

class Linear_classifier:
    """Softmax linear classifier trained with per-sample weights.

    Data is laid out column-wise: ``x`` is (n_features, n_samples) while ``y``
    (one-hot labels) and ``D`` (weights) are (n_classes, n_samples).

    Attributes:
        w: Weight matrix of shape (n_features, n_classes).
        b: Bias column of shape (n_classes, 1).
        batch_size: Number of samples (columns) per minibatch.
    """

    def __init__(self, w, b, batch_size):
        self.w = w
        self.b = b
        self.batch_size = batch_size

    @staticmethod
    def softmax(x):
        """Column-wise softmax of ``x`` (each column sums to 1)."""
        # Subtracting the column maximum avoids overflow in exp().
        x_exp = np.exp(x - np.max(x, axis=0))
        return x_exp / np.sum(x_exp, axis=0)

    # Linear model
    def linear_model(self, x):
        """Return class probabilities ``softmax(w.T @ x + b)`` for the columns of ``x``."""
        return self.softmax(np.dot(self.w.T, x) + self.b)

    # Gradient descent
    def sgd(self, w, b, x, y, D, alpha=0.01):
        """Apply one weighted gradient step and return the updated ``(w, b)``.

        Args:
            w, b: Parameters the step is added to. Note that the predictions
                used for the gradient come from ``self.w`` / ``self.b``.
            x, y, D: Minibatch inputs, one-hot labels and sample weights.
            alpha: Step size.

        Returns:
            Tuple ``(w_o, b_o)`` of updated parameters.
        """
        batch_size = x.shape[1]
        probs = self.linear_model(x)
        # Weighted softmax/cross-entropy gradient w.r.t. the logits (negated),
        # so adding it below performs a descent step.
        out = D * (y - probs)
        dw = np.dot(out, x.T)
        db = np.sum(out, axis=1, keepdims=True)
        w_o = w + alpha * dw.T / batch_size
        b_o = b + alpha * db / batch_size
        return w_o, b_o

    def get_minibatch(self, epoch, x, y, D):
        """Return the ``epoch``-th minibatch of ``(x, y, D)``, cycling over the data.

        The number of batches per pass is derived from the actual number of
        samples in ``x`` (previously a hard-coded dataset size was used, which
        produced empty batches for other dataset sizes). Trailing samples that
        do not fill a whole batch are not visited.
        """
        n_samples = x.shape[1]
        # At least one round, so batch_size > n_samples yields the whole set
        # (slicing clips at the array end) instead of a modulo-by-zero.
        rounds = max(1, n_samples // self.batch_size)
        start = int((epoch % rounds) * self.batch_size)
        stop = int((epoch % rounds + 1) * self.batch_size)
        return x[:, start:stop], y[:, start:stop], D[:, start:stop]

    def train(self, x, y, D, epoches=2000, learning_rate=0.01):
        """Run ``epoches`` minibatch gradient steps, updating ``self.w`` / ``self.b``.

        Args:
            x, y, D: Full training inputs, one-hot labels and sample weights.
            epoches: Number of minibatch updates to perform.
            learning_rate: Step size forwarded to :meth:`sgd`.
        """
        for epoch in range(epoches):
            # Fetch the minibatch used for this step.
            minibatch_x, minibatch_y, minibatch_D = self.get_minibatch(epoch, x, y, D)

            # Every 100 steps, report loss and accuracy on the current minibatch.
            if epoch % 100 == 0:
                y_pred = self.linear_model(minibatch_x)
                loss = cross_entropy(minibatch_y, y_pred, minibatch_D)
                hits = np.argmax(y_pred, axis=0) == np.argmax(minibatch_y, axis=0)
                acc = hits.sum() / minibatch_x.shape[1]
                print("after %d terms, the train loss is %g, the train acc on training set is %g"%(epoch, loss, acc))

            # Gradient step; the caller's learning_rate is honoured here.
            self.w, self.b = self.sgd(
                self.w, self.b, minibatch_x, minibatch_y, minibatch_D, alpha=learning_rate
            )

        return


In [5]:

def add_linear_classifer(x, y, D, batch_size, epoches, learning_rate):
    """Train one weak softmax classifier and score it on the weighted training set.

    Args:
        x: Inputs, shape (n_features, n_samples).
        y: One-hot labels, shape (n_classes, n_samples).
        D: Sample weights, shape (n_classes, n_samples); row 0 is used as the
            per-sample weight when computing the error.
        batch_size: Minibatch size handed to ``Linear_classifier``.
        epoches: Number of training steps.
        learning_rate: Step size handed to ``Linear_classifier.train``.

    Returns:
        Tuple ``(model, error, classEst, classEst_low)`` where ``error`` is the
        weighted misclassification sum, ``classEst`` is the one-hot prediction
        matrix (n_classes, n_samples) and ``classEst_low`` holds the predicted
        class indices with shape (1, n_samples).
    """
    # Parameter shapes follow the data instead of being fixed to 784 x 10.
    n_features = x.shape[0]
    n_classes = y.shape[0]

    # Initialise w with random positive values, each column normalised to sum to 1.
    w_dat = np.random.randint(1, 100, size=(n_features, n_classes))
    w = w_dat / w_dat.sum(axis=0)
    b = np.zeros((n_classes, 1))

    # Build and train the model.
    model = Linear_classifier(w, b, batch_size)
    model.train(x, y, D, epoches, learning_rate)

    # Evaluate on the training set.
    y_pred = model.linear_model(x)
    n_samples = y_pred.shape[1]
    predicted = np.argmax(y_pred, axis=0)

    # One-hot predictions plus the predicted / true class indices as row vectors.
    classEst = np.zeros(y_pred.shape)
    classEst[predicted, np.arange(n_samples)] = 1
    classEst_low = predicted.reshape(1, n_samples)
    y_low = np.argmax(y, axis=0).reshape(1, n_samples)

    # Weighted error: sum of the weights of the misclassified samples.
    # It stays below 1 as long as D[0] sums to 1.
    error = ((classEst_low != y_low) * D[0].reshape(1, n_samples)).sum()

    return model, error, classEst, classEst_low


# adaboost
def adaBoostTrainDS(x, y, numIt=20, learning_rate = 0.1):
    """Train a multi-class AdaBoost ensemble of softmax linear classifiers.

    Args:
        x: Inputs, shape (n_features, m) with m samples as columns.
        y: One-hot labels, shape (n_classes, m).
        numIt: Number of weak classifiers to train.
        learning_rate: Shrinkage factor applied to each classifier weight alpha.

    Returns:
        Tuple ``(weakClassArr, aggClassEst, alphaArr, DArr, train_loss, train_acc)``:
        the trained weak classifiers, the accumulated alpha-weighted one-hot
        votes of shape (n_classes, m), the per-classifier alphas, the (1, m)
        sample-weight vector each classifier was trained with, and the
        per-round training loss and accuracy of the ensemble.
    """
    weakClassArr = []                 # trained weak classifiers
    alphaArr = []                     # weight of each weak classifier
    DArr = []                         # sample weights used for each weak classifier
    m = x.shape[1]
    n_classes = y.shape[0]
    D = np.ones((1, m)) / m           # one weight per sample, initially uniform
    aggClassEst = np.zeros((n_classes, m))    # accumulated class votes
    # NOTE(review): hard-coded to the MNIST training-set size. Left unchanged
    # because how it behaves for other sizes depends on
    # Linear_classifier.get_minibatch.
    batch_size = 60000
    print(y.shape)
    y_labels = np.argmax(y, axis=0)
    y_low = y_labels.reshape(1, m)
    DZ = np.ones((n_classes, m))      # unit weights for the reported (unweighted) loss
    train_loss = []
    train_acc  = []

    for i in range(numIt):
        # D1 repeats the (1, m) weight row once per class.
        D1 = np.repeat(D.reshape(1, m), n_classes, axis=0)
        print(".....weak classifier %d is generating....."%i)

        linear_model, error, classEst, classEst_low = add_linear_classifer(x, y, D1, batch_size, 1000, 0.01)

        # Classifier weight: log-odds of being correct plus the multi-class
        # term log(n_classes - 1), scaled by learning_rate.
        alpha = float(learning_rate * (np.log((1.0 - error) / max(error, 1e-16)) + np.log(n_classes - 1)))

        # Record the weak classifier, its alpha and the weights it was trained with.
        alphaArr.append(alpha)
        weakClassArr.append(linear_model)
        DArr.append(D.copy())

        # Up-weight the misclassified samples, then renormalise to sum to 1.
        expon = alpha * (classEst_low != y_low)
        D = np.multiply(D, np.exp(expon))
        D = D / D.sum()

        # Accumulate votes: a smaller error gives a larger alpha and more influence.
        aggClassEst += alpha * classEst

        # Ensemble accuracy and loss on the training set. The loss is
        # cross_entropy applied to the accumulated votes, not to probabilities.
        acc = (np.argmax(aggClassEst, axis = 0) == y_labels).sum() / m
        loss = cross_entropy(y, aggClassEst, DZ) / m
        train_acc.append(acc)
        train_loss.append(loss)
        print("...after generating weak classifier %d, the acc is %g, loss is %g."%(i, acc, loss))

    return weakClassArr, aggClassEst, alphaArr, DArr, train_loss, train_acc

def Test_adaBoost(x, y, weakClassArr, alphaArr, DArr, test_train = False):
    """Evaluate the boosted ensemble after each successive weak classifier.

    Args:
        x: Inputs with samples as columns.
        y: One-hot labels, shape (n_classes, n_samples).
        weakClassArr: Weak classifiers exposing ``linear_model(x)``.
        alphaArr: Weight of each weak classifier, aligned with ``weakClassArr``.
        DArr: Unused; accepted for interface compatibility.
        test_train: If True the printed summary names the TRAINING set,
            otherwise the TESTING set.

    Returns:
        Tuple ``(test_loss, test_acc)``: loss and accuracy of the ensemble
        after adding each weak classifier. Both lists are empty when
        ``weakClassArr`` is empty.
    """
    n_samples = x.shape[1]
    aggClassEst = np.zeros((y.shape[0], n_samples))
    DZ = np.ones((y.shape[0], n_samples))
    y_labels = np.argmax(y, axis=0)
    test_acc  = []
    test_loss = []
    # Defined up front so the summary print below cannot hit an unbound name
    # when there are no weak classifiers.
    acc = float("nan")

    for index, classifier in enumerate(weakClassArr):
        y_pred = classifier.linear_model(x)
        # One-hot vote of this classifier, weighted by its alpha.
        classest = np.zeros(y_pred.shape)
        classest[np.argmax(y_pred, axis=0), np.arange(y_pred.shape[1])] = 1
        aggClassEst += classest * alphaArr[index]

        acc = (np.argmax(aggClassEst, axis=0) == y_labels).sum() / n_samples
        loss = cross_entropy(y, aggClassEst, DZ) / n_samples
        test_acc.append(acc)
        test_loss.append(loss)

    setname = "TRAINING" if test_train else "TESTING"

    print("accuracy on " + setname + " set is : "+ str(acc))
    return test_loss, test_acc


In [6]:
# Load the data
# NOTE(security): allow_pickle=True lets np.load unpickle arbitrary objects,
# which can execute code from a crafted file; only load trusted .npy files.
x_train = np.load("x_train_number.npy",allow_pickle=True) 
y_train = np.load("y_train_number.npy",allow_pickle=True) 
x_test  = np.load("x_test_number.npy",allow_pickle=True)
y_test  = np.load("y_test_number.npy",allow_pickle=True)
# Transpose so samples are columns, as the model code expects.
x_train = x_train.T.astype("float")
x_test  = x_test.T.astype("float")

# Seed numpy's RNG so the random weight initialisation is reproducible.
np.random.seed(0)

# Train, then score the ensemble on the training set
weakClassArr, aggClassEst, alphaArr, DArr, train_loss, train_acc = adaBoostTrainDS(x_train,y_train)
Test_adaBoost(x_train,y_train, weakClassArr, alphaArr, DArr, test_train = True)
# Score the ensemble on the test set
test_loss, test_acc = Test_adaBoost(x_test, y_test, weakClassArr, alphaArr, DArr)

# Loss curves (one point per weak classifier added)
plt.figure(1)
plt.plot(train_loss,color='b',label="train")
plt.plot(test_loss,color='r',label="test")
plt.xlabel("iterations")
plt.ylabel("loss")
plt.legend()
plt.savefig('ada_loss_mnist.png')

# Accuracy curves
plt.figure(2)
plt.plot(train_acc,color='b',label="train")
plt.plot(test_acc,color='r',label="test")
plt.xlabel("iterations")
plt.ylabel("acc")
plt.legend()
plt.savefig('ada_acc_mnist.png')


(10, 60000)
.....weak classifier 0 is generating.....
after 0 terms, the train loss is 3.27235, the train acc on training set is 0.117317
after 100 terms, the train loss is 1.98195, the train acc on training set is 0.317783
after 200 terms, the train loss is 1.49666, the train acc on training set is 0.5009
