Train the model using coordinate descent (which differs from AdaBoost in its initialization and iteration order):
1. Initialize alpha to any value.
2. Select the classifier with the least exponential loss.
3. Update alpha.
4. Repeat steps 2-3 until alpha converges.

File Paths:  
- training data: data/heart_train.data
- test data: data/heart_test.data

In [1]:
import numpy as np

#Decision Stump class (decision tree with single attribute split)
class DecisionStump:
    """Single-attribute threshold classifier (a one-split decision tree).

    Every field starts as None; the code that builds the stump is expected
    to fill them in before predict() is called.
    """

    def __init__(self):
        # column of X that the stump compares against the threshold
        self.attribute_index = None
        # -1: values below the threshold are labelled -1 (others +1);
        # any other value: values below the threshold are labelled +1 (others -1)
        self.polarity = None
        # split point, compared with a strict '<'
        self.threshold = None
        # coefficient stored alongside the stump by the training code
        self.alpha = None

    def predict(self, X):
        """Return a float array holding one +1/-1 label per row of X."""
        is_below = X[:, self.attribute_index] < self.threshold
        # label given to the rows that fall below the threshold
        below_label = -1.0 if self.polarity == -1 else 1.0
        return np.where(is_below, below_label, -below_label)

#coordinate descent algorithm
class CoordinateDescent:
    """Weighted ensemble of decision stumps fitted by coordinate descent.

    Each round selects the stump whose addition (scaled by the current step
    size) gives the smallest exponential loss, then recomputes the step size
    (alpha) for that stump from the exponentially weighted correct and
    incorrect predictions.
    """

    # two consecutive alphas closer than this count as converged
    _ALPHA_TOLERANCE = 1e-5
    # smoothing used only when one side of the alpha ratio is exactly zero
    _EPSILON = 1e-10

    def __init__(self):
        # fitted stumps, in the order they were selected
        self.all_classifiers = []
        # number of rounds completed; the round that triggers a stop
        # condition is not counted
        self.iterations = 0

    def train(self, X, Y, max_iterations=None):
        """Fit stumps to (X, Y) until alpha converges.

        Parameters:
            X: 2-D array of attribute values, one row per sample.
            Y: 1-D array of +1/-1 labels, one per row of X.
            max_iterations: optional cap on the number of rounds run by this
                call; None (the default) means "run until convergence".

        Returns:
            (alpha, min_loss): the alpha computed in the last round, and the
            exponential loss of the stump selected in that round, evaluated
            with the alpha carried over from the round before. If
            max_iterations is 0 no round runs and (0.5, inf) is returned.

        Raises:
            ValueError: if no candidate stump yields a finite loss smaller
                than infinity (e.g. X has no columns or the loss overflowed).
        """
        num_data, num_attributes = X.shape
        # alpha initialization
        alpha = 0.5
        min_loss = float('inf')
        thresholds = (-1, 1)
        polarities = (-1, 1)

        # Ensemble score of the stumps fitted so far (all zeros on a fresh
        # model). It is updated incrementally below instead of being rebuilt
        # from every stump at the start of each round.
        prev_sum_predict = self.sum_of_predict(X)

        rounds = 0
        while max_iterations is None or rounds < max_iterations:
            classifier = DecisionStump()
            min_loss = float('inf')

            # try every (attribute, polarity, threshold) combination
            for i in range(num_attributes):
                attribute_set = X[:, i]
                for p in polarities:
                    for t in thresholds:
                        # ensemble score if this stump were added with the current alpha
                        cur_sum_predict = prev_sum_predict + alpha * self.predict(attribute_set, t, p)
                        loss = np.sum(np.exp(-Y * cur_sum_predict))

                        # strict '<' keeps the first of several tied candidates
                        if loss < min_loss:
                            min_loss = loss
                            classifier.attribute_index = i
                            classifier.polarity = p
                            classifier.threshold = t

            if classifier.attribute_index is None:
                raise ValueError(
                    'no decision stump produced a finite exponential loss; '
                    'check that X has at least one column and that the '
                    'ensemble score has not overflowed'
                )

            # predictions of the selected stump
            pred_Y = self.predict(X[:, classifier.attribute_index], classifier.threshold, classifier.polarity)
            correct = pred_Y == Y
            incorrect = ~correct

            # update alpha from the exponentially weighted hits and misses
            sum_correct = np.sum(np.exp(-Y[correct] * prev_sum_predict[correct]))
            sum_incorrect = np.sum(np.exp(-Y[incorrect] * prev_sum_predict[incorrect]))

            # A stump that is right (or wrong) on every sample would give
            # log(x / 0) or log(0): use a smoothed, finite alpha and stop,
            # because further rounds would only operate on inf/nan values.
            degenerate = sum_correct == 0 or sum_incorrect == 0
            if degenerate:
                alpha = 1/2 * np.log((sum_correct + self._EPSILON) / (sum_incorrect + self._EPSILON))
            else:
                alpha = 1/2 * np.log(sum_correct / sum_incorrect)
            classifier.alpha = alpha

            # append classifier to list
            self.all_classifiers.append(classifier)

            if degenerate:
                break

            # convergence criterion: alpha barely changed since the previous stump
            if len(self.all_classifiers) > 1:
                if abs(alpha - self.all_classifiers[-2].alpha) < self._ALPHA_TOLERANCE:
                    break

            prev_sum_predict = prev_sum_predict + alpha * pred_Y
            self.iterations += 1
            rounds += 1

        return alpha, min_loss

    def predict(self, attribute_set, t, p):
        """Return the +1/-1 labels of a single stump as a float array.

        Parameters:
            attribute_set: 1-D sequence of values of one attribute.
            t: threshold, compared with a strict '<'.
            p: polarity; -1 labels values below t as -1 and the rest as +1,
                any other value labels values below t as +1 and the rest as -1.
        """
        below = np.asarray(attribute_set) < t
        if p == -1:
            return np.where(below, -1.0, 1.0)
        return np.where(below, 1.0, -1.0)

    def sum_of_predict(self, X):
        """Return the alpha-weighted sum of the predictions of every fitted stump.

        All stumps in all_classifiers are included, so the result also covers
        the stump added in the final training round. With no fitted stumps the
        result is an array of zeros with one entry per row of X.
        """
        total = np.zeros(len(X))
        for stump in self.all_classifiers:
            total = total + stump.alpha * stump.predict(X)
        return total
    

#read file and store data to X and Y 
def read_data(filename):
    """Load a comma-separated data file into feature and label arrays.

    Each non-blank line is expected to hold an integer label in the first
    field followed by numeric attribute values; at most the next 22 fields
    are read as attributes. Blank lines are skipped.

    Parameters:
        filename: path of the file to read. It is decoded as 'utf-8-sig',
            so a leading byte-order mark is tolerated.

    Returns:
        (X, Y): X is the array of attribute rows and Y the array of labels,
        with every 0 in either array replaced by -1.

    Raises:
        ValueError: if a label or attribute field cannot be parsed as a number.
    """
    features = []
    labels = []
    # the context manager closes the file even if a line fails to parse
    with open(filename, 'r', encoding='utf-8-sig') as handle:
        for line in handle:
            if not line.strip():
                # e.g. a trailing empty line at the end of the file
                continue
            fields = line.split(',')
            labels.append(int(fields[0]))
            features.append([float(value) for value in fields[1:23]])

    X = np.array(features)
    Y = np.array(labels)
    # replace 0 with -1
    X = np.where(X == 0, -1, X)
    Y = np.where(Y == 0, -1, Y)

    return X, Y

#load the training set
TRAIN_DATA_PATH = 'data/heart_train.data'
train_X, train_Y = read_data(TRAIN_DATA_PATH)

#fit the coordinate descent model on it
final_classifier = CoordinateDescent()
alpha, loss = final_classifier.train(train_X, train_Y)

#report the values returned by training
for label, value in (('optimal alpha:', alpha), ('exponential loss:', loss)):
    print(label, value)


321
optimal alpha: 0.019582970497356623
exponential loss: 43.05095129366278
