In [None]:
import numpy as np
from decision_trees import *

In [None]:
class gradient_boosting:
    """
    desc : Gradient boosting ensemble built on top of `decision_tree`.

    For 'Classification' the ensemble accumulates a log(odds) score
    (initial guess + learning_rate * sum of tree outputs) which is mapped to
    a probability with the logistic function. For any other `type` the
    ensemble accumulates the regression value directly.
    """

    def __init__(self, learning_rate, num_trees, max_depth, min_sample_size, type = "Classification"):
        """
        desc : Constructor for gradient_boosting.

        learning_rate : (float) shrinkage applied to the output of every tree.
        num_trees : (int) number of decision trees fitted by each call to build().
        max_depth : (int) maximum depth handed to every decision tree.
        min_sample_size : (int) minimum sample size handed to every decision tree.
        type : (string) from : ['Classification','Regression']. specifies the type of problem.
               Any value other than 'Classification' is treated as regression.

        return : (None)
        """
        self.learning_rate = learning_rate
        self.num_trees = num_trees
        self.max_depth = max_depth
        self.min_sample_size = min_sample_size
        self.list_trees = []
        self.initial_guess = 0
        self.type = type

    @staticmethod
    def _sigmoid(log_odds):
        """
        desc : Converts log(odds) into probabilities.

        log_odds : (numpy) array of log(odds) scores.

        return : (numpy) probabilities in [0, 1].
        """
        # 1/(1+exp(-x)) saturates to 1 for large positive x, whereas
        # exp(x)/(1+exp(x)) would evaluate to inf/inf = NaN.
        return 1.0 / (1.0 + np.exp(-log_odds))

    def build(self, X, Y):
        """
        desc : Fits self.num_trees decision trees on (X, Y) and appends them to
               self.list_trees. self.initial_guess is recomputed from Y on every
               call; trees from earlier calls are kept in self.list_trees.

        X : (numpy) dataset we have to fit, without the target variable.
        Y : (list or numpy) target variable. 0/1 labels for 'Classification'.

        return : (None) Adds new decision trees to self.list_trees.
        raises : (ValueError) if Y is empty.
        """
        # Work on a flat float array so that plain Python lists (as documented)
        # support the element-wise arithmetic below.
        targets = np.asarray(Y, dtype=float).ravel()
        if targets.size == 0:
            raise ValueError("Y must contain at least one sample")

        is_classification = self.type == 'Classification'

        # Step 1: initial guess.
        if is_classification:
            positives = targets.sum()
            negatives = targets.size - positives
            if positives > 0 and negatives > 0:
                # initial guess is log(odds)
                self.initial_guess = np.log(positives / negatives)
            else:
                # Only one class present: the exact log(odds) is +/-inf, which
                # would turn every later score into inf/NaN. Clip the class
                # frequency instead so the starting score stays finite.
                eps = 1e-6
                freq = min(max(positives / targets.size, eps), 1.0 - eps)
                self.initial_guess = np.log(freq / (1.0 - freq))
        else:
            # initial guess is mean
            self.initial_guess = targets.sum() / targets.size

        # initialising prediction array (log(odds) for classification)
        pred = np.full(targets.shape[0], self.initial_guess, dtype=float)

        for _ in range(self.num_trees):
            t = decision_tree(self.max_depth, self.min_sample_size)
            if is_classification:
                # step 2(A) residuals are measured in probability space
                prob = self._sigmoid(pred)
                residuals = targets - prob
                # step 2(B)/(C) the tree also receives the current probabilities
                # (as a list, like the initial implementation handed over).
                t.build_tree(X, self.max_depth, residuals, self.type, list(prob))
            else:
                # step 2(A) find the residuals
                residuals = targets - pred
                # step 2(B) build dec_tree and step 2(C) output of each leaf_node saved in itself.
                t.build_tree(X, self.max_depth, residuals, self.type)
            self.list_trees.append(t)
            # step 2(D) update predictions; asarray lets the tree return a list or an array
            pred = pred + self.learning_rate * np.asarray(t.predict(X, self.type), dtype=float)

    def predict(self, X_test):
        """
        desc : Predicts the target for every row of X_test.

        X_test : (numpy) dataset to predict on; only X_test.shape[0] is read here,
                 the rows themselves are passed to every tree's predict().

        return : (list of bool) for 'Classification', True where the predicted
                 probability is >= 0.5.
                 (numpy) predicted values otherwise.
        """
        pred = np.full(X_test.shape[0], self.initial_guess, dtype=float)

        for t in self.list_trees:
            pred = pred + self.learning_rate * np.asarray(t.predict(X_test, self.type), dtype=float)

        if self.type == 'Classification':
            # pred holds log(odds); convert to a probability before applying
            # the 0.5 cut-off (equivalent to log(odds) >= 0).
            return list(self._sigmoid(pred) >= 0.5)

        return pred