# 6. Gradient Boosting Decision Trees (GBDT)
This lab is about the Gradient Boosting Decision Trees. We will demonstrate the GBDT algorithm using the `numpy` library with the Hubor loss function.
Your task of this lab is to have a folder named `lab6` in your repository as `labs/lab6`. Inside the folder, you should have the python file named `decision_tree_regression.py` and `gradient_boosting_decision_tree.py`. The `decision_tree_regression.py` file should contain the decision tree regression class and is from your previous lab 5, and the `gradient_boosting_decision_tree.py` file should contain the gradient boosting decision tree class.
* create the `decision_tree_regression.py` file and copy the decision tree regression class from your previous lab 5.
* use the following code and complete the function `mae_gradient_descent` and `mae_loss_gradient` in the `gradient_boosting_decision_tree.py` file.
* the goal is under the folder `labs/lab6`, you should be able to run the code by `python gradient_boosting_decision_tree.py` and get the output of the RMSE value.

```python

In [1]:
import numpy as np
from LAB05_DECISION_TREE_REG import my_DecisionTreeReg as decisiontree
from LAB05_DECISION_TREE_REG import RMSE

In [2]:
class GradientBoostingDecisionTree:
    def __init__(self, max_depth=8, n_estimators=10, lr=0.01, max_features=5, delta=1.0):
        self.max_depth = max_depth
        self.n_estimators = n_estimators
        self.lr = lr # Learning rate (shrinkage)
        self.delta = delta # Huber loss delta parameter

        self.gradient_coeff = []
        self.stop = n_estimators

        #self.tree0 = decisiontree(max_depth=self.max_depth, max_features=max_features)
        #self.trees = [decisiontree(max_depth=self.max_depth, max_features=max_features) for _ in range(self.n_estimators)]
        self.tree0 = decisiontree(max_depth=self.max_depth)
        self.trees = [decisiontree(max_depth=self.max_depth) for _ in range(self.n_estimators)]
        
    def fit(self, X, y):
        self.M, self.N = X.shape

        self.tree0.fit(X, y)
        y_pred = self.tree0.predict(X).reshape((self.M, 1))
        residue = y - y_pred

        for idx, itree in enumerate(self.trees):
            if np.linalg.norm(residue) < 1e-4:
                self.stop = idx
                break
                
            itree.fit(X, residue)
            ipred = itree.predict(X).reshape((self.M, 1))

            alpha = self.huber_gradient_descent(ipred, residue, self.lr, 150)
            self.gradient_coeff.append(alpha)

            y_pred = np.add(y_pred, self.lr * alpha * ipred)
            residue = y - y_pred
            # Update residue based on Huber loss gradient
            residue = self.huber_loss_gradient(residue, self.delta)

    def predict(self, X_test):
        y_pred = self.tree0.predict(X_test).reshape((-1, 1))
        for idx, itree in enumerate(self.trees):
            if idx == self.stop:
                break
            coeff = self.lr * self.gradient_coeff[idx]
            y_pred = np.add(y_pred, coeff * itree.predict(X_test).reshape((-1, 1)))
        return y_pred

    def huber_gradient_descent(self, a, b, lr, epochs):
        alpha = np.random.randn(1)[0]
        for epoch in range(epochs):
            # update alpha using the gradient of the Huber loss
            grad = np.sum(self.huber_loss_gradient(b - a * alpha, self.delta) * (-a))
            alpha -= lr * grad
        return alpha

    def huber_loss_gradient(self, a, delta):
        # Gradient of the Huber loss
        return np.where(np.abs(a) <= delta, -a, -delta * np.sign(a))



if __name__ == '__main__':
    import pandas as pd
    X_train = pd.read_csv('airfoil/airfoil_self_noise_X_train.csv').values
    y_train = pd.read_csv('airfoil/airfoil_self_noise_y_train.csv').values
    X_test  = pd.read_csv('airfoil/airfoil_self_noise_X_test.csv').values
    y_test  = pd.read_csv('airfoil/airfoil_self_noise_y_test.csv').values

    GBDT = GradientBoostingDecisionTree(n_estimators=200, max_depth=15)
    GBDT.fit(X_train, y_train)

    y_pred = GBDT.predict(X_test)
    print(RMSE(y_pred, y_test))


4.798461954479579


### MY CODE WITH MSE 
#### importing my regressors from the previous labs

In [3]:
import numpy as np
from LAB05_DECISION_TREE_REG import my_DecisionTreeReg as DTR
from LAB05_DECISION_TREE_REG import RMSE

####  loading my data set inside an independent set

In [None]:
X_train = pd.read_csv('airfoil/airfoil_self_noise_X_train.csv').values
y_train = pd.read_csv('airfoil/airfoil_self_noise_y_train.csv').values
X_test  = pd.read_csv('airfoil/airfoil_self_noise_X_test.csv').values
y_test  = pd.read_csv('airfoil/airfoil_self_noise_y_test.csv').values

### implimentation

In [4]:
class GradientBoostingRegressor:
    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.models = []
        
    def fit(self, X, y):
        # Initialize the residual as the target values
        residual = np.copy(y)
        
        # Train a sequence of decision trees
        for _ in range(self.n_estimators):
            # Train a decision tree to predict the residual
            tree = DTR(max_depth=self.max_depth)
            tree.fit(X, residual)
            
            # Update the predictions by adding the predictions of the new tree
            predictions = tree.predict(X)
            y_pred = np.sum([model.predict(X) for model in self.models], axis=0) + self.learning_rate * predictions
            
            # Update the residual
            residual -= predictions
                        
            # Add the trained tree to the list of models
            self.models.append(tree)
            
            # Adjust the learning rate
            self.adjust_learning_rate(y_pred, residual, mse_gradient_factor=0.1)
    
    def predict(self, X):
        # Make predictions by summing the predictions of all trees
        return np.sum([model.predict(X) for model in self.models], axis=0)
    
    
    def adjust_learning_rate(self, y_pred, residual, mse_gradient_factor=0.1):
        # Calculate the gradient of the mean squared error
        mse_gradient = self._calculate_mse_gradient(y_pred, residual)
        
        # Update the learning rate based on the gradient of the mean squared error
        self.learning_rate *= np.exp(-mse_gradient_factor * mse_gradient)
    
    def _calculate_mse_gradient(self, y_pred, residual):
               
        # Calculate the gradient of the mean squared error
        mse_gradient = np.mean(2 * (y_pred - residual))
        
        return mse_gradient

In [6]:
GBDT2 = GradientBoostingRegressor(n_estimators=200, max_depth=15)
GBDT2.fit(X_train, y_train.reshape(-1))
y_pred = GBDT2.predict(X_test)
print(RMSE(y_pred, y_test.reshape(-1)))

5.766644215547662


#### Just testining using other data sets that do not need reshaping

In [5]:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Generate random regression data
X, y = make_regression(n_samples=1000, n_features=10, noise=0.1, random_state=42)

# Split the data into training and testing sets
X_train1, X_test1, y_train1, y_test1 = train_test_split(X, y, test_size=0.2, random_state=42)

# Train your Gradient Boosting Regressor model
gbr = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3)
gbr.fit(X_train1, y_train1)

# Predict on the testing set
y_pred1 = gbr.predict(X_test1)

# Evaluate the model using mean squared error
mse = mean_squared_error(y_test1, y_pred1)
print("Mean Squared Error:", mse)


Mean Squared Error: 17251.487700933678


In [None]:
X.shape,X_train.shape,y.shape,y_train.shape