# Gradient Boost

#### What is Gradient Boosting?

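Gradient Boosting is a machine learning ensemble technique that builds a strong predictive model by combining multiple weak models (typically decision trees). It works sequentially: each new model is trained to correct the errors of the ensemble built so far. The "gradient" in Gradient Boosting refers to the fact that each new model is fit to the negative gradient of the loss function with respect to the current predictions, which amounts to gradient descent in function space; for squared-error loss this negative gradient is simply the residual.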

Key Terms:

1. Ensemble Learning: Combining multiple models to improve performance.

2. Weak Learner: A simple model that performs slightly better than random guessing (e.g., shallow decision trees).

3. Residuals: The difference between the actual and predicted values (errors).

4. Gradient Descent: An optimization algorithm that minimizes a loss function by iteratively stepping in the direction of steepest descent (the negative gradient).

#### How Gradient Boosting Works:

1. Start with an initial prediction (e.g., the mean of the target variable).

2. Calculate the residuals (errors) of the current model.

3. Train a new weak learner (e.g., a decision tree) to predict the residuals.

4. Update the model by adding the weak learner's predictions, scaled by a learning rate.

5. Repeat steps 2–4 until a stopping condition is met (e.g., a fixed number of trees).

In [1]:
class DecisionTree:
    """Regression tree grown by greedy variance reduction.

    The fitted tree is stored in ``self.tree`` as nested dicts. An internal
    node has the keys ``'feature_index'``, ``'threshold'``, ``'left'`` and
    ``'right'``; a leaf has the single key ``'prediction'`` holding the mean
    target of the training samples that reached it.

    Inputs only need to support ``len()``, integer indexing and iteration, so
    nested lists and NumPy arrays both work. Emptiness is therefore tested
    with ``len()`` rather than truthiness, which is ambiguous for arrays.
    """

    def __init__(self, max_depth=3):
        """Create an unfitted tree.

        Args:
            max_depth: Maximum number of splits on any root-to-leaf path.
        """
        self.max_depth = max_depth
        self.tree = None  # populated by fit()

    def _calculate_variance(self, y):
        """Return the population variance of ``y`` (0 for an empty input)."""
        count = len(y)
        if count == 0:
            return 0
        mean = sum(y) / count
        return sum((value - mean) ** 2 for value in y) / count

    def _split(self, X, y, feature_index, threshold):
        """Partition the samples on ``X[i][feature_index] <= threshold``.

        Returns:
            ``(left_X, left_y, right_X, right_y)`` where the left side holds
            the samples that satisfy the condition and the right side the rest.
        """
        left_X, left_y, right_X, right_y = [], [], [], []
        for row, target in zip(X, y):
            if row[feature_index] <= threshold:
                left_X.append(row)
                left_y.append(target)
            else:
                right_X.append(row)
                right_y.append(target)
        return left_X, left_y, right_X, right_y

    def _find_best_split(self, X, y):
        """Find the split with the lowest size-weighted child variance.

        Candidate thresholds are the distinct values of each feature. Splits
        that leave one side empty are skipped.

        Returns:
            ``(feature_index, threshold)``, or ``(None, None)`` when no
            candidate separates the samples.
        """
        best_feature, best_threshold, best_variance = None, None, float('inf')
        for feature_index in range(len(X[0])):
            thresholds = {row[feature_index] for row in X}
            for threshold in thresholds:
                _, left_y, _, right_y = self._split(X, y, feature_index, threshold)
                if len(left_y) == 0 or len(right_y) == 0:
                    continue
                total_variance = (
                    len(left_y) * self._calculate_variance(left_y)
                    + len(right_y) * self._calculate_variance(right_y)
                ) / len(y)
                # Strict comparison: on ties the first candidate seen wins.
                if total_variance < best_variance:
                    best_feature, best_threshold, best_variance = (
                        feature_index, threshold, total_variance)
        return best_feature, best_threshold

    def _build_tree(self, X, y, depth):
        """Recursively build the subtree for the samples ``(X, y)``.

        A leaf is produced when ``depth`` reaches ``max_depth``, when at most
        one sample remains, or when no valid split exists.
        """
        if depth >= self.max_depth or len(y) <= 1:
            return {'prediction': sum(y) / len(y)}
        feature, threshold = self._find_best_split(X, y)
        if feature is None:
            return {'prediction': sum(y) / len(y)}
        left_X, left_y, right_X, right_y = self._split(X, y, feature, threshold)
        return {
            'feature_index': feature,
            'threshold': threshold,
            'left': self._build_tree(left_X, left_y, depth + 1),
            'right': self._build_tree(right_X, right_y, depth + 1),
        }

    def fit(self, X, y):
        """Fit the decision tree to the data.

        Args:
            X: Sequence of samples, each a sequence of feature values.
            y: Sequence of numeric targets, one per sample.

        Raises:
            ValueError: If the data is empty or ``X`` and ``y`` differ in
                length.
        """
        if len(X) == 0 or len(y) == 0:
            raise ValueError("Cannot fit a DecisionTree on empty data.")
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}."
            )
        self.tree = self._build_tree(X, y, 0)

    def _predict_sample(self, x, tree):
        """Predict a single sample by walking ``tree`` down to a leaf."""
        node = tree
        while 'prediction' not in node:
            goes_left = x[node['feature_index']] <= node['threshold']
            node = node['left'] if goes_left else node['right']
        return node['prediction']

    def predict(self, X):
        """Predict the target for multiple samples.

        Returns:
            A list with one prediction per row of ``X``.

        Raises:
            RuntimeError: If the tree has not been fitted yet.
        """
        if self.tree is None:
            raise RuntimeError("DecisionTree must be fitted before calling predict().")
        return [self._predict_sample(x, self.tree) for x in X]

In [2]:
class GradientBoostingRegressor:
    """Gradient boosting for squared-error regression on top of ``DecisionTree``.

    The model starts from the mean of the training targets and then adds
    ``n_estimators`` trees, each fitted to the residuals of the ensemble so
    far and scaled by the learning rate ``lr``.
    """

    def __init__(self, n_estimators=100, lr=0.01, max_depth=3):
        """Create an unfitted model.

        Args:
            n_estimators: Number of boosting rounds (trees) to fit.
            lr: Learning rate applied to every tree's contribution.
            max_depth: Maximum depth passed to each ``DecisionTree``.
        """
        self.estimators = n_estimators
        self.lr = lr
        self.max_depth = max_depth
        self.trees = []
        self.initial_predictions = None  # mean of the training targets, set by fit()

    def fit(self, X, y):
        """Fit the gradient boosting model to the data.

        Any trees from a previous call to ``fit`` are discarded, so refitting
        yields the same model as fitting a fresh instance.

        Args:
            X: Sequence of samples, each a sequence of feature values.
            y: Sequence of numeric targets, one per sample.

        Raises:
            ValueError: If ``y`` is empty or ``X`` and ``y`` differ in length.
        """
        # len() rather than truthiness so that NumPy arrays are accepted.
        if len(y) == 0:
            raise ValueError("Cannot fit GradientBoostingRegressor on empty data.")
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}."
            )

        # Step 1: initialise the model with the mean of the target variable.
        initial_prediction = sum(y) / len(y)
        F = [initial_prediction] * len(y)

        # Build into a local list so that a failure part-way through leaves
        # the previously fitted state untouched.
        trees = []
        for _ in range(self.estimators):
            # Step 2: compute the residuals, i.e. the negative gradient of
            # the squared-error loss at the current predictions.
            residuals = [target - current for target, current in zip(y, F)]

            # Step 3: fit a decision tree to the residuals.
            tree = DecisionTree(max_depth=self.max_depth)
            tree.fit(X, residuals)

            # Step 4: move the predictions a fraction `lr` towards the targets.
            F = [current + self.lr * step
                 for current, step in zip(F, tree.predict(X))]

            trees.append(tree)

        self.initial_predictions = initial_prediction
        self.trees = trees

    def predict(self, X):
        """Predict the target for new data.

        Returns:
            A list with one prediction per row of ``X``.

        Raises:
            RuntimeError: If the model has not been fitted yet.
        """
        if self.initial_predictions is None:
            raise RuntimeError(
                "GradientBoostingRegressor must be fitted before calling predict()."
            )

        # Start with the initial prediction.
        predictions = [self.initial_predictions] * len(X)

        # Add the scaled contribution of every tree.
        for tree in self.trees:
            tree_predictions = tree.predict(X)
            predictions = [current + self.lr * step
                           for current, step in zip(predictions, tree_predictions)]

        return predictions


In [8]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

# Load the dataset
# `data` is the object returned by scikit-learn's load_diabetes(); its
# `.data` attribute is used as the feature matrix and `.target` as the
# regression targets.
data = load_diabetes()
X, y = data.data, data.target

# Split the dataset into training and testing sets
# test_size=0.2 holds out 20% of the rows for evaluation; random_state=42
# is passed so the split can be reproduced between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Sanity-check the split sizes (rows, features).
print("Training data shape:", X_train.shape)
print("Testing data shape:", X_test.shape)

Training data shape: (353, 10)
Testing data shape: (89, 10)


In [10]:
# Initialize the Gradient Boosting model
# 50 boosting rounds, each tree's contribution scaled by 0.1, trees limited
# to depth 3. These override the constructor defaults of 100 rounds and
# lr=0.01.
model = GradientBoostingRegressor(n_estimators=50, lr=0.1, max_depth=3)

# Train the model
# fit() stores the mean of y_train as the baseline and appends one fitted
# DecisionTree per boosting round to model.trees.
model.fit(X_train, y_train)

In [11]:
# Make predictions on the test data
# predict() returns a plain Python list with one value per row of X_test:
# the training-set mean plus the lr-scaled output of every fitted tree.
y_pred = model.predict(X_test)

In [13]:
# Calculate Mean Squared Error (MSE)
def mean_squared_error(y_true, y_pred):
    """Return the mean of the squared differences between targets and predictions.

    Args:
        y_true: Sequence of actual target values.
        y_pred: Sequence of predicted values, the same length as ``y_true``.

    Raises:
        ValueError: If the inputs are empty or differ in length.
    """
    count = len(y_true)
    # len() rather than truthiness so that NumPy arrays are accepted.
    if count == 0:
        raise ValueError("mean_squared_error requires at least one sample.")
    if len(y_pred) != count:
        raise ValueError(
            f"y_true and y_pred must have the same length, got {count} and {len(y_pred)}."
        )
    return sum((actual - predicted) ** 2
               for actual, predicted in zip(y_true, y_pred)) / count

# Calculate R-squared (R²)
def r_squared(y_true, y_pred):
    """Return the coefficient of determination ``1 - SS_res / SS_tot``.

    When ``y_true`` is constant the total sum of squares is zero and the
    ratio is undefined. In that case 1.0 is returned if the predictions are
    exact and 0.0 otherwise, the same convention scikit-learn's ``r2_score``
    uses by default.

    Args:
        y_true: Sequence of actual target values.
        y_pred: Sequence of predicted values, the same length as ``y_true``.

    Raises:
        ValueError: If the inputs are empty or differ in length.
    """
    count = len(y_true)
    # len() rather than truthiness so that NumPy arrays are accepted.
    if count == 0:
        raise ValueError("r_squared requires at least one sample.")
    if len(y_pred) != count:
        raise ValueError(
            f"y_true and y_pred must have the same length, got {count} and {len(y_pred)}."
        )

    mean_y = sum(y_true) / count
    ss_total = sum((actual - mean_y) ** 2 for actual in y_true)
    ss_residual = sum((actual - predicted) ** 2
                      for actual, predicted in zip(y_true, y_pred))

    if ss_total == 0:
        # Constant targets: avoid dividing by zero.
        return 1.0 if ss_residual == 0 else 0.0
    return 1 - (ss_residual / ss_total)

# Evaluate the model
# Both metrics compare the held-out targets with the predictions made
# above: lower MSE is better, and an R² of 1 means a perfect fit.
mse = mean_squared_error(y_test, y_pred)
r2 = r_squared(y_test, y_pred)

print("Mean Squared Error (MSE):", mse)
print("R-squared (R²):", r2)

Mean Squared Error (MSE): 2858.724774704767
R-squared (R²): 0.4604298045058516
