<a href="https://colab.research.google.com/github/Sameersah/decision-trees-ensemble/blob/main/Gradient_Boosting_Machine_(GBM)_Implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Gradient Boosting Machine (GBM) Implementation

GBM builds an additive model in a forward stage-wise manner. The idea is to fit a sequence of weak learners (e.g., decision trees) to minimize a loss function.

Steps:
Define the Loss Function: Use Mean Squared Error for simplicity in regression tasks.
Initialize the Model: Start with a baseline model (e.g., the mean of the target variable for regression).
Iterative Training:
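Compute residuals (errors) as the difference between the true targets and the current model predictions; for Mean Squared Error these residuals are proportional to the negative gradient of the loss.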
Train a weak learner (decision tree) on the residuals.
Update the model by adding the predictions of the weak learner multiplied by a learning rate.
Repeat: Continue adding weak learners until convergence or until a stopping criterion is met (the implementation below uses a fixed number of estimators).

In [2]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor

class GradientBoostingMachine:
    """Gradient boosting regressor built from shallow decision trees.

    The model starts from the mean of the training targets and then adds
    ``n_estimators`` regression trees one at a time. Each tree is fitted to
    the residuals of the current ensemble and its predictions are added to
    the ensemble scaled by ``learning_rate``.

    Args:
        n_estimators: Number of trees fitted by ``fit``.
        learning_rate: Factor applied to every tree's prediction.
        max_depth: ``max_depth`` passed to each ``DecisionTreeRegressor``.
    """

    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        # Fitted trees, in the order they were added to the ensemble.
        self.models = []
        # Baseline prediction (mean of the training targets); None until fit.
        self.initial_value = None

    def fit(self, X, y):
        """Fit the ensemble on training data.

        Calling ``fit`` again discards the previously fitted trees, so the
        model always reflects only the most recent training data.

        Args:
            X: Training features, in any form ``DecisionTreeRegressor.fit``
                accepts.
            y: Training targets (array, Series or plain sequence). A column
                vector of shape ``(n, 1)`` is flattened to ``(n,)``.
        """
        y = np.asarray(y, dtype=float)
        # A single-output tree returns 1-D predictions, so a column-vector
        # target would not line up with them in the in-place update below.
        if y.ndim == 2 and y.shape[1] == 1:
            y = y.ravel()

        # Start from a clean slate so repeated fits do not stack ensembles.
        self.models = []
        self.initial_value = np.mean(y)

        current_predictions = np.full(y.shape, self.initial_value)
        for _ in range(self.n_estimators):
            residuals = y - current_predictions
            tree = DecisionTreeRegressor(max_depth=self.max_depth)
            tree.fit(X, residuals)
            self.models.append(tree)
            current_predictions += self.learning_rate * tree.predict(X)

    def predict(self, X):
        """Predict targets for ``X`` using the fitted ensemble.

        Args:
            X: Feature matrix with one row per sample. Plain lists/tuples
                are converted to a NumPy array; other inputs are passed to
                the trees unchanged.

        Returns:
            1-D float array with one prediction per row of ``X``.

        Raises:
            RuntimeError: If the model has not been fitted yet.
        """
        if self.initial_value is None:
            raise RuntimeError(
                "This GradientBoostingMachine instance is not fitted yet; "
                "call fit() before predict()."
            )
        if isinstance(X, (list, tuple)):
            X = np.asarray(X)

        predictions = np.full((X.shape[0],), self.initial_value, dtype=float)
        for tree in self.models:
            predictions += self.learning_rate * tree.predict(X)
        return predictions

# End-to-end demo: fit the GBM on synthetic data and report held-out error.
from sklearn.datasets import make_regression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Synthetic single-feature regression problem (seeded for reproducibility).
X, y = make_regression(
    n_samples=100,
    n_features=1,
    noise=0.1,
    random_state=42,
)

# Hold out 20% of the samples for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the boosted ensemble on the training portion only.
gbm = GradientBoostingMachine(n_estimators=50, learning_rate=0.1, max_depth=3)
gbm.fit(X_train, y_train)

# Score the model on the held-out samples.
y_pred = gbm.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)


Mean Squared Error: 1.3230540144415044
