
Q1. What is Gradient Boosting Regression?

Gradient Boosting Regression is an ensemble learning technique that combines multiple weak predictive models, typically decision trees, to create a strong predictive model. In the context of regression, the goal is to predict continuous values by minimizing a loss function, often the mean squared error. The algorithm iteratively adds trees to the model, each one correcting the errors of the previous trees by fitting to the negative gradient of the loss function.

Q2. Implement a simple gradient boosting algorithm from scratch using Python and NumPy. Use a simple regression problem as an example and train the model on a small dataset. Evaluate the model's performance using metrics such as mean squared error and R-squared.

In [1]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Toy regression set: ten points on the noise-free line y = 3x.
X = np.arange(1, 11).reshape(-1, 1)  # single feature column holding 1..10
y = 3 * X.ravel()                    # targets 3, 6, ..., 30

class GradientBoostingRegressor:
    """Minimal gradient boosting regressor for squared-error loss.

    Every boosting round fits a ``DecisionTreeRegressor`` to the current
    residuals ``y - prediction`` and adds that tree's output, scaled by
    ``learning_rate``, to the running prediction. The running prediction
    starts at zero (not at the mean of ``y``).

    Parameters
    ----------
    n_estimators : int
        Number of boosting rounds, i.e. trees in the ensemble.
    learning_rate : float
        Shrinkage factor applied to each tree's contribution.
    max_depth : int
        ``max_depth`` handed to every ``DecisionTreeRegressor``.
    """

    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        # Fitted trees in the order they were added; populated by ``fit``.
        self.models = []

    def fit(self, X, y):
        """Fit the ensemble on ``X`` / ``y`` and return ``self``.

        Any trees left over from an earlier ``fit`` call are discarded first,
        so refitting replaces the ensemble instead of silently extending it.
        """
        # Without this reset a second fit() would stack new trees on top of
        # the old ones and predict() would sum both ensembles.
        self.models = []
        y_pred = np.zeros_like(y, dtype=float)
        for _ in range(self.n_estimators):
            # For squared error the residual is the negative gradient of the loss.
            residuals = y - y_pred
            model = DecisionTreeRegressor(max_depth=self.max_depth)
            model.fit(X, residuals)
            # Shrink the correction before folding it into the running prediction.
            y_pred += self.learning_rate * model.predict(X)
            self.models.append(model)
        return self

    def predict(self, X):
        """Return the summed, shrunken predictions of all fitted trees.

        ``X`` may be any array-like with a leading sample dimension (NumPy
        array, nested list, ...). With no fitted trees the result is all zeros.
        """
        # np.shape works for plain lists as well, unlike the ``.shape`` attribute.
        n_samples = np.shape(X)[0]
        y_pred = np.zeros(n_samples, dtype=float)
        for model in self.models:
            y_pred += self.learning_rate * model.predict(X)
        return y_pred

# Build the booster and fit it on the full toy data set.
gbr = GradientBoostingRegressor(
    max_depth=3,
    learning_rate=0.1,
    n_estimators=100,
)
gbr.fit(X, y)

# Predictions are made on the same samples used for fitting, so the two
# numbers printed below are in-sample (training) metrics.
y_pred = gbr.predict(X)
mse, r2 = mean_squared_error(y, y_pred), r2_score(y, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R-Squared: {r2}')


Mean Squared Error: 2.594340990572291e-07
R-Squared: 0.9999999965059381


Q3. Experiment with different hyperparameters such as learning rate, number of trees, and tree depth to optimize the performance of the model. Use grid search or random search to find the best hyperparameters.

In [5]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, GridSearchCV

# Toy regression set: ten points on the noise-free line y = 3x.
X = np.arange(1, 11)[:, np.newaxis]  # column vector 1..10, shape (10, 1)
y = np.arange(3, 31, 3)              # targets 3, 6, ..., 30

class GradientBoostingRegressor:
    """Minimal gradient boosting regressor for squared-error loss.

    Every boosting round fits a ``DecisionTreeRegressor`` to the current
    residuals ``y - prediction`` and adds that tree's output, scaled by
    ``learning_rate``, to the running prediction. The running prediction
    starts at zero (not at the mean of ``y``).

    ``get_params`` / ``set_params`` are provided so that scikit-learn tools
    such as ``GridSearchCV`` can clone and reconfigure the estimator.

    Parameters
    ----------
    n_estimators : int
        Number of boosting rounds, i.e. trees in the ensemble.
    learning_rate : float
        Shrinkage factor applied to each tree's contribution.
    max_depth : int
        ``max_depth`` handed to every ``DecisionTreeRegressor``.
    """

    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        # Store the arguments untouched: cloning rebuilds the estimator from
        # ``get_params`` and expects to get the very same objects back.
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        # Fitted trees in the order they were added; populated by ``fit``.
        self.models = []

    def fit(self, X, y):
        """Fit the ensemble on ``X`` / ``y`` from scratch and return ``self``."""
        self.models = []  # Reset so a refit replaces the previous ensemble
        y_pred = np.zeros_like(y, dtype=float)
        for _ in range(self.n_estimators):
            # For squared error the residual is the negative gradient of the loss.
            residuals = y - y_pred
            model = DecisionTreeRegressor(max_depth=self.max_depth)
            model.fit(X, residuals)
            # Shrink the correction before folding it into the running prediction.
            y_pred += self.learning_rate * model.predict(X)
            self.models.append(model)
        return self

    def predict(self, X):
        """Return the summed, shrunken predictions of all fitted trees.

        ``X`` may be any array-like with a leading sample dimension (NumPy
        array, nested list, ...). With no fitted trees the result is all zeros.
        """
        # np.shape works for plain lists as well, unlike the ``.shape`` attribute.
        n_samples = np.shape(X)[0]
        y_pred = np.zeros(n_samples, dtype=float)
        for model in self.models:
            y_pred += self.learning_rate * model.predict(X)
        return y_pred

    def get_params(self, deep=True):
        """Return the constructor hyper-parameters as a dict.

        ``deep`` is accepted for scikit-learn API compatibility and is unused
        because this estimator has no nested estimators as parameters.
        """
        return {'n_estimators': self.n_estimators, 'learning_rate': self.learning_rate, 'max_depth': self.max_depth}

    def set_params(self, **params):
        """Update hyper-parameters in place and return ``self``.

        Raises
        ------
        ValueError
            If a key is not one of the names returned by ``get_params``.
            Nothing is modified in that case, so a misspelt name cannot
            silently leave the model at its old settings.
        """
        valid = self.get_params(deep=False)
        unknown = [key for key in params if key not in valid]
        if unknown:
            raise ValueError(
                f'Invalid parameter(s) {unknown!r} for {type(self).__name__}; '
                f'valid parameters are {sorted(valid)!r}.'
            )
        for key, value in params.items():
            setattr(self, key, value)
        return self

# Hold out 20% of the ten samples for the final evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Every combination of these values is tried: 3 * 3 * 3 = 27 candidates.
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [1, 3, 5]
}

# Candidates are compared by 3-fold cross-validation on the training split
# only; the scorer is negated MSE, so a larger score is better.
gbr = GradientBoostingRegressor()
grid_search = GridSearchCV(
    estimator=gbr,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=3,
)
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
print(f'Best Parameters: {grid_search.best_params_}')

# Refit a fresh model with the winning settings on the whole training split,
# then score it on the held-out samples.
gbr = GradientBoostingRegressor(**best_params)
gbr.fit(X_train, y_train)
y_pred = gbr.predict(X_test)

mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R-Squared: {r2}')

Best Parameters: {'learning_rate': 0.2, 'max_depth': 1, 'n_estimators': 200}
Mean Squared Error: 8.997495876851984
R-Squared: 0.9183900600739049


Q4. What is a weak learner in Gradient Boosting?

A weak learner in Gradient Boosting is a model that performs only slightly better than a trivial baseline: random guessing in classification, or always predicting the mean of the target in regression. In the context of decision trees, a weak learner is typically a shallow tree (one with only a few splits). The idea is that by combining many weak learners, each making a small improvement, the overall model becomes much stronger.

Q5. What is the intuition behind the Gradient Boosting algorithm?

The intuition behind Gradient Boosting is to build a strong predictive model by combining multiple weak models in a sequential manner. Each new model is trained to correct the errors made by the previous models. This is done by fitting each new model to the negative gradient of the loss function with respect to the predictions of the ensemble. By iteratively reducing the residual errors, the ensemble improves its predictions.



Q6. How does the Gradient Boosting algorithm build an ensemble of weak learners?

Gradient Boosting builds an ensemble of weak learners as follows:

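1. Initialize the model with a constant prediction (usually the mean of the target variable; the from-scratch implementations in this notebook start from zero instead).
2. Compute the residuals (errors) between the true target values and the current model predictions. For the squared-error loss these residuals equal, up to a constant factor, the negative gradient of the loss.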
3. Fit a weak learner (e.g., a decision tree) to these residuals.
4. Update the model by adding the predictions of the weak learner, scaled by a learning rate.
5. Repeat steps 2-4 for a specified number of iterations or until the residuals are minimized.

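Q7. What are the steps involved in constructing the mathematical intuition of the Gradient Boosting algorithm?

For a differentiable loss $L(y, F(x))$, Gradient Boosting performs gradient descent in function space:

1. Start from a constant model $F_0(x) = \arg\min_c \sum_i L(y_i, c)$ (the mean of $y$ for squared error).
2. At round $m$, compute the pseudo-residuals $r_{im} = -\left[\partial L(y_i, F(x_i)) / \partial F(x_i)\right]_{F = F_{m-1}}$; for $L = \tfrac{1}{2}(y - F)^2$ this is simply $r_{im} = y_i - F_{m-1}(x_i)$.
3. Fit a weak learner $h_m(x)$ to the pairs $(x_i, r_{im})$.
4. Update the model with a learning rate $\eta$: $F_m(x) = F_{m-1}(x) + \eta \, h_m(x)$.
5. After $M$ rounds the prediction is $F_M(x) = F_0(x) + \eta \sum_{m=1}^{M} h_m(x)$.

The code below applies these steps with squared-error loss, starting from $F_0 = 0$, and tunes $M$, $\eta$ and the tree depth with a manual grid search.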

In [4]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, GridSearchCV

# Toy regression set: ten points on the noise-free line y = 3x.
X = np.arange(10).reshape(10, 1) + 1  # single feature column holding 1..10
y = 3 * np.arange(1, 11)              # targets 3, 6, ..., 30

class GradientBoostingRegressor:
    """Minimal gradient boosting regressor for squared-error loss.

    Every boosting round fits a ``DecisionTreeRegressor`` to the current
    residuals ``y - prediction`` and adds that tree's output, scaled by
    ``learning_rate``, to the running prediction. The running prediction
    starts at zero (not at the mean of ``y``).

    Parameters
    ----------
    n_estimators : int
        Number of boosting rounds, i.e. trees in the ensemble.
    learning_rate : float
        Shrinkage factor applied to each tree's contribution.
    max_depth : int
        ``max_depth`` handed to every ``DecisionTreeRegressor``.
    """

    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        # Fitted trees in the order they were added; populated by ``fit``.
        self.models = []

    def fit(self, X, y):
        """Fit the ensemble on ``X`` / ``y`` and return ``self``.

        Any trees left over from an earlier ``fit`` call are discarded first,
        so refitting replaces the ensemble instead of silently extending it.
        """
        # Without this reset a second fit() would stack new trees on top of
        # the old ones and predict() would sum both ensembles.
        self.models = []
        y_pred = np.zeros_like(y, dtype=float)
        for _ in range(self.n_estimators):
            # For squared error the residual is the negative gradient of the loss.
            residuals = y - y_pred
            model = DecisionTreeRegressor(max_depth=self.max_depth)
            model.fit(X, residuals)
            # Shrink the correction before folding it into the running prediction.
            y_pred += self.learning_rate * model.predict(X)
            self.models.append(model)
        return self

    def predict(self, X):
        """Return the summed, shrunken predictions of all fitted trees.

        ``X`` may be any array-like with a leading sample dimension (NumPy
        array, nested list, ...). With no fitted trees the result is all zeros.
        """
        # np.shape works for plain lists as well, unlike the ``.shape`` attribute.
        n_samples = np.shape(X)[0]
        y_pred = np.zeros(n_samples, dtype=float)
        for model in self.models:
            y_pred += self.learning_rate * model.predict(X)
        return y_pred

# Hold out 20% of the ten samples.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Every combination of these values is tried: 3 * 3 * 3 = 27 candidates.
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [1, 3, 5]
}

# Hand-written exhaustive search that keeps the first candidate with the
# lowest MSE seen so far.
# NOTE(review): candidates are ranked by their error on the test split, and
# the final score below is reported on that same split, so the reported
# metrics are optimistically biased. Ranking on a validation split or by
# cross-validation over the training data would avoid this.
best_score = float('inf')
best_params = None

for n_estimators in param_grid['n_estimators']:
    for learning_rate in param_grid['learning_rate']:
        for max_depth in param_grid['max_depth']:
            gbr = GradientBoostingRegressor(
                max_depth=max_depth,
                learning_rate=learning_rate,
                n_estimators=n_estimators,
            )
            gbr.fit(X_train, y_train)
            y_pred = gbr.predict(X_test)
            mse = mean_squared_error(y_test, y_pred)
            if not mse < best_score:
                continue  # no improvement; keep the current best
            best_score = mse
            best_params = {'n_estimators': n_estimators, 'learning_rate': learning_rate, 'max_depth': max_depth}

print(f'Best Parameters: {best_params}')

# Refit a fresh model with the winning settings and score it on the held-out samples.
gbr = GradientBoostingRegressor(**best_params)
gbr.fit(X_train, y_train)
y_pred = gbr.predict(X_test)

mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R-Squared: {r2}')


Best Parameters: {'n_estimators': 50, 'learning_rate': 0.1, 'max_depth': 1}
Mean Squared Error: 8.08655237415213
R-Squared: 0.9266525861754908
