# Q1. 
Gradient Boosting Regression is a machine learning technique used for regression tasks. It is an ensemble method that builds a predictive model as a weighted sum of weak prediction models, typically shallow decision trees. The weak learners are trained sequentially: each new learner is fit to the residuals (the difference between the actual values and the current ensemble's predictions) left by the learners trained before it, and its scaled prediction is then added to the ensemble.

# Q2. Here's a simple implementation of gradient boosting regression using Python and NumPy, with scikit-learn's DecisionTreeRegressor as the weak learner:

In [2]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor

class GradientBoostingRegressor:
    """Minimal gradient boosting regressor for squared-error loss.

    The ensemble starts from a prediction of zero. Each boosting round fits a
    weak learner to the current residuals and adds ``learning_rate`` times its
    prediction to the ensemble.

    Parameters
    ----------
    n_estimators : int, default=100
        Number of boosting rounds (weak learners) to train.
    learning_rate : float, default=0.1
        Shrinkage factor applied to every weak learner's prediction.
    max_depth : int, default=3
        Maximum depth passed to each ``DecisionTreeRegressor``.

    Attributes
    ----------
    models : list
        Fitted weak learners, in training order. Empty until ``fit`` is called.
    """

    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.models = []

    def _make_tree(self):
        """Return a fresh, unfitted weak learner for one boosting round."""
        return DecisionTreeRegressor(max_depth=self.max_depth)

    def fit(self, X, y):
        """Train the ensemble on ``X`` and ``y`` and return ``self``.

        Calling ``fit`` again discards the previously trained learners
        instead of appending to them.
        """
        # Work on a float copy: the in-place update below would raise a
        # casting error for integer targets, and the caller's ``y`` must not
        # be modified.
        residuals = np.array(y, dtype=float)

        # Build into a local list so a refit starts from an empty ensemble.
        models = []
        for _ in range(self.n_estimators):
            tree = self._make_tree()
            tree.fit(X, residuals)

            # For squared error the negative gradient is the residual itself,
            # so subtracting the shrunken prediction is one gradient step.
            residuals -= self.learning_rate * tree.predict(X)
            models.append(tree)

        self.models = models
        return self

    def predict(self, X):
        """Return the sum of the shrunken predictions of all fitted learners.

        An unfitted model (no learners) predicts zero for every row of ``X``.
        """
        predictions = np.zeros(len(X))
        for model in self.models:
            predictions += self.learning_rate * model.predict(X)
        return predictions

# Example usage:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Generate a small synthetic regression problem: 100 samples, a single
# feature, low noise; random_state makes the data reproducible.
X, y = make_regression(n_samples=100, n_features=1, noise=0.1, random_state=42)

# Hold out 20% of the samples for evaluation (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the gradient boosting regressor defined above
gb_regressor = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3)
gb_regressor.fit(X_train, y_train)

# Make predictions on the held-out test set
y_pred = gb_regressor.predict(X_test)

# Evaluate the model with mean squared error on the test set (lower is better)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)


Mean Squared Error: 1.338029663217839


# Q3. To experiment with hyperparameters such as the learning rate, the number of trees, and the tree depth in order to optimize the model's performance, you can use grid search or random search. Here's an example using scikit-learn's GridSearchCV for hyperparameter tuning:

In [6]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.base import BaseEstimator

class GradientBoostingRegressor(BaseEstimator):
    """Minimal gradient boosting regressor usable with scikit-learn tooling.

    The ensemble starts from a prediction of zero. Each boosting round fits a
    weak learner to the current residuals and adds ``learning_rate`` times its
    prediction to the ensemble.

    Parameters
    ----------
    n_estimators : int, default=100
        Number of boosting rounds (weak learners) to train.
    learning_rate : float, default=0.1
        Shrinkage factor applied to every weak learner's prediction.
    max_depth : int, default=3
        Maximum depth passed to each ``DecisionTreeRegressor``.

    Attributes
    ----------
    models : list
        Fitted weak learners, in training order. Empty until ``fit`` is called.
    """

    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.models = []

    def _make_tree(self):
        """Return a fresh, unfitted weak learner for one boosting round."""
        return DecisionTreeRegressor(max_depth=self.max_depth)

    def fit(self, X, y):
        """Train the ensemble on ``X`` and ``y`` and return ``self``.

        Calling ``fit`` again discards the previously trained learners
        instead of appending to them.
        """
        # Work on a float copy: the in-place update below would raise a
        # casting error for integer targets, and the caller's ``y`` must not
        # be modified.
        residuals = np.array(y, dtype=float)

        # Build into a local list so a refit starts from an empty ensemble.
        models = []
        for _ in range(self.n_estimators):
            tree = self._make_tree()
            tree.fit(X, residuals)

            # For squared error the negative gradient is the residual itself,
            # so subtracting the shrunken prediction is one gradient step.
            residuals -= self.learning_rate * tree.predict(X)
            models.append(tree)

        self.models = models
        return self

    def predict(self, X):
        """Return the sum of the shrunken predictions of all fitted learners.

        An unfitted model (no learners) predicts zero for every row of ``X``.
        """
        predictions = np.zeros(len(X))
        for model in self.models:
            predictions += self.learning_rate * model.predict(X)
        return predictions

    def get_params(self, deep=True):
        """Return the constructor hyperparameters as a dict.

        ``deep`` is accepted for scikit-learn compatibility; this estimator
        has no nested estimator parameters, so it has no effect.
        """
        return {
            'n_estimators': self.n_estimators,
            'learning_rate': self.learning_rate,
            'max_depth': self.max_depth
        }

    def set_params(self, **params):
        """Set hyperparameters by name and return ``self``.

        Returning the estimator is part of the scikit-learn estimator
        contract; callers such as the model-selection utilities use the
        return value.

        Raises
        ------
        ValueError
            If a name is not one of the constructor hyperparameters.
        """
        valid = self.get_params(deep=False)
        for name, value in params.items():
            if name not in valid:
                raise ValueError(
                    f"Invalid parameter {name!r} for estimator "
                    f"{type(self).__name__}. Valid parameters are: "
                    f"{sorted(valid)!r}."
                )
            setattr(self, name, value)
        return self
