In [None]:
#Question 1

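Gradient boosting regression trees are an ensemble method built from decision trees. A decision tree makes a prediction using a tree structure: starting from the root, it branches at each node according to a condition on the input features and heads toward the leaves, and the leaf it reaches gives the prediction result. Gradient boosting fits many such trees one after another, each new tree being trained on the residuals left by the current ensemble (for squared-error loss, the negative gradient), and adds their predictions, scaled by a learning rate, to an initial constant prediction.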

In [None]:
#Question 2

import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.tree import DecisionTreeRegressor
class GradientBoostingRegressor:
    """Minimal gradient-boosted regression trees for squared-error loss.

    The model is an additive ensemble: a constant baseline (the mean of the
    training targets) plus ``n_estimators`` regression trees. Each tree is
    fitted to the residuals left by the stages before it, and its output is
    shrunk by ``learning_rate`` before being added to the prediction.

    Attributes populated by ``fit``:
        models: ``[baseline, tree_1, ..., tree_n]`` -- element 0 is a float,
            the remaining elements are fitted trees.
        residuals: the residual vector seen by each stage, followed by the
            residual remaining after the last stage
            (``n_estimators + 1`` arrays in total).
    """

    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        """Store the hyperparameters; no training happens here.

        Args:
            n_estimators: number of boosting stages (trees) to fit.
            learning_rate: shrinkage factor applied to every tree's output.
            max_depth: maximum depth passed to each ``DecisionTreeRegressor``.
        """
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.models = []
        self.residuals = []

    def fit(self, X, y):
        """Fit the ensemble to ``(X, y)``, discarding any previous fit.

        Args:
            X: training features, passed unchanged to each tree.
            y: training targets; converted to a float NumPy array.

        Returns:
            self, so calls can be chained.
        """
        y = np.asarray(y, dtype=float)
        baseline = np.mean(y)

        # Rebuild the state from scratch. Appending to lists left over from an
        # earlier fit() would train on stale residuals and place a second
        # baseline float among the trees, which breaks predict().
        residual = y - baseline
        self.models = [baseline]
        self.residuals = [residual]

        for _ in range(self.n_estimators):
            # Each stage learns what the ensemble so far still gets wrong.
            tree = DecisionTreeRegressor(max_depth=self.max_depth)
            tree.fit(X, residual)
            self.models.append(tree)

            # Remove the (shrunk) contribution of this stage from the residual.
            residual = residual - self.learning_rate * tree.predict(X)
            self.residuals.append(residual)

        return self

    def predict(self, X):
        """Return predictions for ``X``: baseline plus the shrunk tree outputs.

        ``fit`` must have been called first.
        """
        y_pred = np.zeros(len(X))
        for tree in self.models[1:]:
            y_pred += self.learning_rate * tree.predict(X)
        return y_pred + self.models[0]

    def evaluate(self, X, y):
        """Return ``(mse, r2)`` of the model's predictions on ``(X, y)``."""
        y_pred = self.predict(X)
        mse = mean_squared_error(y, y_pred)
        r2 = r2_score(y, y_pred)
        return mse, r2
# Reproducible toy problem: a noisy quadratic, y = 2*x^2 + 1 + noise
np.random.seed(0)
X = 10 * np.random.rand(100, 1)
y = 2 * np.square(X[:, 0]) + 1 + 2 * np.random.randn(100)

# Build the boosted ensemble and train it on the full data set
gb_regressor = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
)
gb_regressor.fit(X, y)

# Report the fit quality on the same data the model was trained on
mse, r2 = gb_regressor.evaluate(X, y)
print(f"Mean Squared Error: {mse:.4f}", f"R-squared: {r2:.4f}", sep="\n")


In [None]:
#Question 3

import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer, mean_squared_error, r2_score
from sklearn.tree import DecisionTreeRegressor

# Synthetic single-feature regression problem with a fixed seed
X, y = make_regression(
    n_samples=100,
    n_features=1,
    noise=10,
    random_state=42,
)
class GradientBoostingRegressor(RegressorMixin, BaseEstimator):
    """Minimal gradient-boosted regression trees, usable with GridSearchCV.

    The model is a constant baseline (the mean of the training targets) plus
    ``n_estimators`` regression trees, each fitted to the residuals left by
    the previous stages and shrunk by ``learning_rate``.

    Inheriting from ``BaseEstimator`` supplies ``get_params``/``set_params``,
    which ``sklearn.base.clone`` (and therefore ``GridSearchCV``) requires.
    Every constructor argument is stored under an attribute of the same name
    so that those methods can discover it.

    Attributes populated by ``fit``:
        models: ``[baseline, tree_1, ..., tree_n]`` -- element 0 is a float,
            the remaining elements are fitted trees.
        residuals: the residual vector seen by each stage, followed by the
            residual remaining after the last stage.
    """

    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        """Store the hyperparameters; no training happens here.

        Args:
            n_estimators: number of boosting stages (trees) to fit.
            learning_rate: shrinkage factor applied to every tree's output.
            max_depth: maximum depth passed to each ``DecisionTreeRegressor``.
        """
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.models = []
        self.residuals = []

    def fit(self, X, y):
        """Fit the ensemble to ``(X, y)``, discarding any previous fit.

        Args:
            X: training features, passed unchanged to each tree.
            y: training targets; converted to a float NumPy array.

        Returns:
            self, so calls can be chained.
        """
        y = np.asarray(y, dtype=float)
        baseline = np.mean(y)

        # Rebuild the state from scratch. Appending to lists left over from an
        # earlier fit() would train on stale residuals and place a second
        # baseline float among the trees, which breaks predict().
        residual = y - baseline
        self.models = [baseline]
        self.residuals = [residual]

        for _ in range(self.n_estimators):
            # Each stage learns what the ensemble so far still gets wrong.
            tree = DecisionTreeRegressor(max_depth=self.max_depth)
            tree.fit(X, residual)
            self.models.append(tree)

            # Remove the (shrunk) contribution of this stage from the residual.
            residual = residual - self.learning_rate * tree.predict(X)
            self.residuals.append(residual)

        return self

    def predict(self, X):
        """Return predictions for ``X``: baseline plus the shrunk tree outputs.

        ``fit`` must have been called first.
        """
        y_pred = np.zeros(len(X))
        for tree in self.models[1:]:
            y_pred += self.learning_rate * tree.predict(X)
        return y_pred + self.models[0]

    def score(self, X, y):
        """Return the R^2 score of the model's predictions on ``(X, y)``."""
        y_pred = self.predict(X)
        return r2_score(y, y_pred)

# Scorer based on MSE; greater_is_better=False makes the reported score negative MSE
mse_scorer = make_scorer(mean_squared_error, greater_is_better=False)

# Hyperparameter candidates to try (3 values for each of 3 parameters)
param_grid = dict(
    n_estimators=[50, 100, 150],
    learning_rate=[0.01, 0.1, 0.5],
    max_depth=[2, 3, 4],
)

# Estimator with default hyperparameters, used as the template for the search
gb_regressor = GradientBoostingRegressor()

# Exhaustive search over the grid with 5-fold cross-validation
grid_search = GridSearchCV(
    gb_regressor,
    param_grid,
    scoring=mse_scorer,
    cv=5,
)
grid_search.fit(X, y)

# Report the winning combination; flip the sign because the scorer yields negative MSE
print("Best Parameters found by Grid Search:", grid_search.best_params_, sep="\n")
print("Best Mean Squared Error:", -grid_search.best_score_)


In [None]:
#Question 4

