# In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, GridSearchCV

# Synthetic single-feature regression problem: the target is 3 * x plus
# Gaussian noise scaled by 2. The seed is fixed so every run draws the
# same 100 samples.
np.random.seed(42)
X = 10 * np.random.rand(100, 1)  # feature values in [0, 10), one column
y = 3 * X[:, 0] + 2 * np.random.randn(100)

# Hold out 30% of the samples for evaluation; the fixed random_state keeps
# the partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Define the gradient boosting regression class
class GradientBoostingRegressor:
    """Minimal gradient boosting regressor for squared-error loss.

    The ensemble starts from a constant prediction (the mean of the training
    targets) and then adds ``n_estimators`` regression trees one after
    another. Each tree is fitted to the residuals of the ensemble built so
    far, and its output is scaled by ``learning_rate`` before being added.

    Parameters
    ----------
    n_estimators : int, default=100
        Number of boosting rounds (one tree per round).
    learning_rate : float, default=0.1
        Factor applied to every tree's contribution.
    max_depth : int, default=3
        ``max_depth`` passed to each ``DecisionTreeRegressor``.

    Attributes
    ----------
    models : list
        Fitted trees, in the order they were added.
    init_prediction : float or None
        Constant base prediction; ``None`` until ``fit`` has been called.
    predictions : numpy.ndarray or None
        Ensemble predictions on the training data from the last ``fit``.
    """

    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.models = []
        # The base prediction is learned in fit() from the targets passed
        # there; reading a module-level training array here would tie the
        # class to one particular script and ignore the data given to fit().
        self.init_prediction = None
        self.predictions = None

    def fit(self, X, y):
        """Fit the ensemble on features ``X`` and targets ``y``.

        Any state from a previous call is discarded first, so calling
        ``fit`` again retrains from scratch.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features, forwarded unchanged to every tree.
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Training targets; flattened to one dimension.

        Returns
        -------
        self : GradientBoostingRegressor
            The fitted estimator, to allow call chaining.
        """
        # Flatten so column-vector targets cannot broadcast against the
        # 1-D output of the trees' predict().
        y = np.asarray(y, dtype=np.float64).ravel()

        # Start from scratch so a second fit() does not stack new trees on
        # top of the ones from an earlier call.
        self.models = []
        self.init_prediction = float(np.mean(y))
        self.predictions = np.full(y.shape, self.init_prediction, dtype=np.float64)

        for _ in range(self.n_estimators):
            # For squared-error loss the residuals are the quantity each
            # new tree has to explain.
            residuals = y - self.predictions
            tree = DecisionTreeRegressor(max_depth=self.max_depth)
            tree.fit(X, residuals)
            self.predictions += self.learning_rate * tree.predict(X)
            self.models.append(tree)

        return self

    def predict(self, X):
        """Predict targets for ``X`` with the fitted ensemble.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to predict, forwarded unchanged to every tree.

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
            Base prediction plus the learning-rate-scaled sum of all trees.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.init_prediction is None:
            raise RuntimeError(
                "This GradientBoostingRegressor instance is not fitted yet; "
                "call fit() before predict()."
            )

        # np.shape also accepts plain nested lists, not only objects that
        # expose a .shape attribute.
        n_samples = np.shape(X)[0]
        prediction = np.full(n_samples, self.init_prediction, dtype=np.float64)
        for tree in self.models:
            prediction += self.learning_rate * tree.predict(X)
        return prediction

# Fit the hand-written booster on the training split.
gb_regressor = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
)
gb_regressor.fit(X_train, y_train)

# Score it on the held-out split with mean squared error and R-squared.
y_pred = gb_regressor.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Report both metrics rounded to two decimals.
print(f"Mean Squared Error: {mse:.2f}")
print(f"R-squared: {r2:.2f}")

# Hyperparameter tuning using Grid Search
# scikit-learn's estimator is imported under an alias so it does not shadow
# the hand-written GradientBoostingRegressor class defined in this file.
from sklearn.ensemble import GradientBoostingRegressor as SklearnGBR

# 3 x 3 x 3 = 27 candidate settings.
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7],
}

# Every candidate is scored by 5-fold cross-validation on the training split
# using negated MSE (scikit-learn scorers are "higher is better").
grid_search = GridSearchCV(
    estimator=SklearnGBR(),
    param_grid=param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
)
grid_search.fit(X_train, y_train)

print("\nBest Hyperparameters:")
print(grid_search.best_params_)

# Score the best estimator found by the search on the held-out test split.
best_model = grid_search.best_estimator_
y_pred_best = best_model.predict(X_test)
mse_best = mean_squared_error(y_test, y_pred_best)
r2_best = r2_score(y_test, y_pred_best)

print(f"Best Mean Squared Error: {mse_best:.2f}")
print(f"Best R-squared: {r2_best:.2f}")

# Explanations
# Each entry pairs a question heading with the line(s) printed beneath it;
# the loop below emits them in order, one print() call per line.
qa_sections = [
    (
        "\nQ1. What is Gradient Boosting Regression?",
        [
            "Gradient Boosting Regression is an ensemble learning technique that builds a model by combining the predictions of multiple weak learners, typically decision trees, to improve predictive performance. It sequentially fits new models to the residuals of the combined predictions of previous models.",
        ],
    ),
    (
        "\nQ4. What is a weak learner in Gradient Boosting?",
        [
            "A weak learner is a model that performs slightly better than random chance. In Gradient Boosting, weak learners are usually simple models, such as shallow decision trees, that are combined to create a strong model.",
        ],
    ),
    (
        "\nQ5. What is the intuition behind the Gradient Boosting algorithm?",
        [
            "The intuition behind Gradient Boosting is to iteratively improve the model by focusing on correcting the errors of the previous models. It does this by fitting new models to the residuals (errors) of the combined predictions of all previously trained models.",
        ],
    ),
    (
        "\nQ6. How does Gradient Boosting algorithm build an ensemble of weak learners?",
        [
            "Gradient Boosting builds an ensemble of weak learners by training them sequentially. Each new model is trained to predict the residuals of the combined predictions of the previous models. The final model is an aggregate of all the weak learners' predictions, weighted by their learning rates.",
        ],
    ),
    (
        "\nQ7. What are the steps involved in constructing the mathematical intuition of Gradient Boosting algorithm?",
        [
            "1. Initialize the model with a simple base prediction (e.g., mean of the target values).",
            "2. Compute the residuals (errors) between the actual target values and the current model's predictions.",
            "3. Fit a new model to the residuals.",
            "4. Update the predictions by adding the predictions of the new model, scaled by a learning rate.",
            "5. Repeat the process for a predefined number of iterations or until no significant improvement is observed.",
        ],
    ),
]

for heading, answer_lines in qa_sections:
    print(heading)
    for answer_line in answer_lines:
        print(answer_line)

