# In [None]:
# Q1. What is Gradient Boosting Regression?
# Gradient Boosting Regression is an ensemble learning technique that builds a model by combining multiple weak learners, typically decision trees.
# It works by training models sequentially, with each model focusing on the residuals (errors) of the previous models.
# In each iteration, it tries to minimize the loss function (e.g., mean squared error) by fitting a new model that corrects the residuals of the previous models.

# Q2. Implement a simple gradient boosting algorithm from scratch using Python and NumPy.
# Use a simple regression problem as an example and train the model on a small dataset.
# Evaluate the model's performance using metrics such as mean squared error and R-squared.

import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

# Build a reproducible toy regression problem: y is roughly 5 * x plus Gaussian noise
np.random.seed(42)
X = np.random.rand(100, 1)  # Feature matrix: 100 samples, 1 feature, uniform on [0, 1)
y = 0.5 * np.random.randn(100) + 5 * X[:, 0]  # Noise (std 0.5) added to the linear signal

# Simple Gradient Boosting implementation (from scratch)
class SimpleGradientBoosting:
    """Gradient boosting regressor for squared-error loss, written with NumPy only.

    The ensemble starts from the mean of the training targets and then adds
    ``n_estimators`` depth-1 regression trees (stumps). Each stump is fitted
    to the residuals of the ensemble built so far and its output is scaled by
    ``learning_rate`` before being added.

    Attributes:
        n_estimators: Number of boosting rounds (weak learners).
        learning_rate: Shrinkage factor applied to every weak learner.
        models: Fitted stumps in boosting order, each stored as a tuple
            ``(feature, threshold, left_value, right_value)``.
        init_prediction: Mean of the training targets, or ``None`` before
            ``fit`` has been called.
    """

    def __init__(self, n_estimators=50, learning_rate=0.1):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.models = []  # Fitted weak learners (stumps), in boosting order
        self.init_prediction = None  # Baseline prediction, set by fit()

    @staticmethod
    def _as_2d(X):
        """Return X as a float array of shape (n_samples, n_features)."""
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X.reshape(-1, 1)  # Treat a flat vector as a single feature
        if X.ndim != 2 or X.shape[1] == 0:
            raise ValueError("X must be a 1-D or 2-D array with at least one feature")
        return X

    @staticmethod
    def _fit_stump(X, residuals):
        """Return the stump (feature, threshold, left, right) that best fits residuals."""
        n_samples, n_features = X.shape
        mean_residual = float(residuals.mean())
        # Fallback when no split helps (e.g. every feature is constant): a
        # threshold of +inf sends every sample to the left leaf.
        best_stump = (0, np.inf, mean_residual, mean_residual)
        best_score = residuals.sum() ** 2 / n_samples
        left_counts = np.arange(1, n_samples)
        right_counts = n_samples - left_counts

        for feature in range(n_features):
            order = np.argsort(X[:, feature], kind="stable")
            values = X[order, feature]
            # A split can only be placed between two distinct feature values.
            splittable = values[:-1] < values[1:]
            if not splittable.any():
                continue
            running = np.cumsum(residuals[order])
            left_sums = running[:-1]
            right_sums = running[-1] - left_sums
            # Minimising the squared error of a two-leaf fit is equivalent to
            # maximising sum_left^2 / n_left + sum_right^2 / n_right.
            scores = left_sums ** 2 / left_counts + right_sums ** 2 / right_counts
            scores[~splittable] = -np.inf
            split = int(np.argmax(scores))
            if scores[split] > best_score:
                best_score = scores[split]
                threshold = 0.5 * (values[split] + values[split + 1])
                if not threshold < values[split + 1]:
                    # The midpoint rounded up onto the right-hand value; fall
                    # back to the left value so the partition stays the same.
                    threshold = values[split]
                best_stump = (
                    feature,
                    float(threshold),
                    float(left_sums[split] / left_counts[split]),
                    float(right_sums[split] / right_counts[split]),
                )
        return best_stump

    @staticmethod
    def _stump_predict(stump, X):
        """Evaluate one stump on every row of X."""
        feature, threshold, left_value, right_value = stump
        return np.where(X[:, feature] <= threshold, left_value, right_value)

    def fit(self, X, y):
        """Fit the ensemble to the training data.

        Args:
            X: Array-like of shape (n_samples, n_features) or (n_samples,).
            y: Array-like of n_samples numeric targets.

        Raises:
            ValueError: If the data is empty, X has no features, or X and y
                disagree on the number of samples.
        """
        X = self._as_2d(X)
        # Work in float so integer targets do not truncate the running prediction.
        y = np.asarray(y, dtype=float).ravel()
        if y.size == 0:
            raise ValueError("Cannot fit on an empty dataset")
        if X.shape[0] != y.size:
            raise ValueError("X and y must contain the same number of samples")

        self.models = []  # Discard learners left over from a previous fit
        # Remember the baseline so predict() does not depend on outside state.
        self.init_prediction = float(y.mean())
        y_pred = np.full(y.size, self.init_prediction)
        for _ in range(self.n_estimators):
            # For squared error the negative gradient is exactly the residual.
            residuals = y - y_pred
            stump = self._fit_stump(X, residuals)
            # Gradient step: add the shrunken weak-learner prediction.
            y_pred += self.learning_rate * self._stump_predict(stump, X)
            self.models.append(stump)

    def predict(self, X):
        """Predict targets for X.

        Args:
            X: Array-like of shape (n_samples, n_features) or (n_samples,).

        Returns:
            A float array of shape (n_samples,).

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.init_prediction is None:
            raise RuntimeError("Call fit() before predict()")
        X = self._as_2d(X)
        # Start from the stored baseline, then add each stump's contribution.
        y_pred = np.full(X.shape[0], self.init_prediction)
        for stump in self.models:
            y_pred += self.learning_rate * self._stump_predict(stump, X)
        return y_pred

# Fit the from-scratch booster on the full synthetic dataset
model = SimpleGradientBoosting(learning_rate=0.1, n_estimators=50)
model.fit(X, y)

# Predict on the same samples the model was trained on (in-sample evaluation)
y_pred = model.predict(X)

# Score the predictions and report both metrics
mse, r2 = mean_squared_error(y, y_pred), r2_score(y, y_pred)
print("Mean Squared Error:", mse)
print("R-squared:", r2)

# Q3. Experiment with different hyperparameters such as learning rate, number of trees, and tree depth.
# Grid search or random search can be used to find the best hyperparameters; here we run an exhaustive grid search with cross-validation.

# The grid below varies n_estimators, learning_rate, and max_depth.
from sklearn.model_selection import GridSearchCV

# In practice, we would use scikit-learn's GradientBoostingRegressor or use cross-validation for hyperparameter tuning.
# Example: Grid Search for learning rate and number of estimators
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for the final evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Base estimator whose hyperparameters are tuned below
gbr = GradientBoostingRegressor()

# Candidate values for each hyperparameter (3 x 3 x 3 = 27 combinations)
param_grid = dict(
    n_estimators=[50, 100, 150],
    learning_rate=[0.01, 0.05, 0.1],
    max_depth=[3, 5, 7],
)

# Search the grid with 5-fold cross-validation, scored by negative mean squared error
grid_search = GridSearchCV(
    estimator=gbr,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
)
grid_search.fit(X_train, y_train)

# Report the winning combination
print("Best Hyperparameters from Grid Search:", grid_search.best_params_)

# Score the selected estimator on the held-out test split
best_model = grid_search.best_estimator_
y_pred_best = best_model.predict(X_test)
mse_best, r2_best = (
    mean_squared_error(y_test, y_pred_best),
    r2_score(y_test, y_pred_best),
)
print("Optimized Mean Squared Error:", mse_best)
print("Optimized R-squared:", r2_best)

# Q4. What is a weak learner in Gradient Boosting?
# A weak learner in Gradient Boosting is a simple model, typically a shallow decision tree (a depth-1 tree is called a stump),
# that performs only slightly better than a trivial baseline such as random guessing or always predicting the mean. These weak learners are iteratively trained and combined to form a stronger model.

# Q5. What is the intuition behind the Gradient Boosting algorithm?
# Gradient Boosting works by fitting a sequence of models to the residuals (errors) of the previous models.
# The idea is to correct the errors iteratively, with each model improving upon the predictions of the previous ones.
# The model reduces the loss function (e.g., MSE) by gradient descent in function space: each new learner approximates the negative gradient of the loss, which for MSE is exactly the residual.

# Q6. How does Gradient Boosting algorithm build an ensemble of weak learners?
# Gradient Boosting builds the ensemble by sequentially training weak learners (typically decision trees).
# Each weak learner tries to correct the mistakes made by the previous learners.
# The final prediction is the initial baseline plus the sum of all the learners' predictions, each scaled by the learning rate (shrinkage) rather than by a per-learner performance weight as in AdaBoost.

# Q7. What are the steps involved in constructing the mathematical intuition of Gradient Boosting algorithm?
# The steps involved are:
# 1. Initialize the model with a baseline prediction, usually the mean of the target variable.
# 2. Calculate the residuals (errors) between the true values and the current predictions.
# 3. Fit a weak learner (e.g., decision tree) to the residuals.
# 4. Update the model's predictions by adding the weak learner's prediction, scaled by the learning rate.
# 5. Repeat steps 2-4 for a specified number of iterations or until convergence.
