<a href="https://colab.research.google.com/github/ChintPatel/CMPE255-Decision-trees-and-ensemble/blob/main/GBM_method.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Importing necessary libraries
import numpy as np
import pandas as pd
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.tree import DecisionTreeRegressor

# Synthetic regression problem: 1000 rows, 5 features, noise=0.1, fixed seed.
X, y = make_regression(
    n_samples=1000,
    n_features=5,
    noise=0.1,
    random_state=42,
)

# Hold out 20% of the rows for evaluation; the seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Gradient Boosting Machine implementation
class GradientBoostingRegressor:
    """Minimal gradient boosting regressor built on shallow regression trees.

    The model starts from the mean of the training targets and then adds
    ``n_estimators`` trees one at a time. Each tree is fitted to the residuals
    (``y - current predictions``) left by the ensemble so far, and its output
    is scaled by ``learning_rate`` before being added to the predictions.

    Parameters
    ----------
    n_estimators : int, default=100
        Number of boosting rounds, i.e. trees fitted by ``fit``.
    learning_rate : float, default=0.1
        Shrinkage factor applied to every tree's contribution.
    max_depth : int, default=3
        ``max_depth`` passed to each ``DecisionTreeRegressor``.
    random_state : int, RandomState instance or None, default=None
        Passed unchanged to each ``DecisionTreeRegressor``. The default
        (``None``) keeps the trees' own default seeding behaviour.

    Attributes
    ----------
    trees : list
        Fitted trees in boosting order; rebuilt from scratch on every ``fit``.
    base_prediction : float
        Mean of the training targets; only set once ``fit`` has been called.
    """

    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3, random_state=None):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.random_state = random_state
        self.trees = []

    def fit(self, X, y):
        """Fit the ensemble on ``X`` / ``y`` and return ``self``.

        Any trees left over from a previous call are discarded first, so
        calling ``fit`` again retrains the model instead of stacking new trees
        on top of the old ones.
        """
        y = np.asarray(y, dtype=np.float64)

        # Start from a clean ensemble: without this reset a second fit() would
        # keep the old trees and predict() would sum both ensembles.
        self.trees = []
        self.base_prediction = np.mean(y)
        predictions = np.full(y.shape, self.base_prediction, dtype=np.float64)

        for _ in range(self.n_estimators):
            # What the current ensemble still gets wrong on the training data.
            residuals = y - predictions

            # Train a decision tree on the residuals
            tree = DecisionTreeRegressor(
                max_depth=self.max_depth,
                random_state=self.random_state,
            )
            tree.fit(X, residuals)
            self.trees.append(tree)

            # Update predictions with the shrunken contribution of the new tree
            predictions += self.learning_rate * tree.predict(X)

        return self

    def predict(self, X):
        """Return predictions for ``X`` as a 1-D float64 array.

        Raises
        ------
        AttributeError
            If called before ``fit``.
        """
        if not hasattr(self, "base_prediction"):
            raise AttributeError(
                "This GradientBoostingRegressor instance is not fitted yet; "
                "call fit() before predict()."
            )

        # np.shape also handles nested lists, which have no .shape attribute.
        n_samples = np.shape(X)[0]

        # Start with the base prediction, then add each tree's scaled output.
        predictions = np.full((n_samples,), self.base_prediction, dtype=np.float64)
        for tree in self.trees:
            predictions += self.learning_rate * tree.predict(X)
        return predictions

# Fit the hand-written booster on the training split
gbm = GradientBoostingRegressor(
    n_estimators=50,
    learning_rate=0.1,
    max_depth=3,
)
gbm.fit(X_train, y_train)

# Predict on both splits so training and held-out error can be compared
train_predictions = gbm.predict(X_train)
test_predictions = gbm.predict(X_test)

# Mean squared error per split
train_mse = mean_squared_error(y_train, train_predictions)
test_mse = mean_squared_error(y_test, test_predictions)

# Report the two errors
print(f"Training MSE: {train_mse}")
print(f"Test MSE: {test_mse}")


Training MSE: 128.60308249922377
Test MSE: 258.10659373268237
