<a href="https://colab.research.google.com/github/ChintPatel/CMPE255-Decision-trees-and-ensemble/blob/main/GBM_method.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.tree import DecisionTreeRegressor

class GradientBoostingRegressorFromScratch:
    """Gradient boosting for squared-error regression using shallow trees.

    The ensemble starts from the mean of the training targets and then adds
    ``n_estimators`` regression trees one after another. Every tree is fitted
    to the residuals left by the ensemble built so far, and its output is
    scaled by ``learning_rate`` before being added to the prediction.

    Args:
        n_estimators: Number of boosting rounds (trees) to fit.
        learning_rate: Shrinkage factor applied to each tree's contribution.
        max_depth: Maximum depth of every individual regression tree.
        random_state: Value forwarded as ``random_state`` to each
            ``DecisionTreeRegressor``. The default ``None`` leaves the trees
            unseeded, which matches the behaviour without this parameter.

    Attributes set by ``fit``:
        models: List of fitted trees, in boosting order.
        initial_prediction: Mean of the training targets (``None`` until
            ``fit`` has been called).
    """

    def __init__(self, n_estimators=50, learning_rate=0.1, max_depth=3,
                 random_state=None):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.random_state = random_state
        self.models = []
        self.initial_prediction = None

    def fit(self, X, y):
        """Fit the boosted ensemble to ``X`` and ``y``.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: Array-like of n_samples targets; it is flattened to 1-D.

        Returns:
            The fitted estimator itself, so calls can be chained.

        Raises:
            ValueError: If ``y`` is empty, ``X`` is not 2-D, or ``X`` and
                ``y`` disagree on the number of samples.
        """
        X = np.asarray(X)
        # Flatten so a column-shaped target does not break the in-place
        # residual update against the 1-D tree predictions below.
        y = np.asarray(y, dtype=float).ravel()

        if y.size == 0:
            raise ValueError("Cannot fit on an empty target array.")
        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-dimensional, got {X.ndim} dimension(s)."
            )
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X has {X.shape[0]} samples but y has {y.shape[0]}."
            )

        # Start from a clean slate: without this, a second call to fit()
        # would stack new trees on top of the ones from the previous fit.
        self.models = []

        # Initialize the prediction with the mean of y
        self.initial_prediction = float(np.mean(y))
        # `y - scalar` allocates a new array, so the caller's y is untouched
        # by the in-place updates in the loop.
        residuals = y - self.initial_prediction

        for _ in range(self.n_estimators):
            # Fit a decision tree to the current residuals
            tree = DecisionTreeRegressor(
                max_depth=self.max_depth,
                random_state=self.random_state,
            )
            tree.fit(X, residuals)
            self.models.append(tree)

            # Remove the (shrunken) part of the residual this tree explains
            residuals -= self.learning_rate * tree.predict(X)

        return self

    def predict(self, X):
        """Predict targets for ``X`` with the fitted ensemble.

        Args:
            X: Array-like whose first dimension indexes samples.

        Returns:
            1-D float array with one prediction per row of ``X``.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.initial_prediction is None:
            raise RuntimeError(
                "This GradientBoostingRegressorFromScratch instance is not "
                "fitted yet; call fit() before predict()."
            )

        X = np.asarray(X)

        # Start with the initial prediction
        y_pred = np.full(X.shape[0], self.initial_prediction, dtype=float)

        # Add contributions from each tree
        for tree in self.models:
            y_pred += self.learning_rate * tree.predict(X)

        return y_pred

# --- Data ---------------------------------------------------------------
# Fetch California Housing as one DataFrame holding features and target.
data = fetch_california_housing(as_frame=True)
df = data.frame
y = df["MedHouseVal"]  # regression target
X = df.drop(columns=["MedHouseVal"])  # every remaining column is a feature

# Hold out 20% of the rows for evaluation; the seed keeps the split fixed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Model --------------------------------------------------------------
# The from-scratch booster is given plain NumPy arrays, not pandas objects.
gbm = GradientBoostingRegressorFromScratch(
    n_estimators=50,
    learning_rate=0.1,
    max_depth=3,
)
gbm.fit(X_train.to_numpy(), y_train.to_numpy())

# --- Evaluation ---------------------------------------------------------
# Score the held-out rows with mean squared error.
y_pred = gbm.predict(X_test.to_numpy())
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"Mean Squared Error: {mse:.4f}")


Mean Squared Error: 0.3362
