In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Hard-coded XGBoost for regression

In [2]:
def XGBoostRegression(X, y, learning_rate, num_iters, lambda_):
    '''
    XGBoost function for regression tasks
    
    X                        Features matrix
    y                        Target
    learning_rate            Model learning rate
    num_iters                Number of boosting iterations
    lambda_                  Regularization parameter
    
    F_x                      Cumulative prediction at each boosting step
    gradients                First derivative of the loss function 
    hessians                 Second derivative of the Loss function (Number of residuals)
    h_x                      Prediction from each boosting step
    '''
    
    F_x = np.mean(y) * np.ones_like(y)                  # First prediction
    
    for i in range(num_iters):
        gradients = -(y - F_x)                           
        hessians = np.ones_like(y)                        
        h_x = -gradients / (hessians + lambda_)           # Fitting into new tree
        F_x += learning_rate * h_x                        # Updating 
        
    return F_x

### Demo 

In [3]:
np.random.seed(42)
X = np.random.rand(1000, 10)  # 1000 samples, 10 features
y = np.random.rand(1000)      # 1000 target values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

learning_rate = 0.1
num_iters = 100
lambda_ = 1.0

y_test_pred = XGBoostRegression(X_test, y_test, learning_rate, num_iters, lambda_)

# Evaluate the test performance
mse_test = mean_squared_error(y_test, y_test_pred)
print(f"Test Mean Squared Error: {mse_test}")

Test Mean Squared Error: 3.0498094248067077e-06


# XGBoost for regression (sklean)

In [4]:
from xgboost import XGBRegressor

In [5]:
xgb_model = XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)

# Train the model
xgb_model.fit(X_train, y_train)

# Make predictions on the test data
y_test_pred = xgb_model.predict(X_test)

# Evaluate the model's performance on the test set
mse_test = mean_squared_error(y_test, y_test_pred)
print(f"Test Mean Squared Error: {mse_test}")

Test Mean Squared Error: 0.09226430629844373
