In [24]:
%pip install git+https://github.com/JakeColtman/bartpy.git --no-deps
import importlib.util

packages_to_install = ['numpy', 'pandas', 'scikit-learn','tqdm', 'xgboost']

for package in packages_to_install:
    if importlib.util.find_spec(package) is None:
        print(f"Installing {package}...")
        %pip install {package}

import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LassoCV
from bartpy.sklearnmodel import SklearnModel
import xgboost as xgb

def ff(x):
    return 10 * np.sin(np.pi * x[0] * x[1]) + 20 * ((x[3] - 0.5) ** 2) + 10 * x[4] + 5 * x[5]

def simulation_run(n, p, sigma, num_reps=1000):
    def predict_rf(X_train, Y_train, X_test):
        rf = RandomForestRegressor()
        rf.fit(X_train, Y_train)
        return rf.predict(X_test)

    def predict_ll_regression_forest(X_train, Y_train, X_test, linear_correction_variables, ll_lambda):
        forest = RandomForestRegressor()
        forest.fit(X_train, Y_train)
        lasso = LassoCV(cv=5, random_state=42)
        lasso.fit(X_train[:, linear_correction_variables], Y_train)
        llf_preds = forest.predict(X_test) + lasso.predict(X_test[:, linear_correction_variables])
        return llf_preds

    def predict_bart(X_train, Y_train, X_test):
      bart_model = SklearnModel()
      bart_model.fit(X_train, Y_train)
      bart_preds = bart_model.predict(X_test)
      return bart_preds

    def predict_xgboost(X_train, Y_train, X_test, ntrees_max, num_search_rounds):
        boost_cv_fit = xgb.XGBRegressor(X_train, Y_train, ntrees_max, num_search_rounds)
        xg_preds = boost_cv_fit.predict(X_test)
        return xg_preds

    def predict_lassoRF(X_train, Y_train, X_test):
        lasso = LassoCV(cv=5)
        lasso.fit(X_train, Y_train)
        lasso_resids = Y_train - lasso.predict(X_train)
        rf = RandomForestRegressor()
        rf.fit(X_train, lasso_resids)
        return rf.predict(X_test) + lasso.predict(X_test)

    errors = []
    for _ in range(num_reps):
        #Simulate
        X_train = np.random.rand(n, p)
        Y_train = np.apply_along_axis(ff, 1, X_train) + sigma * np.random.normal(size=n)
        X_test = np.random.rand(num_test, p)
        truth = np.apply_along_axis(ff, 1, X_test)

        #Random Forest
        rf_preds = predict_rf(X_train, Y_train, X_test)
        rf_mse = np.mean((rf_preds - truth) ** 2)

        #Local Linear Forest
        llf_preds = predict_ll_regression_forest(X_train, Y_train, X_test, linear_correction_variables=np.arange(p), ll_lambda=1)
        llf_mse = np.mean((llf_preds - truth) ** 2)

        #Lasso Random Forest
        lasso_rf_preds = predict_lassoRF(X_train, Y_train, X_test)
        lasso_rf_mse = np.mean((lasso_rf_preds - truth) ** 2)

        #Bart
        #bart_preds = predict_bart(X_train, Y_train, X_test)
        #bart_mse = np.mean((bart_preds - truth) ** 2)

        #XGBoost
        #xg_preds = predict_xgboost(X_train, Y_train, X_test, 100, 5)
        #xg_mse = np.mean((xg_preds - truth) ** 2)

        #errors.append([llf_mse, rf_mse, lasso_rf_mse, bart_mse, xg_mse])
        errors.append([llf_mse, rf_mse, lasso_rf_mse])

    return np.mean(errors, axis=0)

efficient_run = True

ps = [10,30,50]
ns = [1000, 5000]
sigmas = [5, 20]

if efficient_run:
    ns = [1000]
    ps = [10, 30]

num_test = 1000

args = [(n, p, sigma) for n in ns for p in ps for sigma in sigmas]
full_results = []
for arguments in args:
    print(arguments)
    mses = simulation_run(*arguments)
    full_results.append([*arguments, *np.round(np.sqrt(mses), 3)])

full_results = np.array(full_results)

print(full_results)
print(f"One run took {(1/60) * (time.process_time() - ptm[0])} minutes")

Collecting git+https://github.com/JakeColtman/bartpy.git
  Cloning https://github.com/JakeColtman/bartpy.git to c:\users\jnoot\appdata\local\temp\pip-req-build-g8go9mlc
  Resolved https://github.com/JakeColtman/bartpy.git to commit 09e409e91dd1b9d44784c788c2d731dafb181eb0
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Installing backend dependencies: started
  Installing backend dependencies: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Note: you may need to restart the kernel to use updated packages.
Installing scikit-learn...


  Running command git clone --filter=blob:none --quiet https://github.com/JakeColtman/bartpy.git 'C:\Users\JNoot\AppData\Local\Temp\pip-req-build-g8go9mlc'


Note: you may need to restart the kernel to use updated packages.
(1000, 10, 5)
(1000, 10, 20)
(1000, 30, 5)
(1000, 30, 20)
[[1000.      10.       5.      15.009    2.143    1.743]
 [1000.      10.      20.      15.873    5.151    4.984]
 [1000.      30.       5.      14.945    2.341    1.834]
 [1000.      30.      20.      15.512    4.655    4.584]]


NameError: name 'time' is not defined