In [1]:
import os
import sys
import numpy as np
from sklearn.metrics import mean_squared_error
from tqdm import tqdm
# Make the `simulation_study` package importable from this notebook.
# NOTE(review): both paths below are absolute and specific to one Windows
# machine; the recorded output of this cell shows the subsequent
# `simulation_study` imports failing with ModuleNotFoundError, so these
# entries presumably did not resolve in the environment where it was run.
# dirname() drops the last path component, so `current_dir` is the parent of
# the `simulation_2_and_3` folder (assuming Windows path separators are
# recognised — TODO confirm on other platforms).
current_dir = os.path.dirname(r"C:\Users\JNoot\Documents\University\Bachelor Thesis\New Code\simulation_study\simulation_2_and_3")
# One level further up; this directory is searched first for imports.
parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))
sys.path.insert(0, parent_dir)
# Additionally append "<notebook directory>/.." to the end of the search path.
sys.path.append(os.path.dirname(os.path.abspath(r"C:\Users\JNoot\Documents\University\Bachelor Thesis\New Code\simulation_study\simulation_2_and_3\simulation_2_and_3.ipynb")) + '/..')
from simulation_study.models.random_forest import train_test_rf
from simulation_study.models.lasso_rf import LassoRandomForest
from simulation_study.models.local_linear_forest import LocalLinearForestRegressor
from simulation_study.models.bart import predict_bart
from simulation_study.models.xgboost import predict_xgboost
from simulation_study.hypertuning.hypertune import hypertune_model

def friedman(x):
    """Evaluate the Friedman benchmark regression function at a single point.

    The signal is ``10*sin(pi*x1*x2) + 20*(x3 - 0.5)**2 + 10*x4 + 5*x5`` and
    depends only on the first five coordinates of ``x``; any further
    coordinates are ignored.

    Args:
        x: Indexable sequence of numbers with at least five entries.

    Returns:
        The noise-free function value at ``x``.

    Raises:
        IndexError: If ``x`` has fewer than five entries.
    """
    # The earlier version read x[3], x[4], x[5], which skipped the third
    # coordinate and required six features instead of five.
    return (
        10 * np.sin(np.pi * x[0] * x[1])
        + 20 * ((x[2] - 0.5) ** 2)
        + 10 * x[3]
        + 5 * x[4]
    )

def smoothness(x):
    """Return ``log(1 + exp(6 * x[0]))`` for a single point ``x``.

    Only the first coordinate of ``x`` enters the computation; all other
    coordinates are ignored.
    """
    scaled_first = 6 * x[0]
    return np.log(np.exp(scaled_first) + 1)

def simulation_run(function, n, p, sigma, num_reps=50, num_test = 1000):
    """Compare five regressors on simulated data and return their mean test MSE.

    Hyperparameters for every model are tuned once, on a separate sample of
    500 points, and reused across all repetitions. Each repetition draws a
    fresh training set (uniform features on [0, 1), Gaussian noise scaled by
    ``sigma``) and a fresh test set that is scored against the noise-free
    function values.

    Args:
        function: Callable applied to each feature row to get the true signal.
        n: Number of training observations per repetition.
        p: Number of features.
        sigma: Multiplier applied to the standard-normal noise.
        num_reps: Number of repetitions to average over.
        num_test: Number of test points per repetition.

    Returns:
        Array of five mean squared errors averaged over repetitions, ordered
        as: Local Linear Forest, Random Forest, LASSO-RF, BART, XGBoost.
    """
    # One-off tuning sample; the signal is computed before the noise is drawn.
    n_tune = 500
    tune_features = np.random.rand(n_tune, p)
    tune_signal = np.apply_along_axis(function, 1, tune_features)
    tune_targets = tune_signal + sigma * np.random.normal(size=n_tune)

    # Tune the models one after another in this fixed order.
    tuned = {
        label: hypertune_model(label, tune_features, tune_targets, n_trials = 5)
        for label in ("LocalLinearForest", "RandomForest", "LASSO-RF", "XGBoost", "BART")
    }

    per_rep_mse = []
    for _ in tqdm(range(num_reps)):
        # Fresh training and test data for this repetition.
        train_features = np.random.rand(n, p)
        train_signal = np.apply_along_axis(function, 1, train_features)
        train_targets = train_signal + sigma * np.random.normal(size=n)
        test_features = np.random.rand(num_test, p)
        truth = np.apply_along_axis(function, 1, test_features)

        # Random forest
        rf_preds = train_test_rf(
            train_features, train_targets, test_features, **tuned["RandomForest"]
        )
        rf_mse = mean_squared_error(truth, rf_preds)

        # LASSO random forest
        lasso_forest = LassoRandomForest(**tuned["LASSO-RF"])
        lasso_forest.fit(train_features, train_targets)
        lrf_mse = mean_squared_error(truth, lasso_forest.predict(test_features))

        # Local linear forest
        local_forest = LocalLinearForestRegressor(**tuned["LocalLinearForest"])
        local_forest.fit(train_features, train_targets)
        llf_mse = mean_squared_error(truth, local_forest.predict_LLF(test_features))

        # Bayesian additive regression trees
        bart_preds = predict_bart(
            train_features, train_targets, test_features, **tuned["BART"]
        )
        bart_mse = mean_squared_error(truth, bart_preds)

        # XGBoost
        xgb_preds = predict_xgboost(
            train_features, train_targets, test_features, **tuned["XGBoost"]
        )
        xgb_mse = mean_squared_error(truth, xgb_preds)

        per_rep_mse.append([llf_mse, rf_mse, lrf_mse, bart_mse, xgb_mse])

    return np.mean(per_rep_mse, axis=0)

# Run settings: `efficient_run` selects a single-setting grid, `num_reps` is
# the number of repetitions per setting, `func` names the target function.
efficient_run = True
num_reps = 30
func = "smoothness"

# Experiment design per target function: the function itself, the full
# (ps, ns, sigmas) grid, and the reduced grid used when `efficient_run` is on.
_designs = {
    "friedman": {
        "function": friedman,
        "full": ([10, 30, 50], [1000, 5000], [5, 20]),
        "efficient": ([30], [5000], [20]),
    },
    "smoothness": {
        "function": smoothness,
        "full": ([5, 20], [1000, 5000], [0.1, 1, 2]),
        "efficient": ([5], [5000], [2]),
    },
}

# Fail here with a clear message instead of a NameError on `function`/`ps`
# further down when `func` is misspelled.
if func not in _designs:
    raise ValueError(
        "Unknown func {!r}; expected one of {}".format(func, sorted(_designs))
    )

function = _designs[func]["function"]
ps, ns, sigmas = _designs[func]["efficient" if efficient_run else "full"]

# Every (n, p, sigma) combination, with n varying slowest and sigma fastest.
args = [(n, p, sigma) for n in ns for p in ps for sigma in sigmas]

# One row per setting: the RMSE of each model, rounded to three decimals.
full_results = []
for arguments in args:
    print(arguments)
    mses = simulation_run(function, *arguments, num_reps)
    # Convert mean MSEs to RMSEs once and reuse the row for storing and printing.
    rmse_row = list(np.round(np.sqrt(mses), 3))
    full_results.append(rmse_row)
    print(rmse_row)

full_results = np.array(full_results)

print(full_results)

ModuleNotFoundError: No module named 'simulation_study'