# In [None]:
import os
import sys
import numpy as np
from sklearn.metrics import mean_squared_error
from tqdm.auto import tqdm
# Make the project importable. The absolute paths below are machine-specific:
# adjust them to your own checkout before running.
# NOTE(review): the paths use Windows separators; on a non-Windows os.path the
# backslashes are not treated as separators, so dirname() would not split them.
# dirname() drops the last path component, i.e. this is the folder that
# contains "simulation_2_and_3".
current_dir = os.path.dirname(r"C:\Users\JNoot\Documents\University\Bachelor Thesis\New Code\simulation_study\simulation_2_and_3")
# One level above current_dir, placed first on sys.path -- presumably so that
# the `simulation_study` package imported below can be resolved.
parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))
sys.path.insert(0, parent_dir)
# Additionally append "<folder of the notebook>/.." to the end of sys.path.
sys.path.append(os.path.dirname(os.path.abspath(r"C:\Users\JNoot\Documents\University\Bachelor Thesis\New Code\simulation_study\simulation_2_and_3\simulation_2_and_3.ipynb")) + '/..')
from simulation_study.models.random_forest import train_test_rf
from simulation_study.models.lasso_rf import LassoRandomForest
from simulation_study.models.local_linear_forest import LocalLinearForestRegressor
from simulation_study.models.bart import predict_bart
from simulation_study.models.xgboost import predict_xgboost
from simulation_study.hypertuning.hypertune import hypertune_model

def friedman(x):
    """Evaluate Friedman's benchmark regression function for one observation.

    Computes ``10*sin(pi*x1*x2) + 20*(x3 - 0.5)**2 + 10*x4 + 5*x5`` where
    ``x1..x5`` are the first five entries of ``x`` (indices 0-4). Any further
    entries are ignored, i.e. they act as pure noise features.

    Args:
        x: Indexable sequence/array holding at least five numeric entries.

    Returns:
        The noiseless function value.

    Raises:
        IndexError: If ``x`` has fewer than five entries.
    """
    # The signal uses the consecutive indices 0..4 of the standard definition;
    # skipping index 2 would needlessly require a sixth feature.
    interaction = 10 * np.sin(np.pi * x[0] * x[1])
    quadratic = 20 * ((x[2] - 0.5) ** 2)
    linear = 10 * x[3] + 5 * x[4]
    return interaction + quadratic + linear

def smoothness(x):
    """Return ``log(1 + exp(6 * x[0]))`` for one observation.

    Only the first entry of ``x`` enters the value; all other entries are
    ignored.

    Args:
        x: Indexable sequence/array with at least one numeric entry.

    Returns:
        The noiseless function value.
    """
    scaled_first_feature = 6 * x[0]
    return np.log(1 + np.exp(scaled_first_feature))

def get_parameters(n, p, sigma, function, n_trials, bart_n_trials=20):
    """Tune the hyperparameters of every model on one simulated data set.

    A single tuning sample is drawn (features from ``np.random.rand``,
    responses ``function(row) + sigma * standard normal noise``) and passed to
    ``hypertune_model`` once per model.

    Args:
        n: Number of tuning observations.
        p: Number of features.
        sigma: Scale applied to the standard normal noise.
        function: Callable mapping one feature row to its noiseless response.
        n_trials: Number of tuning trials for every model except BART.
        bart_n_trials: Number of tuning trials for BART. Defaults to 20, the
            value that used to be hard-coded independently of ``n_trials``.

    Returns:
        dict with keys ``"LLF"``, ``"RF"``, ``"LRF"``, ``"XGB"`` and
        ``"BART"`` (in that order), each holding whatever ``hypertune_model``
        returned for that model.
    """
    X_tune = np.random.rand(n, p)
    Y_tune = np.apply_along_axis(function, 1, X_tune) + sigma * np.random.normal(size=n)

    # (result key, model name understood by hypertune_model, trial budget).
    # The order is the order in which the models are tuned.
    tuning_plan = [
        ("LLF", "LocalLinearForest", n_trials),
        ("RF", "RandomForest", n_trials),
        ("LRF", "LASSO-RF", n_trials),
        ("XGB", "XGBoost", n_trials),
        ("BART", "BART", bart_n_trials),
    ]
    return {
        key: hypertune_model(model_name, X_tune, Y_tune, n_trials=trials)
        for key, model_name, trials in tuning_plan
    }

def simulation_run(function, n, p, sigma, num_reps=50, num_test=1000, parameters=None):
    """Estimate the mean test MSE of every model over repeated simulations.

    Each repetition draws ``n`` training rows and ``num_test`` test rows with
    ``p`` features from ``np.random.rand``. Training responses are
    ``function(row)`` plus ``sigma``-scaled standard normal noise; test
    predictions are scored against the noiseless ``function(row)`` values.

    Args:
        function: Callable mapping one feature row to its noiseless response.
        n: Number of training observations per repetition.
        p: Number of features.
        sigma: Scale applied to the standard normal training noise.
        num_reps: Number of repetitions to average over (at least 1).
        num_test: Number of test observations per repetition.
        parameters: Mapping with the keys ``"RF"``, ``"LRF"``, ``"LLF"``,
            ``"BART"`` and ``"XGB"``; each value is unpacked as keyword
            arguments into the corresponding model.

    Returns:
        dict mapping ``"LLF"``, ``"RF"``, ``"Lasso RF"``, ``"BART"`` and
        ``"XGBoost"`` (in that order) to the MSE averaged over repetitions.

    Raises:
        TypeError: If ``parameters`` is None.
        KeyError: If ``parameters`` lacks one of the required keys.
        ValueError: If ``num_reps`` is smaller than 1.
    """
    # Validate up front so a bad call fails immediately with a clear message
    # instead of part-way through an expensive repetition.
    if parameters is None:
        raise TypeError(
            "parameters must be a mapping of per-model keyword arguments, not None"
        )
    required_keys = ("RF", "LRF", "LLF", "BART", "XGB")
    missing_keys = [key for key in required_keys if key not in parameters]
    if missing_keys:
        raise KeyError("parameters is missing entries for: " + ", ".join(missing_keys))
    if num_reps < 1:
        raise ValueError("num_reps must be at least 1")

    errors_list = []

    for _ in tqdm(range(num_reps)):
        # Simulate: noisy training responses, noiseless test targets.
        X_train = np.random.rand(n, p)
        Y_train = np.apply_along_axis(function, 1, X_train) + sigma * np.random.normal(size=n)
        X_test = np.random.rand(num_test, p)
        truth = np.apply_along_axis(function, 1, X_test)

        # Random Forest
        RF_predictions = train_test_rf(X_train, Y_train, X_test, **parameters["RF"])
        RF_mse = mean_squared_error(truth, RF_predictions)

        # Lasso Random Forest
        LRF = LassoRandomForest(**parameters["LRF"])
        LRF.fit(X_train, Y_train)
        LRF_preds = LRF.predict(X_test)
        LRF_mse = mean_squared_error(truth, LRF_preds)

        # Local Linear Forest
        LLF = LocalLinearForestRegressor(**parameters["LLF"])
        LLF.fit(X_train, Y_train)
        LLF_predictions = LLF.predict_LLF(X_test)
        LLF_mse = mean_squared_error(truth, LLF_predictions)

        # Bayesian Additive Regression Trees
        BART_predictions = predict_bart(X_train, Y_train, X_test, **parameters["BART"])
        BART_mse = mean_squared_error(truth, BART_predictions)

        # XGBoost
        XG_predictions = predict_xgboost(X_train, Y_train, X_test, **parameters["XGB"])
        XG_mse = mean_squared_error(truth, XG_predictions)

        # Key order matters: callers read the result via ``.values()``.
        errors_list.append(
            {
                "LLF": LLF_mse,
                "RF": RF_mse,
                "Lasso RF": LRF_mse,
                "BART": BART_mse,
                "XGBoost": XG_mse,
            }
        )

    mean_errors = {
        model: np.mean([errors[model] for errors in errors_list])
        for model in errors_list[0]
    }
    return mean_errors

# Experiment configuration.
efficient_run = True  # True: use the reduced list of noise levels for the chosen design
num_reps = 10  # repetitions per (n, p, sigma) setting
func = "friedman"  # which design to run: "friedman" or "smoothness"

# Select the data-generating function and its grid of settings. An unknown
# name fails here instead of leaving `function`/`ps`/`ns`/`sigmas` undefined
# (or stale from an earlier run).
if func == "friedman":
    function = friedman
    ps = [10, 30, 50]
    ns = [1000, 5000]
    sigmas = [5, 20]

    if efficient_run:
        sigmas = [5]
elif func == "smoothness":
    function = smoothness
    ps = [5, 20]
    ns = [1000, 5000]
    sigmas = [0.1, 1, 2]

    if efficient_run:
        sigmas = [1, 2]
else:
    raise ValueError(
        "Unknown func {!r}; expected 'friedman' or 'smoothness'".format(func)
    )

# Full grid of (n, p, sigma) settings; n varies slowest, sigma fastest.
args = [
    (n, p, sigma)
    for n in ns
    for p in ps
    for sigma in sigmas
]
mses_results = []
qlikes_results = []  # declared here but not filled in this section
for arguments in tqdm(args):
    print(arguments)
    # Tune once per setting, then reuse those hyperparameters in every repetition.
    model_parameters = get_parameters(*arguments, function, n_trials=50)
    mses = simulation_run(
        function,
        *arguments,
        num_reps,
        num_test=1000,
        parameters=model_parameters,
    )
    # Square root of each model's mean MSE, rounded to three decimals.
    rmse_row = list(np.round(np.sqrt(list(mses.values())), 3))
    mses_results.append(rmse_row)
    print(rmse_row)

# One row per setting, one column per model (in the order simulation_run returns them).
mses_results = np.array(mses_results)
print("Mean RMSE: ", mses_results)