In [1]:
# Load Data
import pandas as pd
import ray

ray.shutdown()
ray.init(object_store_memory=4000000000) # set object store memory to 4GB

data = pd.read_pickle('C:\\Users\\manue\\switchdrive\\Mutual Funds Project\\data\\pickle_files\\full_dataset.pkl')
sample = data.sample(101, random_state=1)

# tansform 'Rating' from Categorical to float
import numpy as np
sample["Rating"] = pd.to_numeric(sample["Rating"], errors='coerce')

# drop all rows with inf/-inf values!
import numpy as np
sample = sample[(sample != np.inf).all(axis=1)]
sample = sample[(sample != -np.inf).all(axis=1)]

#get rid of whitespace to draw tree later
Eq_Stylebox = sample['Eq_Stylebox_Long'].astype('object').replace(' ','_', regex=True)
sample['Eq_Stylebox_Long'] = Eq_Stylebox.astype('category')

# shifting target variable to predict next month
sample['returns'] = sample.returns.shift(-1)
sample = sample.drop(sample.tail(1).index)


X = sample.drop('returns', axis=1)
y = sample['returns']

# create dummies in case of categorical data
dummy_needed = [#'Rating',
                'Financial_Health_Grade_Long',
                 'Growth_Grade_Long',
                 'Profitability_Grade_Long',
                 'Eq_Stylebox_Long']

X = pd.get_dummies(X, columns=dummy_needed)

# Split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 26)
#X_train = ray.put(X_train)
#y_train = ray.put(y_train)

2023-04-15 15:27:44,709	INFO worker.py:1553 -- Started a local Ray instance.


In [2]:
%%time
import xgboost as xgb
from hyperopt import hp, fmin, tpe, Trials
from ray import tune
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.search.hyperopt import HyperOptSearch
from sklearn.model_selection import cross_val_score, KFold
import numpy as np


# Define the search space for hyperparameters
search_space = {
    "learning_rate": hp.loguniform("learning_rate", np.log(0.0001), np.log(0.9)),
    "max_depth": hp.choice("max_depth", range(1, 16)),
    "n_estimators": hp.choice("n_estimators", range(100, 10000)),
    "subsample": hp.uniform("subsample", 0.5, 1),
    "colsample_bytree": hp.uniform("colsample_bytree", 0.5, 1),
    "colsample_bylevel": hp.uniform("colsample_bylevel", 0.5, 1),
    "reg_alpha": hp.loguniform("reg_alpha", np.log(0.001), np.log(100)),
    "reg_lambda": hp.loguniform("reg_lambda", np.log(0.001), np.log(100)),
}


kfolds = KFold(n_splits=5, shuffle=True, random_state=26)

# Define the objective function to optimize
def objective(config):
    model = xgb.XGBRegressor(**config, n_jobs=-1)
    score = cross_val_score(model, X=ray.get(X_train), y=ray.get(y_train), scoring="neg_mean_squared_error", cv=kfolds)
    rmse = np.sqrt(-np.mean(score))
    tune.report(rmse=rmse)

# Define the search algorithm
search_alg = HyperOptSearch(space=search_space, metric="rmse", mode="min")
# to limit number of cores, uncomment and set max_concurrent 
# algo = ConcurrencyLimiter(algo, max_concurrent=10)
scheduler = AsyncHyperBandScheduler()

# Define the hyperparameter tuning trials object
trials = Trials()

# Define the configuration for Ray Tune
config = {
    'num_samples': 10,
    'config': search_space,
    'search_alg': search_alg,
    'scheduler': scheduler,
    'resources_per_trial': {'cpu': 1},
    'metric': 'rmse',
    'mode': 'min',
    'verbose': 1,
    'name': 'xgboost_tuning',
    'stop': {'training_iteration': 10},
    'local_dir': './ray_results',
}

# Start the hyperparameter tuning using Ray Tune
analysis = tune.run(objective, **config)

# Print the best hyperparameters found during the search
best_params = analysis.get_best_config(metric='rmse')
print("Best hyperparameters found:")
print(best_params)

0,1
Current time:,2023-04-15 15:29:34
Running for:,00:01:17.50
Memory:,8.6/15.7 GiB

Trial name,# failures,error file
objective_bb5b8fc1,1,"C:\Users\manue\switchdrive\Mutual Funds Project\ml_prediction\ray_results\xgboost_tuning\objective_bb5b8fc1_1_colsample_bylevel=0.8949,colsample_bytree=0.7125,learning_rate=0.0026,max_depth=5,n_estimators=3642,reg_alpha_2023-04-15_15-28-21\error.txt"
objective_9a68262f,1,"C:\Users\manue\switchdrive\Mutual Funds Project\ml_prediction\ray_results\xgboost_tuning\objective_9a68262f_2_colsample_bylevel=0.9858,colsample_bytree=0.8355,learning_rate=0.0002,max_depth=15,n_estimators=7269,reg_alph_2023-04-15_15-28-31\error.txt"
objective_1548b759,1,"C:\Users\manue\switchdrive\Mutual Funds Project\ml_prediction\ray_results\xgboost_tuning\objective_1548b759_3_colsample_bylevel=0.8083,colsample_bytree=0.7469,learning_rate=0.0005,max_depth=7,n_estimators=7170,reg_alpha_2023-04-15_15-28-34\error.txt"
objective_a966dd12,1,"C:\Users\manue\switchdrive\Mutual Funds Project\ml_prediction\ray_results\xgboost_tuning\objective_a966dd12_4_colsample_bylevel=0.9312,colsample_bytree=0.5091,learning_rate=0.4007,max_depth=1,n_estimators=8327,reg_alpha_2023-04-15_15-28-37\error.txt"
objective_957dbe26,1,"C:\Users\manue\switchdrive\Mutual Funds Project\ml_prediction\ray_results\xgboost_tuning\objective_957dbe26_5_colsample_bylevel=0.8806,colsample_bytree=0.9334,learning_rate=0.0032,max_depth=8,n_estimators=6172,reg_alpha_2023-04-15_15-28-49\error.txt"
objective_ce4aab15,1,"C:\Users\manue\switchdrive\Mutual Funds Project\ml_prediction\ray_results\xgboost_tuning\objective_ce4aab15_6_colsample_bylevel=0.5527,colsample_bytree=0.7654,learning_rate=0.1789,max_depth=11,n_estimators=8759,reg_alph_2023-04-15_15-28-52\error.txt"
objective_db4170ce,1,"C:\Users\manue\switchdrive\Mutual Funds Project\ml_prediction\ray_results\xgboost_tuning\objective_db4170ce_7_colsample_bylevel=0.9237,colsample_bytree=0.6934,learning_rate=0.1925,max_depth=12,n_estimators=7402,reg_alph_2023-04-15_15-29-01\error.txt"
objective_a9008590,1,"C:\Users\manue\switchdrive\Mutual Funds Project\ml_prediction\ray_results\xgboost_tuning\objective_a9008590_8_colsample_bylevel=0.7176,colsample_bytree=0.7672,learning_rate=0.2112,max_depth=13,n_estimators=5199,reg_alph_2023-04-15_15-29-11\error.txt"
objective_2c5bede9,1,"C:\Users\manue\switchdrive\Mutual Funds Project\ml_prediction\ray_results\xgboost_tuning\objective_2c5bede9_9_colsample_bylevel=0.7408,colsample_bytree=0.9965,learning_rate=0.0039,max_depth=5,n_estimators=4795,reg_alpha_2023-04-15_15-29-15\error.txt"
objective_e5d33a9b,1,"C:\Users\manue\switchdrive\Mutual Funds Project\ml_prediction\ray_results\xgboost_tuning\objective_e5d33a9b_10_colsample_bylevel=0.6709,colsample_bytree=0.9663,learning_rate=0.5783,max_depth=2,n_estimators=2969,reg_alph_2023-04-15_15-29-26\error.txt"

Trial name,status,loc,colsample_bylevel,colsample_bytree,learning_rate,max_depth,n_estimators,reg_alpha,reg_lambda,subsample
objective_bb5b8fc1,ERROR,127.0.0.1:20372,0.8949,0.712453,0.00257991,5,3642,0.0329368,1.16658,0.666029
objective_9a68262f,ERROR,127.0.0.1:8604,0.985777,0.835472,0.000173412,15,7269,0.0122288,0.167461,0.642125
objective_1548b759,ERROR,127.0.0.1:18840,0.808272,0.746942,0.000454984,7,7170,0.406775,8.47337,0.645359
objective_a966dd12,ERROR,127.0.0.1:25588,0.931209,0.509122,0.400696,1,8327,0.00188912,2.73568,0.523635
objective_957dbe26,ERROR,127.0.0.1:27200,0.880617,0.93338,0.00324613,8,6172,0.00354872,0.00203326,0.730872
objective_ce4aab15,ERROR,127.0.0.1:12576,0.552719,0.765447,0.178906,11,8759,0.0154599,0.10678,0.932481
objective_db4170ce,ERROR,127.0.0.1:9952,0.923694,0.693447,0.192454,12,7402,0.566774,0.610257,0.509154
objective_a9008590,ERROR,127.0.0.1:26844,0.71758,0.767246,0.21125,13,5199,0.00615445,0.473912,0.558816
objective_2c5bede9,ERROR,127.0.0.1:16108,0.740751,0.996489,0.00387391,5,4795,29.0498,1.10826,0.536911
objective_e5d33a9b,ERROR,127.0.0.1:5388,0.670944,0.966261,0.578286,2,2969,1.04228,0.00107675,0.888869


2023-04-15 15:28:31,730	ERROR trial_runner.py:1062 -- Trial objective_bb5b8fc1: Error processing event.
ray.exceptions.RayTaskError(ValueError): [36mray::ImplicitFunc.train()[39m (pid=20372, ip=127.0.0.1, repr=objective)
  File "python\ray\_raylet.pyx", line 857, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 861, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 803, in ray._raylet.execute_task.function_executor
  File "C:\Users\manue\anaconda3\lib\site-packages\ray\_private\function_manager.py", line 674, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "C:\Users\manue\anaconda3\lib\site-packages\ray\util\tracing\tracing_helper.py", line 466, in _resume_span
    return method(self, *_args, **_kwargs)
  File "C:\Users\manue\anaconda3\lib\site-packages\ray\tune\trainable\trainable.py", line 368, in train
    raise skipped from exception_cause(skipped)
  File "C:\Users\manue\anaconda3\lib\site-packages\ray\tune\trainabl

2023-04-15 15:28:52,670	ERROR trial_runner.py:1062 -- Trial objective_9a68262f: Error processing event.
ray.exceptions.RayTaskError(ValueError): [36mray::ImplicitFunc.train()[39m (pid=8604, ip=127.0.0.1, repr=objective)
  File "python\ray\_raylet.pyx", line 857, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 861, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 803, in ray._raylet.execute_task.function_executor
  File "C:\Users\manue\anaconda3\lib\site-packages\ray\_private\function_manager.py", line 674, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "C:\Users\manue\anaconda3\lib\site-packages\ray\util\tracing\tracing_helper.py", line 466, in _resume_span
    return method(self, *_args, **_kwargs)
  File "C:\Users\manue\anaconda3\lib\site-packages\ray\tune\trainable\trainable.py", line 368, in train
    raise skipped from exception_cause(skipped)
  File "C:\Users\manue\anaconda3\lib\site-packages\ray\tune\trainable

2023-04-15 15:29:14,287	ERROR worker.py:399 -- Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): [36mray::ImplicitFunc.train()[39m (pid=9952, ip=127.0.0.1, repr=objective)
  File "python\ray\_raylet.pyx", line 857, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 861, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 803, in ray._raylet.execute_task.function_executor
  File "C:\Users\manue\anaconda3\lib\site-packages\ray\_private\function_manager.py", line 674, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "C:\Users\manue\anaconda3\lib\site-packages\ray\util\tracing\tracing_helper.py", line 466, in _resume_span
    return method(self, *_args, **_kwargs)
  File "C:\Users\manue\anaconda3\lib\site-packages\ray\tune\trainable\trainable.py", line 368, in train
    raise skipped from exception_cause(skipped)
  File "C:\Users\manue\anaconda3\lib\site-packages\ray\tune\trainable\function_trainable.py", line 337

2023-04-15 15:29:26,724	ERROR trial_runner.py:1062 -- Trial objective_2c5bede9: Error processing event.
ray.exceptions.RayTaskError(ValueError): [36mray::ImplicitFunc.train()[39m (pid=16108, ip=127.0.0.1, repr=objective)
  File "python\ray\_raylet.pyx", line 857, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 861, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 803, in ray._raylet.execute_task.function_executor
  File "C:\Users\manue\anaconda3\lib\site-packages\ray\_private\function_manager.py", line 674, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "C:\Users\manue\anaconda3\lib\site-packages\ray\util\tracing\tracing_helper.py", line 466, in _resume_span
    return method(self, *_args, **_kwargs)
  File "C:\Users\manue\anaconda3\lib\site-packages\ray\tune\trainable\trainable.py", line 368, in train
    raise skipped from exception_cause(skipped)
  File "C:\Users\manue\anaconda3\lib\site-packages\ray\tune\trainabl

TuneError: ('Trials did not complete', [objective_bb5b8fc1, objective_9a68262f, objective_1548b759, objective_a966dd12, objective_957dbe26, objective_ce4aab15, objective_db4170ce, objective_a9008590, objective_2c5bede9, objective_e5d33a9b])

### Distributed Ray XGB
https://github.com/ray-project/xgboost_ray