In [1]:
import pandas as pd

# Load the feature matrix (X) and the labels (y). Both files are
# space-separated and carry no header row, so columns get integer labels.
X, y = (
    pd.read_csv(data_path, sep=' ', header=None)
    for data_path in ('../data/x_train.txt', '../data/y_train.txt')
)


# Preview the first rows of both frames in a single write
print("x:", X.head(), "\ny:", y.head(), sep="\n")


x:
        0         1         2         3         4         5         6    \
0 -2.619773 -2.619533 -1.199350 -1.083335 -1.000910 -0.366967 -2.164037   
1 -1.415579 -1.782544 -2.880270 -1.958863  1.159968  0.273030 -1.628728   
2 -2.745092 -1.382945 -1.626015 -1.282560 -0.663146  0.052349 -2.403322   
3  0.618998  0.455364 -0.115081  0.649040 -0.862207  2.308504  0.526114   
4 -0.070694 -0.550509 -0.565556 -0.693065 -0.573089 -0.395862  0.003170   

        7         8         9    ...        490        491        492  \
0 -1.210001 -0.658311 -1.489539  ...  10.849925  10.343346  10.717519   
1 -0.175813 -0.916857 -0.570166  ...  11.489417   5.195818   3.494627   
2 -0.765073 -0.394354 -0.806624  ...  13.934934   9.267515   4.705604   
3 -1.094852  1.088656 -0.481210  ...  12.021328   3.852231  11.059702   
4 -0.981609 -0.505775 -0.758430  ...   7.537788  11.229665  11.318915   

        493        494        495        496        497        498        499  
0  7.709295   5.894554  12.

In [49]:
#vars=[100, 101, 102, 103, 104, 105, 6]

In [50]:
#X = X[vars]

In [2]:
import ray
from ray import tune

from ray.tune.schedulers.hb_bohb import HyperBandForBOHB
from ray.tune.search.bohb import TuneBOHB
from ray.train import report
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
import xgboost as xgb
import pandas as pd
import numpy as np
import ConfigSpace as CS
import ConfigSpace.hyperparameters as CSH
import ast

# Assuming your data is in X and y dataframes
#X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
def train_xgboost(config):
    """Train one XGBoost classifier for a Ray Tune trial and report its score.

    The trial picks a column subset of the notebook-level ``X`` frame, fits an
    ``XGBClassifier`` on an 80/20 split of ``X``/``y`` (fixed
    ``random_state=42``), and evaluates the 20% of validation rows with the
    highest predicted class-1 probability.

    Args:
        config: Mapping sampled from the search space. The keys read here are
            ``colset`` (a string holding a Python list literal of column
            labels), ``eta``, ``max_depth``, ``min_child_weight``,
            ``subsample``, ``colsample_bytree``, ``lambda``, ``alpha``,
            ``objective`` and ``eval_metric``.

    Reports (through ``ray.train.report``):
        custom_score: ``10 * (hits / top_k) * 1000 - 200 * num_features_used``.
        correct_class_1_predictions: number of rows in the top slice whose
            true label is 1 and whose predicted probability is >= 0.5.
        num_features_used: number of columns selected by ``colset``.
    """
    top_fraction = 0.2          # share of validation rows that is scored
    scale_base = 1000           # the hit rate is rescaled to this many rows
    gain_per_hit = 10           # reward per correct class-1 row (after scaling)
    cost_per_feature = 200      # penalty per feature used by the model

    # The column subset travels through the search space as a string; turn it
    # back into a list of column labels (literal_eval only evaluates literals).
    selected_columns = ast.literal_eval(config['colset'])
    Xloc = X[selected_columns]

    # Split data into training and validation sets
    X_train, X_val, y_train, y_val = train_test_split(Xloc, y, test_size=0.2, random_state=42)

    # Define the model with the given configuration
    model = XGBClassifier(
        eta=config['eta'],
        max_depth=config['max_depth'],
        min_child_weight=config['min_child_weight'],
        subsample=config['subsample'],
        colsample_bytree=config['colsample_bytree'],
        reg_lambda=config['lambda'],
        reg_alpha=config['alpha'],
        objective=config['objective'],
        eval_metric=config['eval_metric'],
        use_label_encoder=False,
        verbosity=0
    )

    # Train the model
    model.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=False)

    # Probability of class 1 for every validation row
    preds_proba = model.predict_proba(X_val)[:, 1]

    # Positions of the highest-probability rows. The slice size is clamped to
    # at least 1 because ``arr[-0:]`` would select the *whole* array.
    top_k = max(1, int(top_fraction * len(preds_proba)))
    top_20_percent_indices = np.argsort(preds_proba)[-top_k:]

    # y comes from pd.read_csv, so y_val is a DataFrame and its values are 2-D
    # (k, 1) -- assuming a single label column. Flatten to (k,) so that the
    # element-wise comparison below lines up with the 1-D predictions instead
    # of broadcasting to a (k, k) matrix.
    top_20_true = np.ravel(y_val.iloc[top_20_percent_indices].to_numpy())
    top_20_preds = (preds_proba[top_20_percent_indices] >= 0.5).astype(int)

    # Rows in the top slice that are truly class 1 and predicted as class 1
    correct_class_1_predictions = int(np.sum((top_20_true == 1) & (top_20_preds == 1)))

    print(correct_class_1_predictions, len(X_val))

    # Calculate the number of features used
    num_features_used = Xloc.shape[1]

    # Scaled score: hit rate in the top slice rescaled to ``scale_base`` rows,
    # rewarded per hit and penalised per feature.
    customer_scaled = (correct_class_1_predictions / len(top_20_preds)) * scale_base
    customer_gain = gain_per_hit * customer_scaled
    variable_cost = cost_per_feature * num_features_used
    custom_score = customer_gain - variable_cost

    report({
        "custom_score": custom_score,
        "correct_class_1_predictions": correct_class_1_predictions,
        "num_features_used": num_features_used
    })

# Build the BOHB search space with ConfigSpace
config_space = CS.ConfigurationSpace()

# Candidate feature subsets. They are categorical choices, so each one is kept
# as a string and parsed back into a list with ast.literal_eval by the trainable.
_colset_choices = [
    '[101, 102, 103, 105]',
    '[101, 102, 103]',
    '[101, 102, 105]',
    '[101, 103, 105]',
    '[102, 103, 105]',
    '[104, 102, 103, 105]',
    '[100, 101, 102, 103, 105]',
    '[100, 101, 102, 103]',
    '[100, 101, 102, 105]',
    '[100, 101, 103, 105]',
    '[100, 102, 103, 105]',
    '[100, 104, 102, 103, 105]',
    '[100, 101, 102, 103, 104, 105]',
]

# One entry per tuned dimension, registered in this order
_search_dimensions = [
    CSH.UniformFloatHyperparameter('eta', lower=0.01, upper=0.1, log=True),
    CSH.UniformIntegerHyperparameter('max_depth', lower=3, upper=10),
    CSH.UniformIntegerHyperparameter('min_child_weight', lower=1, upper=5),
    CSH.UniformFloatHyperparameter('subsample', lower=0.5, upper=1.0),
    CSH.UniformFloatHyperparameter('colsample_bytree', lower=0.5, upper=1.0),
    CSH.UniformFloatHyperparameter('lambda', lower=1e-3, upper=10.0, log=True),
    CSH.UniformFloatHyperparameter('alpha', lower=1e-3, upper=10.0, log=True),
    CSH.CategoricalHyperparameter('colset', _colset_choices),
    # Single-choice categoricals: fixed settings passed through the config
    CSH.CategoricalHyperparameter('objective', ['binary:logistic']),
    CSH.CategoricalHyperparameter('eval_metric', ['error']),
]
for _dimension in _search_dimensions:
    config_space.add_hyperparameter(_dimension)

# Set up the BOHB search algorithm. The seed makes the sequence of sampled
# configurations repeatable across kernel restarts.
bohb_search = TuneBOHB(config_space, metric="custom_score", mode="max", seed=42)

# Set up the HyperBandForBOHB scheduler (must optimise the same metric/mode as
# the searcher).
# NOTE(review): train_xgboost reports a single result per run, so the scheduler
# only ever sees training_iteration == 1 -- confirm that successive halving is
# actually wanted here.
bohb_scheduler = HyperBandForBOHB(
    time_attr="training_iteration",
    metric="custom_score",
    mode="max"
)

# Keep trial directory names short: only the trial id is used
def trial_dirname_creator(trial):
    """Return the directory name for ``trial``: ``"trial_"`` followed by its ``trial_id``."""
    return "trial_{}".format(trial.trial_id)

# Launch the hyperparameter search: 200 sampled configurations, one CPU and
# no GPU per trial, driven by the BOHB searcher/scheduler pair.
_run_options = {
    "resources_per_trial": {"cpu": 1, "gpu": 0},
    "search_alg": bohb_search,
    "scheduler": bohb_scheduler,
    "num_samples": 200,
    "trial_dirname_creator": trial_dirname_creator,
}
analysis = tune.run(train_xgboost, **_run_options)

# Configuration of the trial with the highest reported custom_score
best_config = analysis.get_best_config(mode="max", metric="custom_score")
print("Best config: ", best_config)


2024-05-29 13:06:04,002	INFO worker.py:1749 -- Started a local Ray instance.
2024-05-29 13:06:05,344	INFO tune.py:253 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `tune.run(...)`.
2024-05-29 13:06:05,346	INFO tune.py:614 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2024-05-29 13:12:21
Running for:,00:06:15.83
Memory:,26.5/63.8 GiB

Trial name,status,loc,alpha,colsample_bytree,colset,eta,eval_metric,lambda,max_depth,min_child_weight,objective,subsample,iter,total time (s),custom_score,correct_class_1_pred ictions,num_features_used
train_xgboost_06334eee,TERMINATED,127.0.0.1:60188,0.00333344,0.596359,"[101, 102, 105]",0.0191392,error,0.0988809,7,2,binary:logistic,0.902236,1,0.142146,6500,142,3
train_xgboost_c88c53b4,TERMINATED,127.0.0.1:10912,0.341489,0.89622,"[100, 101, 102, 103]",0.0902793,error,0.110539,7,5,binary:logistic,0.85042,1,0.132,6550,147,4
train_xgboost_cb35cdd8,TERMINATED,127.0.0.1:6804,0.00256333,0.721823,"[100, 101, 102,_ff50",0.0134443,error,0.0054689,10,2,binary:logistic,0.663759,1,0.19,6800,156,5
train_xgboost_0043c127,TERMINATED,127.0.0.1:15496,0.0299109,0.738962,"[100, 101, 102, 103]",0.0127133,error,0.352018,5,1,binary:logistic,0.939125,1,0.0739994,6650,149,4
train_xgboost_088d1b6a,TERMINATED,127.0.0.1:45000,0.032126,0.732802,"[101, 102, 103]",0.0317989,error,0.0248156,9,5,binary:logistic,0.511093,1,0.102,6700,146,3
train_xgboost_ead43c46,TERMINATED,127.0.0.1:28660,1.80662,0.748768,"[100, 101, 102, 103]",0.0668575,error,0.0114032,9,4,binary:logistic,0.868638,1,0.104992,6400,144,4
train_xgboost_b2fa7de9,TERMINATED,127.0.0.1:33156,0.00168054,0.567272,"[101, 102, 103, 105]",0.0523394,error,7.381,6,2,binary:logistic,0.93044,1,0.104001,7000,156,4
train_xgboost_acd58799,TERMINATED,127.0.0.1:56644,0.016423,0.533183,"[102, 103, 105]",0.0164338,error,0.305881,10,5,binary:logistic,0.670072,1,0.0949998,6250,137,3
train_xgboost_e9b7c619,TERMINATED,127.0.0.1:46640,0.0461799,0.975399,"[104, 102, 103, 105]",0.0498222,error,0.0909972,7,5,binary:logistic,0.882885,1,0.100999,6600,148,4
train_xgboost_a0e4bc67,TERMINATED,127.0.0.1:53540,0.20863,0.611956,"[102, 103, 105]",0.0448569,error,3.50176,3,1,binary:logistic,0.704529,1,0.0619993,6900,150,3


[36m(train_xgboost pid=60188)[0m 142 1000


Trial name,correct_class_1_predictions,custom_score,num_features_used
train_xgboost_0043c127,149,6650,4
train_xgboost_02dc74a4,150,6700,4
train_xgboost_0589c01b,152,6800,4
train_xgboost_06334eee,142,6500,3
train_xgboost_07408a99,149,6650,4
train_xgboost_088d1b6a,146,6700,3
train_xgboost_09b8cbc5,154,6700,5
train_xgboost_0bdc02c7,148,6600,4
train_xgboost_0c2fc569,154,6900,4
train_xgboost_0d2596d2,144,6600,3


[36m(train_xgboost pid=15496)[0m 149 1000[32m [repeated 3x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/user-guides/configure-logging.html#log-deduplication for more options.)[0m
[36m(train_xgboost pid=14884)[0m 156 1000[32m [repeated 3x across cluster][0m
[36m(train_xgboost pid=11500)[0m 150 1000[32m [repeated 3x across cluster][0m
[36m(train_xgboost pid=44916)[0m 145 1000[32m [repeated 3x across cluster][0m
[36m(train_xgboost pid=23608)[0m 145 1000[32m [repeated 3x across cluster][0m
[36m(train_xgboost pid=14644)[0m 148 1000[32m [repeated 3x across cluster][0m
[36m(train_xgboost pid=56548)[0m 159 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=33164)[0m 157 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=25852)[0m 150 1000[32m [repeated 3x across cluster][0m
[36m(train_xgboost pid=3448)[0m 158 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=15028)[0m 151 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=37796)[0m 151 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=30900)[0m 151 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=31884)[0m 154 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=16884)[0m 152 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=1892)[0m 155 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=25888)[0m 156 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=15400)[0m 149 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=54036)[0m 145 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=36112)[0m 152 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=59164)[0m 140 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=53576)[0m 143 1000[32m [repeated 3x across cluster][0m
[36m(train_xgboost pid=10504)[0m 145 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=60132)[0m 153 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=34024)[0m 148 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=8736)[0m 146 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=43044)[0m 151 1000[32m [repeated 3x across cluster][0m


2024-05-29 13:08:32,858	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1
2024-05-29 13:08:34,372	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
2024-05-29 13:08:36,097	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1
2024-05-29 13:08:37,677	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1


[36m(train_xgboost pid=37740)[0m 153 1000[32m [repeated 5x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
2024-05-29 13:08:39,512	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1
2024-05-29 13:08:41,112	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
2024-05-29 13:08:42,833	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1


[36m(train_xgboost pid=11296)[0m 148 1000[32m [repeated 5x across cluster][0m


2024-05-29 13:08:44,415	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
2024-05-29 13:08:46,195	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1
2024-05-29 13:08:47,809	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1


[36m(train_xgboost pid=11900)[0m 148 1000[32m [repeated 4x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
2024-05-29 13:08:49,556	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1
2024-05-29 13:08:51,155	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
2024-05-29 13:08:52,884	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1
2024-05-29 13:08:54,454	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1


[36m(train_xgboost pid=33468)[0m 152 1000[32m [repeated 6x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
2024-05-29 13:08:56,206	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1
2024-05-29 13:08:57,777	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
2024-05-29 13:08:59,508	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1
2024-05-29 13:09:01,099	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1


[36m(train_xgboost pid=53052)[0m 151 1000[32m [repeated 6x across cluster][0m


2024-05-29 13:09:02,858	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1
2024-05-29 13:09:04,491	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
2024-05-29 13:09:06,408	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1
2024-05-29 13:09:08,133	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1


[36m(train_xgboost pid=45336)[0m 153 1000[32m [repeated 6x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
2024-05-29 13:09:09,864	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1
2024-05-29 13:09:11,432	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
2024-05-29 13:09:13,198	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1
2024-05-29 13:09:14,756	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1


[36m(train_xgboost pid=33516)[0m 153 1000[32m [repeated 6x across cluster][0m


2024-05-29 13:09:16,452	INFO hyperband.py:543 -- Restoring from a previous point in time. Previous=1; Now=1
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=47700)[0m 145 1000[32m [repeated 4x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=56540)[0m 154 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=51536)[0m 144 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=20456)[0m 152 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=62440)[0m 150 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=35844)[0m 141 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=13280)[0m 146 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=20272)[0m 151 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=12308)[0m 148 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=50856)[0m 155 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=18232)[0m 150 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=7200)[0m 148 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=33676)[0m 152 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=54644)[0m 150 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=46396)[0m 147 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=35284)[0m 147 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=26544)[0m 159 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=20432)[0m 148 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=62656)[0m 151 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=63464)[0m 142 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=16972)[0m 150 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=39312)[0m 151 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=11224)[0m 143 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=52160)[0m 148 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=38372)[0m 141 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=2108)[0m 143 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=10920)[0m 146 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=51808)[0m 146 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=2808)[0m 143 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=57952)[0m 154 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=7812)[0m 151 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=62836)[0m 148 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=57876)[0m 151 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=34000)[0m 142 1000[32m [repeated 3x across cluster][0m


  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
  kernel_value = np.ones(Xi.size) * h / (num_levels - 1)


[36m(train_xgboost pid=12076)[0m 146 1000[32m [repeated 3x across cluster][0m


You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.
You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).
2024-05-29 13:12:21,410	INFO tune.py:1007 -- Wrote the latest version of all result files and experiment state to 'C:/Users/Mieszko/ray_results/train_xgboost_2024-05-29_13-06-05' in 0.1450s.
2024-05-29 13:12:21,453	INFO tune.py:1039 -- Total run time: 376.11 seconds (375.68 seconds for the tuning loop).


Best config:  {'alpha': 0.03080700401819511, 'colsample_bytree': 0.7407150540666845, 'colset': '[100, 102, 103, 105]', 'eta': 0.03245934647791296, 'eval_metric': 'error', 'lambda': 0.07890255049708894, 'max_depth': 5, 'min_child_weight': 1, 'objective': 'binary:logistic', 'subsample': 0.6435415172876953}


In [3]:
# Collect the trial results of the search into one DataFrame
df = analysis.dataframe()

In [4]:
# Persist the trial table next to the notebook
df.to_csv(path_or_buf='xgb-raytune-bohb.csv')

In [5]:
# First five trials, in the order the table lists them
df.head(n=5)

Unnamed: 0,custom_score,correct_class_1_predictions,num_features_used,timestamp,checkpoint_dir_name,done,training_iteration,trial_id,date,time_this_iter_s,...,config/colsample_bytree,config/colset,config/eta,config/eval_metric,config/lambda,config/max_depth,config/min_child_weight,config/objective,config/subsample,logdir
0,6500.0,142,3,1716980767,,False,1,06334eee,2024-05-29_13-06-07,0.142146,...,0.596359,"[101, 102, 105]",0.019139,error,0.098881,7,2,binary:logistic,0.902236,06334eee
1,6550.0,147,4,1716980769,,False,1,c88c53b4,2024-05-29_13-06-09,0.132,...,0.89622,"[100, 101, 102, 103]",0.090279,error,0.110539,7,5,binary:logistic,0.85042,c88c53b4
2,6800.0,156,5,1716980771,,False,1,cb35cdd8,2024-05-29_13-06-11,0.19,...,0.721823,"[100, 101, 102, 103, 105]",0.013444,error,0.005469,10,2,binary:logistic,0.663759,cb35cdd8
3,6650.0,149,4,1716980773,,False,1,0043c127,2024-05-29_13-06-13,0.073999,...,0.738962,"[100, 101, 102, 103]",0.012713,error,0.352018,5,1,binary:logistic,0.939125,0043c127
4,6700.0,146,3,1716980775,,False,1,088d1b6a,2024-05-29_13-06-15,0.102,...,0.732802,"[101, 102, 103]",0.031799,error,0.024816,9,5,binary:logistic,0.511093,088d1b6a


In [6]:
# Five best trials by custom_score, highest first
df.sort_values(by='custom_score', ascending=False).head(n=5)

Unnamed: 0,custom_score,correct_class_1_predictions,num_features_used,timestamp,checkpoint_dir_name,done,training_iteration,trial_id,date,time_this_iter_s,...,config/colsample_bytree,config/colset,config/eta,config/eval_metric,config/lambda,config/max_depth,config/min_child_weight,config/objective,config/subsample,logdir
144,7150.0,159,4,1716981043,,False,1,30994418,2024-05-29_13-10-43,0.073999,...,0.740715,"[100, 102, 103, 105]",0.032459,error,0.078903,5,1,binary:logistic,0.643542,30994418
113,7100.0,158,4,1716980989,,False,1,8878bc6d,2024-05-29_13-09-49,0.114002,...,0.775045,"[100, 102, 103, 105]",0.054523,error,0.379331,9,5,binary:logistic,0.908542,8878bc6d
154,7100.0,154,3,1716981061,,False,1,eb45f49b,2024-05-29_13-11-01,0.068005,...,0.957826,"[101, 102, 105]",0.031165,error,0.006269,4,3,binary:logistic,0.517397,eb45f49b
160,7050.0,157,4,1716981071,,False,1,f1776d3d,2024-05-29_13-11-11,0.073004,...,0.842411,"[100, 101, 102, 103]",0.02112,error,0.066908,4,4,binary:logistic,0.590614,f1776d3d
24,7050.0,157,4,1716980924,,False,1,75b62c3b,2024-05-29_13-08-44,0.065001,...,0.689362,"[104, 102, 103, 105]",0.062816,error,2.690235,3,1,binary:logistic,0.618418,75b62c3b
