In [2]:
from pysr import PySRRegressor

Detected Jupyter notebook. Loading juliacall extension. Set `PYSR_AUTOLOAD_EXTENSIONS=no` to disable.


In [1]:
import pmlb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_pinball_loss
import uncertainty_toolbox

In [4]:
import torch

In [2]:
from pmlb import fetch_data, regression_dataset_names

In [6]:
import numpy as np

def get_feature_type(data_column):
  """
  This function determines the feature type (categorical or binary) for a NumPy array column.

  **Note:** This function assumes the data doesn't contain missing values.
          If your data might have missing values, you'll need to handle them
          before using this function (e.g., impute missing values or remove rows).

  Args:
      data_column: A NumPy array representing the data column.

  Returns:
      A string indicating the feature type: "categorical" or "binary".
  """

  # Check for distinct values and data type
  unique_values = np.unique(data_column)
  num_unique_values = len(unique_values)

  # Categorical data has a limited number of distinct values (adjust threshold as needed)
  if num_unique_values <= 10:  # Adjust threshold based on your data and analysis goals
    return "categorical"

  # Binary data has only two distinct values
  if num_unique_values == 2:
    return "binary"

  # If there are more than 10 distinct values and not binary, assume numerical
  # (This might need further refinement depending on your domain knowledge)
  return "numerical"  # Consider a different label for non-categoric

In [7]:
def get_categorical_features(X):
  """
  This function identifies the indices of categorical features in a NumPy array representing a dataset.

  **Note:** This function assumes the data doesn't contain missing values.
          If your data might have missing values, you'll need to handle them
          before using this function (e.g., impute missing values or remove rows).

  Args:
      X: A 2D NumPy array representing the dataset (n_samples x n_features).

  Returns:
      A list of integers representing the indices of categorical features in X.
  """

  categorical_features = []
  for i, col in enumerate(X.T):  # Enumerate to get column index (i)
    unique_values = np.unique(col)
    num_unique_values = len(unique_values)
    data_type = col.dtype

    # Categorical data has a limited number of distinct values (adjust threshold as needed)
    if num_unique_values <= 10:  # Adjust threshold based on your data and analysis goals
      categorical_features.append(i)

  return categorical_features

In [8]:
from sklearn.preprocessing import OneHotEncoder

def create_dummy_variables(X, categorical_features):
    """
    This function creates dummy variables for categorical features in a dataset.

    Args:
        X: A 2D NumPy array representing the dataset (n_samples x n_features).
        categorical_features: A list of integers representing the indices of categorical features in X.

    Returns:
        A 2D NumPy array representing the dataset with dummy variables for categorical features.
    """
    if categorical_features == []:
        return X
    else:
        # Select categorical features from the data
        X_categorical = X[:, categorical_features]

        # Create one-hot encoder
        encoder = OneHotEncoder(sparse=False)  # Set sparse=False for dense output

        # Fit the encoder on the categorical features
        encoder.fit(X_categorical)

        # Transform the categorical features into dummy variables
        X_dummy_categorical = encoder.transform(X_categorical)

        # Get the original non-categorical features (assuming they are numerical)
        X_numerical = np.delete(X, categorical_features, axis=1)  # Delete categorical feature columns

        # Combine the dummy variables and numerical features
        X_with_dummies = np.concatenate([X_numerical, X_dummy_categorical], axis=1)

        return X_with_dummies


In [10]:
regressiondatasets = regression_dataset_names

**SQR**

In [10]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
import seaborn as sb
from pysr import PySRRegressor
from pmlb import fetch_data, regression_dataset_names

def pinball_loss(y_true, y_pred, tau=0.9):
    residuals = y_true - y_pred
    # Calculate loss for each pair of true and predicted values
    loss = np.where(residuals >= 0, tau * residuals, (1 - tau) * -residuals)
    # Compute the mean loss across all predictions
    return np.mean(loss)

test_losses = []
test_coverage = []
test_expressions = []

for regression_dataset in regressiondatasets:
    print(regression_dataset)
    X1, y = fetch_data(regression_dataset, return_X_y=True)
    print(X1)
    X = create_dummy_variables(X1, get_categorical_features(X1))
    print(X)
    kf = KFold(n_splits=5, shuffle=True)
    losses_fold = np.zeros(5)
    coverage_fold = np.zeros(5)
    expressions = ["", "", "", "", ""]
    for i, (train_index, test_index) in enumerate(kf.split(X)):
        train_X, test_X = X[train_index], X[test_index]
        print(train_X)
        train_y, test_y = y[train_index], y[test_index]

        modelq = PySRRegressor(
        niterations=100,  # < Increase me for better results
        binary_operators=["+", "*", "/", "-"],
        unary_operators=["exp", "sin", "cos", "log", "square"],
        complexity_of_operators={"+": 1, "-": 1, "*": 1,  "/": 2, "exp": 4, "sin": 3, "cos": 3, "log": 3, "square": 2},
        # ^ Define operator for SymPy as well
        elementwise_loss="pinball_loss(y_true, y_pred) = max.(0.1 * (y_true - y_pred), (0.1 - 1) * (y_true - y_pred))"

, 
        # ^ Custom loss function (julia syntax)
        temp_equation_file=True
)

        modelq.fit(train_X, train_y)

        y_pred_symbolic = modelq.predict(test_X)
        losses_fold[i] = pinball_loss(test_y, y_pred_symbolic)
        coverage_fold[i] = np.mean(y_pred_symbolic >= test_y)
        expressions[i] = modelq.sympy()

    test_losses.append(losses_fold)
    test_coverage.append(coverage_fold)
    test_expressions.append(expressions)

# Display the results

print(test_losses)
print(test_coverage)
print(test_expressions)


1027_ESL
[[6. 5. 6. 6.]
 [5. 4. 5. 5.]
 [5. 3. 4. 5.]
 ...
 [4. 4. 6. 8.]
 [8. 6. 6. 7.]
 [7. 6. 5. 5.]]
[[0. 0. 0. ... 1. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
[[0. 0. 0. ... 1. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 1. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
Compiling Julia backend...


[ Info: Started!



Expressions evaluated per second: 2.100e+05
Head worker occupation: 31.1%. This is high, and will prevent efficient resource usage. Increase `ncyclesperiteration` to reduce load on head worker.
Progress: 430 / 1500 total iterations (28.667%)
Hall of Fame:
---------------------------------------------------------------------------------------------------
Complexity  Loss       Score     Equation
1           2.357e-01  1.594e+01  y = 7.0002
3           2.051e-01  6.934e-02  y = (7.0002 - x₃₀)
5           1.683e-01  9.888e-02  y = ((x₃₂ - -6.0025) + x₈)
7           1.520e-01  5.109e-02  y = ((x₃₂ - (-6.0025 - x₂₅)) + x₈)
9           1.368e-01  5.277e-02  y = (((x₃₂ - x₂₂) + x₂₅) + (x₈ - -6.0084))
11          1.224e-01  5.555e-02  y = (((x₃₂ - (x₄ + x₂₂)) + (x₈ - -6.0084)) + x₂₅)
13          1.163e-01  2.552e-02  y = (((x₃₂ - ((x₄ + x₂₂) - x₁₈)) + (x₈ - -6.0084)) + x₂₅)
15          1.107e-01  2.469e-02  y = ((((((x₃₂ + (x₈ - -6.0187)) + x₁₈) - x₂₉) - x₃₀) + x₇) - x...
                    

[ Info: Started!


[[0. 0. 0. ... 1. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


[ Info: Started!


In [11]:
regression_dataset_namestry = regression_dataset_names

**LIGHTGBM**

In [12]:
from sklearn.model_selection import KFold
import lightgbm as lgb
from pmlb import fetch_data
import optuna
import pandas as pd
from datetime import datetime
import optuna

def pinball_loss(y_true, y_pred, tau=0.9):
    residuals = y_true - y_pred
    # Calculate loss for each pair of true and predicted values
    loss = np.where(residuals >= 0, tau * residuals, (1 - tau) * -residuals)
    # Compute the mean loss across all predictions
    return np.mean(loss)

def objective(trial, train_X, train_y, val_X, val_y):
    params = {
        'objective': 'quantile',
        'alpha': 0.9,
        'num_leaves': trial.suggest_int('num_leaves', 2, 100),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.5, log=True),
        'max_depth': trial.suggest_int('max_depth', 1, 20),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
    }
    if params['min_child_samples'] >= params['num_leaves']:
        raise optuna.exceptions.TrialPruned()

    model = lgb.LGBMRegressor(**params)
    model.fit(train_X, train_y)
    y_pred = model.predict(val_X)
    return pinball_loss(val_y, y_pred, tau=0.9)

results = []

for regression_dataset in regression_dataset_names:  # Make sure this variable is properly initialized
    X1, y = fetch_data(regression_dataset, return_X_y=True)
    print(X1)
    X = create_dummy_variables(X1, get_categorical_features(X1))
    print(X)
    kf = KFold(n_splits=5, shuffle=True)
    dataset_results = []

    for train_index, val_index in kf.split(X):
        train_X, val_X = X[train_index], X[val_index]
        train_y, val_y = y[train_index], y[val_index]

        study = optuna.create_study(direction='minimize')
        study.optimize(lambda trial: objective(trial, train_X, train_y, val_X, val_y), n_trials=10)

        best_params = study.best_params
        model = lgb.LGBMRegressor(objective='quantile', alpha=0.9, **best_params)
        model.fit(train_X, train_y)
        y_pred = model.predict(val_X)
        loss = pinball_loss(val_y, y_pred, tau=0.9)
        coverage = np.mean(y_pred >= val_y)
        dataset_results.append({'Loss': loss, 'Coverage': coverage})

    results.append({'Dataset': regression_dataset, 'Results': dataset_results})


[I 2024-07-12 10:57:56,777] A new study created in memory with name: no-name-ac26ddfa-a5a0-4921-9414-c1a6d89e8735


[[6. 5. 6. 6.]
 [5. 4. 5. 5.]
 [5. 3. 4. 5.]
 ...
 [4. 4. 6. 8.]
 [8. 6. 6. 7.]
 [7. 6. 5. 5.]]
[[0. 0. 0. ... 1. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


[I 2024-07-12 10:57:57,195] Trial 0 finished with value: 0.08441432664063735 and parameters: {'num_leaves': 95, 'learning_rate': 0.08773027378854753, 'max_depth': 5, 'min_child_samples': 18}. Best is trial 0 with value: 0.08441432664063735.
[I 2024-07-12 10:57:57,210] Trial 1 finished with value: 0.19489795918367347 and parameters: {'num_leaves': 75, 'learning_rate': 0.45158840574616704, 'max_depth': 3, 'min_child_samples': 68}. Best is trial 0 with value: 0.08441432664063735.
[I 2024-07-12 10:57:57,211] Trial 2 pruned. 
[I 2024-07-12 10:57:57,213] Trial 3 pruned. 
[I 2024-07-12 10:57:57,214] Trial 4 pruned. 
[I 2024-07-12 10:57:57,216] Trial 5 pruned. 
[I 2024-07-12 10:57:57,237] Trial 6 finished with value: 0.1001839224424603 and parameters: {'num_leaves': 83, 'learning_rate': 0.1203315753690219, 'max_depth': 18, 'min_child_samples': 29}. Best is trial 0 with value: 0.08441432664063735.
[I 2024-07-12 10:57:57,259] Trial 7 finished with value: 0.10546893131510846 and parameters: {'num

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000064 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 42
[LightGBM] [Info] Number of data points in the train set: 390, number of used features: 21
[LightGBM] [Info] Start training from score 7.000000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000022 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 18
[LightGBM] [Info] Number of data points in the train set: 390, number of used features: 9
[LightGBM] [Info] Start training from score 7.000000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000009 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you ca

[I 2024-07-12 10:57:57,357] Trial 4 finished with value: 0.11858845664204198 and parameters: {'num_leaves': 49, 'learning_rate': 0.06972949486539867, 'max_depth': 20, 'min_child_samples': 35}. Best is trial 4 with value: 0.11858845664204198.
[I 2024-07-12 10:57:57,360] Trial 5 pruned. 
[I 2024-07-12 10:57:57,383] Trial 6 finished with value: 0.15437901247553998 and parameters: {'num_leaves': 90, 'learning_rate': 0.23826631218859273, 'max_depth': 3, 'min_child_samples': 66}. Best is trial 4 with value: 0.11858845664204198.
[I 2024-07-12 10:57:57,385] Trial 7 pruned. 
[I 2024-07-12 10:57:57,409] Trial 8 finished with value: 0.17127731635337323 and parameters: {'num_leaves': 52, 'learning_rate': 0.3055304213624483, 'max_depth': 12, 'min_child_samples': 44}. Best is trial 4 with value: 0.11858845664204198.
[I 2024-07-12 10:57:57,421] Trial 9 finished with value: 0.15584215532540585 and parameters: {'num_leaves': 61, 'learning_rate': 0.03350654652525427, 'max_depth': 2, 'min_child_samples':

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000018 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 18
[LightGBM] [Info] Number of data points in the train set: 390, number of used features: 9
[LightGBM] [Info] Start training from score 7.000000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000022 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 32
[LightGBM] [Info] Number of data points in the train set: 390, number of used features: 16
[LightGBM] [Info] Start training from score 7.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000046 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 22
[Li

[I 2024-07-12 10:57:57,587] Trial 6 finished with value: 0.11496623792216192 and parameters: {'num_leaves': 74, 'learning_rate': 0.04712672938066656, 'max_depth': 17, 'min_child_samples': 44}. Best is trial 6 with value: 0.11496623792216192.
[I 2024-07-12 10:57:57,611] Trial 7 pruned. 
[I 2024-07-12 10:57:57,613] Trial 8 pruned. 
[I 2024-07-12 10:57:57,615] Trial 9 pruned. 
[I 2024-07-12 10:57:57,631] A new study created in memory with name: no-name-96c7e4c6-babf-4b2b-93d7-e984f1334ab4
[I 2024-07-12 10:57:57,634] Trial 0 pruned. 
[I 2024-07-12 10:57:57,636] Trial 1 pruned. 
[I 2024-07-12 10:57:57,637] Trial 2 pruned. 
[I 2024-07-12 10:57:57,638] Trial 3 pruned. 
[I 2024-07-12 10:57:57,642] Trial 4 pruned. 
[I 2024-07-12 10:57:57,645] Trial 5 pruned. 
[I 2024-07-12 10:57:57,647] Trial 6 pruned. 
[I 2024-07-12 10:57:57,678] Trial 7 finished with value: 0.1188072195603842 and parameters: {'num_leaves': 85, 'learning_rate': 0.15936538964598923, 'max_depth': 9, 'min_child_samples': 16}. Bes

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000012 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 32
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 16
[LightGBM] [Info] Start training from score 7.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000029 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 16
[LightGBM] [Info] Start training from score 7.000000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000010 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 42
[L

[I 2024-07-12 10:57:59,709] A new study created in memory with name: no-name-617e8cac-e526-40b8-bd5c-aa04f2eca313
[I 2024-07-12 10:57:59,719] Trial 0 finished with value: 0.11299999999999998 and parameters: {'num_leaves': 86, 'learning_rate': 0.040584355082974054, 'max_depth': 12, 'min_child_samples': 7}. Best is trial 0 with value: 0.11299999999999998.
[I 2024-07-12 10:57:59,720] Trial 1 pruned. 
[I 2024-07-12 10:57:59,734] Trial 2 finished with value: 0.11299999999999998 and parameters: {'num_leaves': 55, 'learning_rate': 0.02282468308676364, 'max_depth': 13, 'min_child_samples': 19}. Best is trial 0 with value: 0.11299999999999998.
[I 2024-07-12 10:57:59,735] Trial 3 pruned. 
[I 2024-07-12 10:57:59,737] Trial 4 pruned. 
[I 2024-07-12 10:57:59,739] Trial 5 pruned. 
[I 2024-07-12 10:57:59,752] Trial 6 finished with value: 0.11299999999999998 and parameters: {'num_leaves': 42, 'learning_rate': 0.07581633738993415, 'max_depth': 13, 'min_child_samples': 30}. Best is trial 0 with value: 0

[[2. 1. 1. ... 2. 1. 1.]
 [1. 2. 3. ... 1. 3. 3.]
 [3. 3. 2. ... 3. 3. 3.]
 ...
 [1. 2. 1. ... 1. 3. 2.]
 [2. 3. 3. ... 2. 2. 3.]
 [2. 3. 3. ... 2. 3. 3.]]
[[0. 1. 0. ... 1. 0. 0.]
 [1. 0. 0. ... 0. 0. 1.]
 [0. 0. 1. ... 0. 0. 1.]
 ...
 [1. 0. 0. ... 0. 1. 0.]
 [0. 1. 0. ... 0. 0. 1.]
 [0. 1. 0. ... 0. 0. 1.]]
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000105 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 62
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 31
[LightGBM] [Info] Start training from score 5.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000137 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 62
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 31
[LightGBM] [Info] Start training from score 5.000000
[LightGBM] [

[I 2024-07-12 10:57:59,888] Trial 8 pruned. 
[I 2024-07-12 10:57:59,890] Trial 9 pruned. 
[I 2024-07-12 10:57:59,900] A new study created in memory with name: no-name-f97ced7f-3283-43ba-8c18-85bd5f0f5481
[I 2024-07-12 10:57:59,913] Trial 0 finished with value: 0.12499999999999999 and parameters: {'num_leaves': 86, 'learning_rate': 0.04158687563009428, 'max_depth': 6, 'min_child_samples': 48}. Best is trial 0 with value: 0.12499999999999999.
[I 2024-07-12 10:57:59,925] Trial 1 finished with value: 0.12499999999999999 and parameters: {'num_leaves': 64, 'learning_rate': 0.29998538943200975, 'max_depth': 15, 'min_child_samples': 26}. Best is trial 0 with value: 0.12499999999999999.
[I 2024-07-12 10:57:59,927] Trial 2 pruned. 
[I 2024-07-12 10:57:59,936] Trial 3 finished with value: 0.12499999999999999 and parameters: {'num_leaves': 83, 'learning_rate': 0.013901703624820246, 'max_depth': 18, 'min_child_samples': 37}. Best is trial 0 with value: 0.12499999999999999.
[I 2024-07-12 10:57:59,94

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000094 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 62
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 31
[LightGBM] [Info] Start training from score 5.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000090 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 62
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 31
[LightGBM] [Info] Start training from score 5.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000087 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 62
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 31
[LightGBM] [Info] Start training from s

[I 2024-07-12 10:58:02,109] A new study created in memory with name: no-name-2abf7528-317a-4c36-bac9-b3414c7b1e80
[I 2024-07-12 10:58:02,211] Trial 0 finished with value: 0.11059411324612203 and parameters: {'num_leaves': 61, 'learning_rate': 0.07557620649706313, 'max_depth': 15, 'min_child_samples': 19}. Best is trial 0 with value: 0.11059411324612203.
[I 2024-07-12 10:58:02,225] Trial 1 finished with value: 0.15049999999999997 and parameters: {'num_leaves': 84, 'learning_rate': 0.0635157732369555, 'max_depth': 1, 'min_child_samples': 69}. Best is trial 0 with value: 0.11059411324612203.
[I 2024-07-12 10:58:02,227] Trial 2 pruned. 
[I 2024-07-12 10:58:02,228] Trial 3 pruned. 
[I 2024-07-12 10:58:02,241] Trial 4 finished with value: 0.14782064888942797 and parameters: {'num_leaves': 84, 'learning_rate': 0.15760890237211211, 'max_depth': 2, 'min_child_samples': 58}. Best is trial 0 with value: 0.11059411324612203.


[[4. 2. 3. 0.]
 [3. 3. 0. 3.]
 [2. 4. 1. 0.]
 ...
 [0. 0. 1. 4.]
 [0. 2. 1. 3.]
 [2. 0. 3. 4.]]
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]
 [0. 0. 1. ... 0. 0. 0.]
 ...
 [1. 0. 0. ... 0. 0. 1.]
 [1. 0. 0. ... 0. 1. 0.]
 [0. 0. 1. ... 0. 0. 1.]]
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000226 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 40
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 20
[LightGBM] [Info] Start training from score 3.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000066 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 40
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 20
[LightGBM] [Info] Start training from score 3.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead o

[I 2024-07-12 10:58:02,315] Trial 5 finished with value: 0.11503802392576155 and parameters: {'num_leaves': 65, 'learning_rate': 0.22894897935344444, 'max_depth': 17, 'min_child_samples': 8}. Best is trial 0 with value: 0.11059411324612203.
[I 2024-07-12 10:58:02,316] Trial 6 pruned. 
[I 2024-07-12 10:58:02,351] Trial 7 finished with value: 0.10932885617769621 and parameters: {'num_leaves': 48, 'learning_rate': 0.03989158658411733, 'max_depth': 17, 'min_child_samples': 20}. Best is trial 7 with value: 0.10932885617769621.
[I 2024-07-12 10:58:02,381] Trial 8 finished with value: 0.10698676145460227 and parameters: {'num_leaves': 65, 'learning_rate': 0.04622883888656175, 'max_depth': 6, 'min_child_samples': 29}. Best is trial 8 with value: 0.10698676145460227.
[I 2024-07-12 10:58:02,411] Trial 9 finished with value: 0.10310420009231601 and parameters: {'num_leaves': 36, 'learning_rate': 0.060145854702349844, 'max_depth': 9, 'min_child_samples': 28}. Best is trial 9 with value: 0.10310420

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000053 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 40
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 20
[LightGBM] [Info] Start training from score 3.000000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000084 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 40
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 20
[LightGBM] [Info] Start training from score 3.000000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000022 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 40
[L

[I 2024-07-12 10:58:02,517] Trial 2 finished with value: 0.10117443055959788 and parameters: {'num_leaves': 84, 'learning_rate': 0.03218017329175967, 'max_depth': 4, 'min_child_samples': 49}. Best is trial 1 with value: 0.09791232389625418.
[I 2024-07-12 10:58:02,554] Trial 3 finished with value: 0.09275824525357468 and parameters: {'num_leaves': 89, 'learning_rate': 0.09996734654999892, 'max_depth': 19, 'min_child_samples': 33}. Best is trial 3 with value: 0.09275824525357468.
[I 2024-07-12 10:58:02,557] Trial 4 pruned. 
[I 2024-07-12 10:58:02,604] Trial 5 finished with value: 0.10558945325796792 and parameters: {'num_leaves': 100, 'learning_rate': 0.017108425424366466, 'max_depth': 19, 'min_child_samples': 15}. Best is trial 3 with value: 0.09275824525357468.
[I 2024-07-12 10:58:02,650] Trial 6 finished with value: 0.09043649364411376 and parameters: {'num_leaves': 71, 'learning_rate': 0.11481784638057274, 'max_depth': 19, 'min_child_samples': 13}. Best is trial 6 with value: 0.09043

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001192 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 40
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 20
[LightGBM] [Info] Start training from score 3.000000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000025 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 40
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 20
[LightGBM] [Info] Start training from score 3.000000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000015 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 40
[L

[I 2024-07-12 10:58:02,697] Trial 9 pruned. 
[I 2024-07-12 10:58:02,751] A new study created in memory with name: no-name-42265029-800e-46bc-921a-303ed83b2d90
[I 2024-07-12 10:58:02,753] Trial 0 pruned. 
[I 2024-07-12 10:58:02,754] Trial 1 pruned. 
[I 2024-07-12 10:58:02,767] Trial 2 finished with value: 0.16299999999999998 and parameters: {'num_leaves': 82, 'learning_rate': 0.02904854613591652, 'max_depth': 1, 'min_child_samples': 69}. Best is trial 2 with value: 0.16299999999999998.
[I 2024-07-12 10:58:02,802] Trial 3 finished with value: 0.12322679898885416 and parameters: {'num_leaves': 78, 'learning_rate': 0.04560180238981456, 'max_depth': 11, 'min_child_samples': 35}. Best is trial 3 with value: 0.12322679898885416.
[I 2024-07-12 10:58:02,839] Trial 4 finished with value: 0.11329573048189419 and parameters: {'num_leaves': 51, 'learning_rate': 0.32172910353239353, 'max_depth': 19, 'min_child_samples': 28}. Best is trial 4 with value: 0.11329573048189419.
[I 2024-07-12 10:58:02,842

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000015 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 40
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 20
[LightGBM] [Info] Start training from score 3.000000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000032 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 40
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 20
[LightGBM] [Info] Start training from score 3.000000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000015 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you c

[I 2024-07-12 10:58:02,917] Trial 8 finished with value: 0.10888346887035003 and parameters: {'num_leaves': 63, 'learning_rate': 0.42894860209574853, 'max_depth': 15, 'min_child_samples': 22}. Best is trial 8 with value: 0.10888346887035003.
[I 2024-07-12 10:58:02,919] Trial 9 pruned. 
[I 2024-07-12 10:58:02,959] A new study created in memory with name: no-name-a13e8d09-12fa-45e6-8a6b-98c1b499ef1f
[I 2024-07-12 10:58:03,069] Trial 0 finished with value: 0.1252160419063688 and parameters: {'num_leaves': 75, 'learning_rate': 0.013067098189135892, 'max_depth': 13, 'min_child_samples': 29}. Best is trial 0 with value: 0.1252160419063688.
[I 2024-07-12 10:58:03,093] Trial 1 finished with value: 0.11268036168349899 and parameters: {'num_leaves': 97, 'learning_rate': 0.20544541807644923, 'max_depth': 11, 'min_child_samples': 93}. Best is trial 1 with value: 0.11268036168349899.
[I 2024-07-12 10:58:03,094] Trial 2 pruned. 
[I 2024-07-12 10:58:03,126] Trial 3 finished with value: 0.118146452914

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000103 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 40
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 20
[LightGBM] [Info] Start training from score 3.000000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000016 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 40
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 20
[LightGBM] [Info] Start training from score 3.000000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000017 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 40
[L

[I 2024-07-12 10:58:03,128] Trial 4 pruned. 
[I 2024-07-12 10:58:03,130] Trial 5 pruned. 
[I 2024-07-12 10:58:03,164] Trial 6 finished with value: 0.12026455883794795 and parameters: {'num_leaves': 84, 'learning_rate': 0.06437762397938873, 'max_depth': 20, 'min_child_samples': 76}. Best is trial 1 with value: 0.11268036168349899.
[I 2024-07-12 10:58:03,191] Trial 7 finished with value: 0.12301135441279279 and parameters: {'num_leaves': 64, 'learning_rate': 0.020779435540480493, 'max_depth': 5, 'min_child_samples': 38}. Best is trial 1 with value: 0.11268036168349899.
[I 2024-07-12 10:58:03,217] Trial 8 finished with value: 0.11432188342597184 and parameters: {'num_leaves': 88, 'learning_rate': 0.055810223370709386, 'max_depth': 5, 'min_child_samples': 43}. Best is trial 1 with value: 0.11268036168349899.
[I 2024-07-12 10:58:03,218] Trial 9 pruned. 
[I 2024-07-12 10:58:03,249] A new study created in memory with name: no-name-9b341e2e-eb92-4dd7-8fea-e61ab042056b
[I 2024-07-12 10:58:03,27

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000017 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 40
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 20
[LightGBM] [Info] Start training from score 3.000000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000020 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 40
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 20
[LightGBM] [Info] Start training from score 3.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000054 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 40
[L

[I 2024-07-12 10:58:03,324] Trial 2 pruned. 
[I 2024-07-12 10:58:03,326] Trial 3 pruned. 
[I 2024-07-12 10:58:03,346] Trial 4 finished with value: 0.12431128217793425 and parameters: {'num_leaves': 86, 'learning_rate': 0.016572152672330573, 'max_depth': 6, 'min_child_samples': 37}. Best is trial 0 with value: 0.11283660712168554.
[I 2024-07-12 10:58:03,403] Trial 5 finished with value: 0.1086832695718939 and parameters: {'num_leaves': 77, 'learning_rate': 0.18320780482598206, 'max_depth': 16, 'min_child_samples': 9}. Best is trial 5 with value: 0.1086832695718939.
[I 2024-07-12 10:58:03,429] Trial 6 finished with value: 0.1102226059756989 and parameters: {'num_leaves': 95, 'learning_rate': 0.09850274110811492, 'max_depth': 19, 'min_child_samples': 27}. Best is trial 5 with value: 0.1086832695718939.
[I 2024-07-12 10:58:03,430] Trial 7 pruned. 
[I 2024-07-12 10:58:03,496] Trial 8 finished with value: 0.10514076577023106 and parameters: {'num_leaves': 57, 'learning_rate': 0.0416673759144

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000012 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 40
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 20
[LightGBM] [Info] Start training from score 3.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000040 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 40
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 20
[LightGBM] [Info] Start training from score 3.000000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000015 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 40
[L

[I 2024-07-12 10:58:03,546] Trial 9 finished with value: 0.11756476539354235 and parameters: {'num_leaves': 62, 'learning_rate': 0.15062531882220465, 'max_depth': 18, 'min_child_samples': 10}. Best is trial 8 with value: 0.10514076577023106.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000014 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 40
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 20
[LightGBM] [Info] Start training from score 3.000000


[I 2024-07-12 10:58:05,699] A new study created in memory with name: no-name-c3d577b8-4b54-409d-92ef-80496b03fef1
[I 2024-07-12 10:58:05,720] Trial 0 finished with value: 0.3001884678893835 and parameters: {'num_leaves': 96, 'learning_rate': 0.01900767207269195, 'max_depth': 4, 'min_child_samples': 14}. Best is trial 0 with value: 0.3001884678893835.
[I 2024-07-12 10:58:05,747] Trial 1 finished with value: 0.31612949417930225 and parameters: {'num_leaves': 67, 'learning_rate': 0.05378374853082665, 'max_depth': 5, 'min_child_samples': 53}. Best is trial 0 with value: 0.3001884678893835.
[I 2024-07-12 10:58:05,778] Trial 2 finished with value: 0.32550273015608866 and parameters: {'num_leaves': 70, 'learning_rate': 0.0671435719608114, 'max_depth': 8, 'min_child_samples': 31}. Best is trial 0 with value: 0.3001884678893835.
[I 2024-07-12 10:58:05,780] Trial 3 pruned. 
[I 2024-07-12 10:58:05,782] Trial 4 pruned. 
[I 2024-07-12 10:58:05,806] Trial 5 finished with value: 0.3046963272798117 an

[[13.  6. 12.  2.]
 [ 2.  7.  9.  2.]
 [12.  8.  7.  6.]
 ...
 [ 1.  2. 12.  4.]
 [ 1.  2. 12.  6.]
 [10.  3.  6. 14.]]
[[13.  6. 12.  2.]
 [ 2.  7.  9.  2.]
 [12.  8.  7.  6.]
 ...
 [ 1.  2. 12.  4.]
 [ 1.  2. 12.  6.]
 [10.  3.  6. 14.]]
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000013 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 55
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 4
[LightGBM] [Info] Start training from score 7.000000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000013 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 55
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 4
[LightGBM] 

[I 2024-07-12 10:58:05,900] Trial 1 pruned. 
[I 2024-07-12 10:58:05,902] Trial 2 pruned. 
[I 2024-07-12 10:58:05,903] Trial 3 pruned. 
[I 2024-07-12 10:58:05,904] Trial 4 pruned. 
[I 2024-07-12 10:58:05,907] Trial 5 pruned. 
[I 2024-07-12 10:58:05,932] Trial 6 finished with value: 0.3069303006218319 and parameters: {'num_leaves': 91, 'learning_rate': 0.017110580909530132, 'max_depth': 6, 'min_child_samples': 74}. Best is trial 6 with value: 0.3069303006218319.
[I 2024-07-12 10:58:06,001] Trial 7 finished with value: 0.3127530856285637 and parameters: {'num_leaves': 42, 'learning_rate': 0.019465642871928666, 'max_depth': 15, 'min_child_samples': 9}. Best is trial 6 with value: 0.3069303006218319.
[I 2024-07-12 10:58:06,031] Trial 8 finished with value: 0.32554231885996177 and parameters: {'num_leaves': 85, 'learning_rate': 0.22969954312992902, 'max_depth': 16, 'min_child_samples': 31}. Best is trial 6 with value: 0.3069303006218319.
[I 2024-07-12 10:58:06,032] Trial 9 pruned. 
[I 2024-0

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000046 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 55
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 4
[LightGBM] [Info] Start training from score 7.000000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000012 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 55
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 4
[LightGBM] [Info] Start training from score 7.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000412 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 55
[Lig

[I 2024-07-12 10:58:06,140] Trial 2 finished with value: 0.2906384011623721 and parameters: {'num_leaves': 48, 'learning_rate': 0.18220969819213154, 'max_depth': 20, 'min_child_samples': 29}. Best is trial 1 with value: 0.2821471018024176.
[I 2024-07-12 10:58:06,234] Trial 3 finished with value: 0.29127283698502543 and parameters: {'num_leaves': 84, 'learning_rate': 0.044924824325650724, 'max_depth': 11, 'min_child_samples': 7}. Best is trial 1 with value: 0.2821471018024176.
[I 2024-07-12 10:58:06,236] Trial 4 pruned. 
[I 2024-07-12 10:58:06,238] Trial 5 pruned. 
[I 2024-07-12 10:58:06,270] Trial 6 finished with value: 0.2931735842570178 and parameters: {'num_leaves': 35, 'learning_rate': 0.10656864091397039, 'max_depth': 20, 'min_child_samples': 25}. Best is trial 1 with value: 0.2821471018024176.
[I 2024-07-12 10:58:06,271] Trial 7 pruned. 
[I 2024-07-12 10:58:06,273] Trial 8 pruned. 
[I 2024-07-12 10:58:06,274] Trial 9 pruned. 


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000039 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 55
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 4
[LightGBM] [Info] Start training from score 7.000000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000011 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 55
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 4
[LightGBM] [Info] Start training from score 7.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000115 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 55
[LightGBM] [Info] Number of data points in the train set: 800, numbe

[I 2024-07-12 10:58:06,395] A new study created in memory with name: no-name-9d7d0d26-0950-4cdb-942b-b46142242d2a
[I 2024-07-12 10:58:06,432] Trial 0 finished with value: 0.3015913285801797 and parameters: {'num_leaves': 97, 'learning_rate': 0.26375842368036123, 'max_depth': 6, 'min_child_samples': 33}. Best is trial 0 with value: 0.3015913285801797.
[I 2024-07-12 10:58:06,433] Trial 1 pruned. 
[I 2024-07-12 10:58:06,434] Trial 2 pruned. 
[I 2024-07-12 10:58:06,436] Trial 3 pruned. 
[I 2024-07-12 10:58:06,437] Trial 4 pruned. 
[I 2024-07-12 10:58:06,439] Trial 5 pruned. 
[I 2024-07-12 10:58:06,463] Trial 6 finished with value: 0.2830866987314773 and parameters: {'num_leaves': 92, 'learning_rate': 0.042855041745402396, 'max_depth': 4, 'min_child_samples': 6}. Best is trial 6 with value: 0.2830866987314773.
[I 2024-07-12 10:58:06,465] Trial 7 pruned. 
[I 2024-07-12 10:58:06,466] Trial 8 pruned. 
[I 2024-07-12 10:58:06,467] Trial 9 pruned. 
[I 2024-07-12 10:58:06,492] A new study created 

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000060 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 55
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 4
[LightGBM] [Info] Start training from score 7.000000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000011 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 55
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 4
[LightGBM] [Info] Start training from score 7.000000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000011 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 55
[Lig

[I 2024-07-12 10:58:06,560] Trial 7 pruned. 
[I 2024-07-12 10:58:06,580] Trial 8 finished with value: 0.2838028870141133 and parameters: {'num_leaves': 57, 'learning_rate': 0.02967046982089, 'max_depth': 4, 'min_child_samples': 31}. Best is trial 4 with value: 0.2754973574041286.
[I 2024-07-12 10:58:06,581] Trial 9 pruned. 


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000047 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 55
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 4
[LightGBM] [Info] Start training from score 7.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000038 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 55
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 4
[LightGBM] [Info] Start training from score 7.000000


[I 2024-07-12 10:58:08,687] A new study created in memory with name: no-name-c5256c60-1cfa-4870-a6d6-9e5c12e45d2e
[I 2024-07-12 10:58:08,695] Trial 0 finished with value: 6.81 and parameters: {'num_leaves': 51, 'learning_rate': 0.07544912418976756, 'max_depth': 6, 'min_child_samples': 39}. Best is trial 0 with value: 6.81.
[I 2024-07-12 10:58:08,696] Trial 1 pruned. 
[I 2024-07-12 10:58:08,708] Trial 2 finished with value: 3.930215932643688 and parameters: {'num_leaves': 32, 'learning_rate': 0.07839249639418297, 'max_depth': 2, 'min_child_samples': 10}. Best is trial 2 with value: 3.930215932643688.
[I 2024-07-12 10:58:08,711] Trial 3 pruned. 
[I 2024-07-12 10:58:08,719] Trial 4 finished with value: 6.81 and parameters: {'num_leaves': 98, 'learning_rate': 0.210557151976235, 'max_depth': 11, 'min_child_samples': 42}. Best is trial 2 with value: 3.930215932643688.
[I 2024-07-12 10:58:08,728] Trial 5 finished with value: 6.81 and parameters: {'num_leaves': 63, 'learning_rate': 0.027850384

[[7.90999985e+01 1.51000000e+02 1.00000000e+00 9.10000000e+01
  5.80000000e+01 5.60000000e+01 5.10000000e+02 9.50000000e+02
  3.30000000e+01 3.01000000e+02 1.08000000e+02 4.10000000e+01
  3.94000000e+02]
 [1.63500000e+02 1.43000000e+02 0.00000000e+00 1.13000000e+02
  1.03000000e+02 9.50000000e+01 5.83000000e+02 1.01200000e+03
  1.30000000e+01 1.02000000e+02 9.60000000e+01 3.60000000e+01
  5.57000000e+02]
 [5.77999992e+01 1.42000000e+02 1.00000000e+00 8.90000000e+01
  4.50000000e+01 4.40000000e+01 5.33000000e+02 9.69000000e+02
  1.80000000e+01 2.19000000e+02 9.40000000e+01 3.30000000e+01
  3.18000000e+02]
 [1.96899994e+02 1.36000000e+02 0.00000000e+00 1.21000000e+02
  1.49000000e+02 1.41000000e+02 5.77000000e+02 9.94000000e+02
  1.57000000e+02 8.00000000e+01 1.02000000e+02 3.90000000e+01
  6.73000000e+02]
 [1.23400002e+02 1.41000000e+02 0.00000000e+00 1.21000000e+02
  1.09000000e+02 1.01000000e+02 5.91000000e+02 9.85000000e+02
  1.80000000e+01 3.00000000e+01 9.10000000e+01 2.00000000e+0

[I 2024-07-12 10:58:08,880] Trial 3 finished with value: 7.14111107720269 and parameters: {'num_leaves': 40, 'learning_rate': 0.06124854679640018, 'max_depth': 17, 'min_child_samples': 37}. Best is trial 1 with value: 2.5609643042544934.
[I 2024-07-12 10:58:08,896] Trial 4 finished with value: 7.101247768202949 and parameters: {'num_leaves': 45, 'learning_rate': 0.3726646760802079, 'max_depth': 5, 'min_child_samples': 7}. Best is trial 1 with value: 2.5609643042544934.
[I 2024-07-12 10:58:08,902] Trial 5 finished with value: 7.14111107720269 and parameters: {'num_leaves': 75, 'learning_rate': 0.07151551125122686, 'max_depth': 15, 'min_child_samples': 59}. Best is trial 1 with value: 2.5609643042544934.
[I 2024-07-12 10:58:08,915] Trial 6 finished with value: 6.641539489278191 and parameters: {'num_leaves': 44, 'learning_rate': 0.1500808663828537, 'max_depth': 14, 'min_child_samples': 19}. Best is trial 1 with value: 2.5609643042544934.
[I 2024-07-12 10:58:08,922] Trial 7 finished with 

[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 38, number of used features: 0
[LightGBM] [Info] Start training from score 250.300003
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000029 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 163
[LightGBM] [Info] Number of data points in the train set: 38, number of used features: 14
[LightGBM] [Info] Start training from score 250.300003
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 38, number of used features: 0
[LightGBM] [Info] Start training from score 250.300003
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000025 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 26
[LightGBM] [Info] Number of data points in the train set: 38, number of used features: 2
[LightGBM] [Info] Start tra

[I 2024-07-12 10:58:09,065] A new study created in memory with name: no-name-84aa6f34-1ae8-4b62-a77e-2124d3766f7e
[I 2024-07-12 10:58:09,067] Trial 0 pruned. 
[I 2024-07-12 10:58:09,068] Trial 1 pruned. 
[I 2024-07-12 10:58:09,069] Trial 2 pruned. 
[I 2024-07-12 10:58:09,077] Trial 3 finished with value: 7.301110500759547 and parameters: {'num_leaves': 98, 'learning_rate': 0.011776851023867873, 'max_depth': 1, 'min_child_samples': 21}. Best is trial 3 with value: 7.301110500759547.
[I 2024-07-12 10:58:09,078] Trial 4 pruned. 
[I 2024-07-12 10:58:09,080] Trial 5 pruned. 
[I 2024-07-12 10:58:09,082] Trial 6 pruned. 
[I 2024-07-12 10:58:09,083] Trial 7 pruned. 
[I 2024-07-12 10:58:09,084] Trial 8 pruned. 
[I 2024-07-12 10:58:09,092] Trial 9 finished with value: 7.301110500759547 and parameters: {'num_leaves': 79, 'learning_rate': 0.016338186496671212, 'max_depth': 20, 'min_child_samples': 73}. Best is trial 3 with value: 7.301110500759547.


[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 38, number of used features: 0
[LightGBM] [Info] Start training from score 251.199997
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 38, number of used features: 0
[LightGBM] [Info] Start training from score 251.899994
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 38, number of used features: 0
[LightGBM] [Info] Start training from score 251.899994
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 38, number of used features: 0
[LightGBM] [Info] Start training from score 251.899994


[I 2024-07-12 10:58:11,158] A new study created in memory with name: no-name-406bf3a1-2f94-4520-a662-c26e230948fd
[I 2024-07-12 10:58:11,165] Trial 0 finished with value: 0.827400016784668 and parameters: {'num_leaves': 24, 'learning_rate': 0.020179577428739007, 'max_depth': 10, 'min_child_samples': 20}. Best is trial 0 with value: 0.827400016784668.
[I 2024-07-12 10:58:11,166] Trial 1 pruned. 
[I 2024-07-12 10:58:11,167] Trial 2 pruned. 
[I 2024-07-12 10:58:11,171] Trial 3 finished with value: 0.827400016784668 and parameters: {'num_leaves': 83, 'learning_rate': 0.011999674314079004, 'max_depth': 11, 'min_child_samples': 80}. Best is trial 0 with value: 0.827400016784668.
[I 2024-07-12 10:58:11,173] Trial 4 pruned. 
[I 2024-07-12 10:58:11,180] Trial 5 finished with value: 0.827400016784668 and parameters: {'num_leaves': 86, 'learning_rate': 0.042597745925300606, 'max_depth': 1, 'min_child_samples': 65}. Best is trial 0 with value: 0.827400016784668.
[I 2024-07-12 10:58:11,181] Trial 6

[[ 0.         64.47000122 83.         57.5       ]
 [ 0.         59.20000076 78.90000153 49.70000076]
 [ 0.         58.77000046 75.40000153 51.40000153]
 [ 0.         55.93999863 70.19999695 50.79999924]
 [ 0.         75.54000092 93.30000305 70.        ]
 [ 0.         63.00999832 79.30000305 55.        ]
 [ 0.         57.65000153 70.95999908 52.34999847]
 [ 0.         54.90999985 71.59999847 47.79999924]
 [ 0.         54.00999832 70.30000305 45.70000076]
 [ 0.         58.93999863 75.5        51.70000076]
 [ 0.         51.33000183 63.5        46.        ]
 [ 1.         55.86000061 69.5        50.40000153]
 [ 0.         71.08999634 91.19999695 64.40000153]
 [ 0.         53.68999863 64.90000153 49.59999847]
 [ 0.         72.33000183 90.5        64.09999847]
 [ 1.         59.04999924 73.09999847 54.        ]
 [ 0.         66.52999878 92.69999695 54.90000153]
 [ 0.         53.95000076 67.09999847 49.70000076]
 [ 1.         54.70999908 70.19999695 47.90000153]
 [ 1.         67.16000366 86.90

[I 2024-07-12 10:58:11,349] Trial 0 pruned. 
[I 2024-07-12 10:58:11,351] Trial 1 pruned. 
[I 2024-07-12 10:58:11,352] Trial 2 pruned. 
[I 2024-07-12 10:58:11,368] Trial 3 finished with value: 0.4837836286790392 and parameters: {'num_leaves': 20, 'learning_rate': 0.06266398071656328, 'max_depth': 6, 'min_child_samples': 6}. Best is trial 3 with value: 0.4837836286790392.
[I 2024-07-12 10:58:11,369] Trial 4 pruned. 
[I 2024-07-12 10:58:11,375] Trial 5 finished with value: 0.7079999160766595 and parameters: {'num_leaves': 99, 'learning_rate': 0.022410695894264695, 'max_depth': 6, 'min_child_samples': 86}. Best is trial 3 with value: 0.4837836286790392.
[I 2024-07-12 10:58:11,377] Trial 6 pruned. 
[I 2024-07-12 10:58:11,379] Trial 7 pruned. 
[I 2024-07-12 10:58:11,436] Trial 8 finished with value: 0.7079999160766595 and parameters: {'num_leaves': 47, 'learning_rate': 0.015320734020920532, 'max_depth': 19, 'min_child_samples': 25}. Best is trial 3 with value: 0.4837836286790392.
[I 2024-07-

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000048 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 48
[LightGBM] [Info] Number of data points in the train set: 40, number of used features: 5
[LightGBM] [Info] Start training from score 50.090000
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 40, number of used features: 0
[LightGBM] [Info] Start training from score 50.090000
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 40, number of used features: 0
[LightGBM] [Info] Start training from score 50.090000
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 40, number of used features: 0
[LightGBM] [Info] Start training from score 50.090000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of tes

[I 2024-07-12 10:58:27,622] A new study created in memory with name: no-name-adc20373-d475-43ba-b7b9-c6f34bd8a57a
[I 2024-07-12 10:58:27,626] Trial 0 pruned. 
[I 2024-07-12 10:58:27,627] Trial 1 pruned. 
[I 2024-07-12 10:58:27,629] Trial 2 pruned. 


[[ 31.75653076   0.72064298 204.77520752 ...   0.           1.
    0.        ]
 [ 44.62687302  12.94769669 324.13531494 ...   0.           0.
    0.        ]
 [ 56.92020798  12.80589867 314.97396851 ...   0.           0.
    0.        ]
 ...
 [ 33.83095169  13.90605545 312.3135376  ...   1.           0.
    0.        ]
 [ 70.52314758  10.12849712 212.76675415 ...   0.           0.
    0.        ]
 [ 62.18841171   9.89219093 404.37527466 ...   0.           0.
    0.        ]]
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.027359 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2588
[LightGBM] [Info] Number of data points in the train set: 800000, number of used features: 29
[LightGBM] [Info] Start training from score 3625.664551


[I 2024-07-12 10:58:31,867] Trial 3 finished with value: 153.77863220411885 and parameters: {'num_leaves': 75, 'learning_rate': 0.14014377712083012, 'max_depth': 12, 'min_child_samples': 53}. Best is trial 3 with value: 153.77863220411885.
[I 2024-07-12 10:58:31,869] Trial 4 pruned. 
[I 2024-07-12 10:58:31,871] Trial 5 pruned. 
[I 2024-07-12 10:58:31,873] Trial 6 pruned. 


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.028338 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2588
[LightGBM] [Info] Number of data points in the train set: 800000, number of used features: 29
[LightGBM] [Info] Start training from score 3625.664551


[I 2024-07-12 10:58:34,061] Trial 7 finished with value: 181.9455522038287 and parameters: {'num_leaves': 100, 'learning_rate': 0.0173391991866547, 'max_depth': 2, 'min_child_samples': 49}. Best is trial 3 with value: 153.77863220411885.
[I 2024-07-12 10:58:34,062] Trial 8 pruned. 


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.029360 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2588
[LightGBM] [Info] Number of data points in the train set: 800000, number of used features: 29
[LightGBM] [Info] Start training from score 3625.664551


[I 2024-07-12 10:58:39,601] Trial 9 finished with value: 159.15306731982847 and parameters: {'num_leaves': 64, 'learning_rate': 0.034950184340372954, 'max_depth': 10, 'min_child_samples': 35}. Best is trial 3 with value: 153.77863220411885.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.026402 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2588
[LightGBM] [Info] Number of data points in the train set: 800000, number of used features: 29
[LightGBM] [Info] Start training from score 3625.664551


[I 2024-07-12 10:58:44,142] A new study created in memory with name: no-name-9a1c779a-12a1-4f90-94c9-7e09fd488cc2


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.023912 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2588
[LightGBM] [Info] Number of data points in the train set: 800000, number of used features: 29
[LightGBM] [Info] Start training from score 3625.723877


[I 2024-07-12 10:58:46,787] Trial 0 finished with value: 165.20397525690208 and parameters: {'num_leaves': 53, 'learning_rate': 0.08160908150066266, 'max_depth': 3, 'min_child_samples': 41}. Best is trial 0 with value: 165.20397525690208.
[I 2024-07-12 10:58:46,788] Trial 1 pruned. 


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.028871 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2588
[LightGBM] [Info] Number of data points in the train set: 800000, number of used features: 29
[LightGBM] [Info] Start training from score 3625.723877


[I 2024-07-12 10:58:49,903] Trial 2 finished with value: 166.44499923360084 and parameters: {'num_leaves': 94, 'learning_rate': 0.03513773745534651, 'max_depth': 4, 'min_child_samples': 20}. Best is trial 0 with value: 165.20397525690208.
[I 2024-07-12 10:58:49,905] Trial 3 pruned. 


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.029859 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2588
[LightGBM] [Info] Number of data points in the train set: 800000, number of used features: 29
[LightGBM] [Info] Start training from score 3625.723877


[I 2024-07-12 10:58:52,503] Trial 4 finished with value: 174.47625814864568 and parameters: {'num_leaves': 94, 'learning_rate': 0.11161629863872309, 'max_depth': 1, 'min_child_samples': 54}. Best is trial 0 with value: 165.20397525690208.
[I 2024-07-12 10:58:52,505] Trial 5 pruned. 
[I 2024-07-12 10:58:52,507] Trial 6 pruned. 
[I 2024-07-12 10:58:52,508] Trial 7 pruned. 


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.030217 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2588
[LightGBM] [Info] Number of data points in the train set: 800000, number of used features: 29
[LightGBM] [Info] Start training from score 3625.723877


[I 2024-07-12 10:58:58,394] Trial 8 finished with value: 176.0325515620538 and parameters: {'num_leaves': 86, 'learning_rate': 0.010748754030434212, 'max_depth': 13, 'min_child_samples': 37}. Best is trial 0 with value: 165.20397525690208.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.027374 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2588
[LightGBM] [Info] Number of data points in the train set: 800000, number of used features: 29
[LightGBM] [Info] Start training from score 3625.723877


[I 2024-07-12 10:59:02,785] Trial 9 finished with value: 165.87984734400365 and parameters: {'num_leaves': 27, 'learning_rate': 0.02563621907534507, 'max_depth': 19, 'min_child_samples': 26}. Best is trial 0 with value: 165.20397525690208.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.031645 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2588
[LightGBM] [Info] Number of data points in the train set: 800000, number of used features: 29
[LightGBM] [Info] Start training from score 3625.723877


[I 2024-07-12 10:59:05,801] A new study created in memory with name: no-name-8f72065e-b706-4547-9974-cbd18ef9878b
[I 2024-07-12 10:59:05,803] Trial 0 pruned. 
[I 2024-07-12 10:59:05,805] Trial 1 pruned. 


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.030865 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2588
[LightGBM] [Info] Number of data points in the train set: 800000, number of used features: 29
[LightGBM] [Info] Start training from score 3625.940430


[I 2024-07-12 10:59:11,345] Trial 2 finished with value: 154.25785219812687 and parameters: {'num_leaves': 98, 'learning_rate': 0.15569084298939956, 'max_depth': 8, 'min_child_samples': 85}. Best is trial 2 with value: 154.25785219812687.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.036096 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2588
[LightGBM] [Info] Number of data points in the train set: 800000, number of used features: 29
[LightGBM] [Info] Start training from score 3625.940430


[I 2024-07-12 10:59:17,423] Trial 3 finished with value: 177.68538933583233 and parameters: {'num_leaves': 53, 'learning_rate': 0.010695845956088237, 'max_depth': 14, 'min_child_samples': 45}. Best is trial 2 with value: 154.25785219812687.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.029950 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2588
[LightGBM] [Info] Number of data points in the train set: 800000, number of used features: 29
[LightGBM] [Info] Start training from score 3625.940430


[I 2024-07-12 10:59:19,798] Trial 4 finished with value: 193.13668142635177 and parameters: {'num_leaves': 62, 'learning_rate': 0.016509730029166617, 'max_depth': 1, 'min_child_samples': 40}. Best is trial 2 with value: 154.25785219812687.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.029347 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2588
[LightGBM] [Info] Number of data points in the train set: 800000, number of used features: 29
[LightGBM] [Info] Start training from score 3625.940430


[I 2024-07-12 10:59:24,691] Trial 5 finished with value: 153.79428389282066 and parameters: {'num_leaves': 96, 'learning_rate': 0.22657439276150515, 'max_depth': 14, 'min_child_samples': 88}. Best is trial 5 with value: 153.79428389282066.
[I 2024-07-12 10:59:24,694] Trial 6 pruned. 
[I 2024-07-12 10:59:24,721] Trial 7 pruned. 
[I 2024-07-12 10:59:24,724] Trial 8 pruned. 


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.033746 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2588
[LightGBM] [Info] Number of data points in the train set: 800000, number of used features: 29
[LightGBM] [Info] Start training from score 3625.940430


[W 2024-07-12 10:59:27,313] Trial 9 failed with parameters: {'num_leaves': 88, 'learning_rate': 0.12051379970670703, 'max_depth': 9, 'min_child_samples': 45} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "c:\Users\k100814\Anaconda\Lib\site-packages\optuna\study\_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\k100814\AppData\Local\Temp\ipykernel_85672\4043318971.py", line 49, in <lambda>
    study.optimize(lambda trial: objective(trial, train_X, train_y, val_X, val_y), n_trials=10)
                                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\k100814\AppData\Local\Temp\ipykernel_85672\4043318971.py", line 30, in objective
    model.fit(train_X, train_y)
  File "c:\Users\k100814\Anaconda\Lib\site-packages\lightgbm\sklearn.py", line 1092, in fit
    super().fit(
  File "c:\Users\k100814\Anaconda\Lib\site-packages\lightgbm\sklearn

KeyboardInterrupt: 

**QDT**

In [13]:
from sklearn.tree import DecisionTreeRegressor
import optuna
from pmlb import fetch_data, regression_dataset_names
from sklearn.model_selection import KFold
import pandas as pd

class QuantileDecisionTreeRegressor:
    def __init__(self, quantile=0.9, min_samples_leaf=5):
        self.quantile = quantile
        self.min_samples_leaf = min_samples_leaf
        self.tree = DecisionTreeRegressor(min_samples_leaf=min_samples_leaf)

    def fit(self, X, y):
        self.tree.fit(X, y)
        self._add_quantile_info(X, y)

    def _add_quantile_info(self, X, y):
        leaf_indices = self.tree.apply(X)
        unique_leaves = np.unique(leaf_indices)
        self.quantile_values = {}
        for leaf in unique_leaves:
            leaf_y = y[leaf_indices == leaf]
            self.quantile_values[leaf] = np.percentile(leaf_y, self.quantile * 100)

    def predict(self, X):
        leaf_indices = self.tree.apply(X)
        predictions = np.array([self.quantile_values[leaf] for leaf in leaf_indices])
        return predictions

def pinball_loss(y_true, y_pred, tau=0.9):
    residuals = y_true - y_pred
    loss = np.where(residuals >= 0, tau * residuals, (1 - tau) * -residuals)
    return np.mean(loss)

def objective(trial, train_X, train_y, val_X, val_y):
    quantile = 0.9
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 5, 100)

    model = QuantileDecisionTreeRegressor(quantile=quantile, min_samples_leaf=min_samples_leaf)
    model.fit(train_X, train_y)
    y_pred = model.predict(val_X)

    return pinball_loss(val_y, y_pred, tau=quantile)

# Example dataset usage
regression_dataset_tryout = regression_dataset_names
results = []

for regression_dataset in regression_dataset_tryout:
    X, y = fetch_data(regression_dataset, return_X_y=True)
    kf = KFold(n_splits=5, shuffle=True)
    dataset_results = []

    for train_index, val_index in kf.split(X):
        train_X, val_X = X[train_index], X[val_index]
        train_y, val_y = y[train_index], y[val_index]

        study = optuna.create_study(direction='minimize')
        study.optimize(lambda trial: objective(trial, train_X, train_y, val_X, val_y), n_trials=10)

        best_params = study.best_params
        best_model = QuantileDecisionTreeRegressor(**best_params)
        best_model.fit(train_X, train_y)
        y_pred = best_model.predict(val_X)

        loss = pinball_loss(val_y, y_pred, tau=best_model.quantile)
        coverage = np.mean(y_pred >= val_y)

        dataset_results.append({'Loss': loss, 'Coverage': coverage})

    results.append({'Dataset': regression_dataset, 'Results': dataset_results})


[I 2024-07-12 10:59:43,417] A new study created in memory with name: no-name-ba6d8598-040a-413f-a4d6-527da263cf07
[I 2024-07-12 10:59:43,421] Trial 0 finished with value: 0.14795918367346936 and parameters: {'min_samples_leaf': 58}. Best is trial 0 with value: 0.14795918367346936.
[I 2024-07-12 10:59:43,424] Trial 1 finished with value: 0.2071428571428571 and parameters: {'min_samples_leaf': 99}. Best is trial 0 with value: 0.14795918367346936.
[I 2024-07-12 10:59:43,427] Trial 2 finished with value: 0.1259183673469388 and parameters: {'min_samples_leaf': 27}. Best is trial 2 with value: 0.1259183673469388.
[I 2024-07-12 10:59:43,430] Trial 3 finished with value: 0.14489795918367343 and parameters: {'min_samples_leaf': 74}. Best is trial 2 with value: 0.1259183673469388.
[I 2024-07-12 10:59:43,434] Trial 4 finished with value: 0.11061224489795918 and parameters: {'min_samples_leaf': 36}. Best is trial 4 with value: 0.11061224489795918.
[I 2024-07-12 10:59:43,438] Trial 5 finished with 

**LINEAR**

In [4]:
regressiondatasettryout = regression_dataset_names

In [9]:
import pandas as pd
from pmlb import fetch_data
import optuna
from sklearn.model_selection import KFold
from sklearn.preprocessing import OneHotEncoder
import statsmodels.api as sm
from statsmodels.regression.quantile_regression import QuantReg

def create_dummy_variables(X, categorical_features):
    if not categorical_features:
        return sm.add_constant(X, has_constant='add')
    encoder = OneHotEncoder(sparse=False)
    X_categorical = encoder.fit_transform(X[:, categorical_features])
    X_numerical = np.delete(X, categorical_features, axis=1)
    X_with_dummies = np.concatenate([X_numerical, X_categorical], axis=1)
    return sm.add_constant(X_with_dummies, has_constant='add')

def pinball_loss(y_true, y_pred, tau=0.5):
    residuals = y_true - y_pred
    return np.mean(np.where(residuals >= 0, tau * residuals, (1 - tau) * -residuals))

def objective(trial, train_X, train_y, val_X, val_y, tau=0.5):
    max_iter = trial.suggest_int('max_iter', 1000, 5000)
    model = QuantReg(train_y, train_X)
    results = model.fit(q=tau, max_iter=max_iter)
    y_pred = results.predict(val_X)
    return pinball_loss(val_y, y_pred, tau)

resultslinear = []

for regression_dataset in regressiondatasettryout:
    print(regression_dataset)
    X1, y = fetch_data(regression_dataset, return_X_y=True)

    # Apply subsampling for large datasets
    if X1.shape[0] > 500000:
        idx = np.random.choice(X1.shape[0], 100000, replace=False)
        X1 = X1[idx]
        y = y[idx]

    categorical_features = get_categorical_features(X1)
    X = create_dummy_variables(X1, categorical_features)
    kf = KFold(n_splits=3 if X1.shape[0] > 1000000 else 5, shuffle=True)
    dataset_results = []

    for train_index, val_index in kf.split(X):
        train_X, val_X = X[train_index], X[val_index]
        train_y, val_y = y[train_index], y[val_index]

        study = optuna.create_study(direction='minimize')
        study.optimize(lambda trial: objective(trial, train_X, train_y, val_X, val_y, tau=0.5), n_trials=5 if X1.shape[0] > 500000 else 10)

        best_params = study.best_trial.params
        final_model = QuantReg(train_y, train_X).fit(q=0.5, max_iter=best_params['max_iter'])
        y_pred = final_model.predict(val_X)
        loss = pinball_loss(val_y, y_pred, tau=0.5)
        coverage = np.mean(y_pred >= val_y)
        dataset_results.append({'Loss': loss, 'Coverage': coverage})

    resultslinear.append({'Dataset': regression_dataset, 'Results': dataset_results})


1027_ESL


[I 2024-07-12 11:05:27,020] A new study created in memory with name: no-name-979b88ba-b2cd-4c74-ae14-68eede821d8c
[I 2024-07-12 11:05:27,575] Trial 0 finished with value: 0.19423839004682994 and parameters: {'max_iter': 4308}. Best is trial 0 with value: 0.19423839004682994.
[I 2024-07-12 11:05:27,691] Trial 1 finished with value: 0.19423839004682994 and parameters: {'max_iter': 1021}. Best is trial 0 with value: 0.19423839004682994.
[I 2024-07-12 11:05:27,811] Trial 2 finished with value: 0.19423839004682994 and parameters: {'max_iter': 4727}. Best is trial 0 with value: 0.19423839004682994.
[I 2024-07-12 11:05:27,943] Trial 3 finished with value: 0.19423839004682994 and parameters: {'max_iter': 3813}. Best is trial 0 with value: 0.19423839004682994.
[I 2024-07-12 11:05:28,061] Trial 4 finished with value: 0.19423839004682994 and parameters: {'max_iter': 4316}. Best is trial 0 with value: 0.19423839004682994.
[I 2024-07-12 11:05:28,181] Trial 5 finished with value: 0.19423839004682994

1028_SWD


[I 2024-07-12 11:05:37,004] A new study created in memory with name: no-name-a8d0ab40-b7ef-41d6-9e92-2714ef6b5e64
[I 2024-07-12 11:05:37,105] Trial 0 finished with value: 0.23443284726829178 and parameters: {'max_iter': 2924}. Best is trial 0 with value: 0.23443284726829178.
[I 2024-07-12 11:05:37,187] Trial 1 finished with value: 0.23443284726829178 and parameters: {'max_iter': 2979}. Best is trial 0 with value: 0.23443284726829178.
[I 2024-07-12 11:05:37,273] Trial 2 finished with value: 0.23443284726829178 and parameters: {'max_iter': 3818}. Best is trial 0 with value: 0.23443284726829178.
[I 2024-07-12 11:05:37,358] Trial 3 finished with value: 0.23443284726829178 and parameters: {'max_iter': 4169}. Best is trial 0 with value: 0.23443284726829178.
[I 2024-07-12 11:05:37,448] Trial 4 finished with value: 0.23443284726829178 and parameters: {'max_iter': 3876}. Best is trial 0 with value: 0.23443284726829178.
[I 2024-07-12 11:05:37,534] Trial 5 finished with value: 0.23443284726829178

KeyboardInterrupt: 