In [38]:
%load_ext autoreload
%autoreload 2

import functools
import gc
import logging
import pickle
import sys
from pathlib import Path
from typing import Dict, List, Tuple, Callable

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
#import tensorflow_addons as tfa
from IPython.display import clear_output
#from keras.regularizers import L1L2
from multiprocess import Pool
from multiprocess.dummy import Pool as ThreadPool
from tqdm import tqdm
import lightgbm as lgb


# Configure the root logger once for the whole notebook: DEBUG level, and every
# record is prefixed with a timestamp and the name of the emitting function.
# NOTE(review): DEBUG on the root logger also surfaces debug records from any
# imported library that logs -- raise the level if the cell output gets too noisy.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s :: %(funcName)s :: %(message)s')

from ccf.models_cust import ModelLGB
from ccf.datasets import get_sample, get_initial_setting, get_left_right_name, get_std
from sklearn.metrics import r2_score
from ccf.utils import delete_objs, cuttoff_ts_in_df, natural_reindex, sort_df, create_block_vars_from_separate
from ccf.analytics_cluster import get_analytics_row, get_pairs_rel_analytics, paired_test_greedy_fs
#from ccf.callbacks import FrequencyCallback, FrequencyEpoch
from ccf.preprocess import (
    get_sample_2d_lgb,
    get_sample_2d_bin,
    get_sample_2d_lgb_paired,
    get_sample_2d_no_scale_paired,
)
from tensorflow.keras.callbacks import EarlyStopping
# Let pandas render up to 10,000 columns instead of eliding wide frames.
pd.set_option('display.max_columns', 10_000)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [39]:
# Root of the experiment storage and the sub-folder this notebook works on;
# both names are reused by the later cells.
base = '../../Storage/alber'
target_type = 'ret_1_10_folds_exp'

# Features that are already selected; they are excluded from the candidate pool.
current_feature_list = ['stoch_k_price_21_1']

# Candidate pool: the `new_var` column of the one-factor analytics file, minus
# everything that is already selected.
candidates_table = pd.read_csv(f'{base}/{target_type}/analytics/one_factor_block_vars_vars_60_corr_05.csv')

# The set difference removes duplicates and the selected features, but a set of
# strings has no stable iteration order across interpreter runs (hash
# randomisation). Sorting here gives the same candidate order on every kernel
# restart, and keeps `list_candidates` bound to a single type (a list of names).
list_candidates = sorted(set(candidates_table.new_var) - set(current_feature_list))

In [40]:
# Show how many features are already selected, and which ones.
len(current_feature_list), current_feature_list

(1, ['stoch_k_price_21_1'])

In [43]:
# Show the size and content of the candidate pool (selected features excluded).
len(list_candidates), list_candidates

(28,
 ['Money',
  'ask_size2',
  'ask_spread',
  'bid_ask_spread2',
  'bid_ask_w_spread2',
  'bid_price2',
  'bid_size2',
  'bid_spread',
  'log_return_mean_price',
  'rel_order_count_1_80',
  'rel_order_count_20_40',
  'rel_order_count_40_80',
  'rel_order_count_5_80',
  'rel_price_10_40',
  'rel_price_1_5',
  'rel_price_40_80',
  'rel_price_5_10',
  'rel_size_10_80',
  'rel_size_40_80',
  'rel_stoch_price_21_1_3',
  'rel_volume_ask',
  'rel_volume_ask_bid',
  'rel_volume_ask_bid1',
  'stoch_d_size_42_3',
  'stoch_k_size_14_1',
  'total_volume',
  'volume_imbalance',
  'wap_balance'])

In [44]:
#current_feature_list = []
# One entry per fold, numbered 1..10. Each entry lists, in this order, the
# fold's X_*, train_val_* and val_* parquet files followed by the features
# file that is shared by all folds.
fold_dir = Path(f'{base}/{target_type}')
folds = [
    [
        fold_dir / f'X_{fold_no}.parquet.gzip',
        fold_dir / f'train_val_{fold_no}.parquet.gzip',
        fold_dir / f'val_{fold_no}.parquet.gzip',
        Path(f'{base}/features.parquet.gzip'),
    ]
    for fold_no in range(1, 11)
]


# Observation counts handed to the search further down.
# NOTE(review): count_obs_val is None -- presumably "no limit"; confirm in
# paired_test_greedy_fs.
count_obs_train, count_obs_val_train, count_obs_val = 1_500_000, 535_000, None

# Name of this run and the locations it reads from / refers to.
experiment_name = f'fs_fs_main_set_1_{target_type}'
features_path = Path(f'{base}/features.parquet.gzip')
analytics_path = Path(f'{base}/{target_type}/analytics/')

# Field (and direction) used to rank candidates.
selection_rule = dict(field_name="rel_diff_macro_lower_boot_95", ascending=False)

# Sort the candidate names in place so they are processed in a fixed order.
list_candidates.sort()

# Only the first of the three returned values is used by this notebook.
dict_fields, _, _ = get_initial_setting(features_path, count_cuttoff=0)


def get_sample_func(possible_feature_list, base_path, count_obs, scaler, features_path):
    """Load a sample through ``get_sample_2d_lgb`` with this notebook's fixed options.

    Thin adapter with the five-parameter signature that is handed to
    ``paired_test_greedy_fs``. Written as a named function instead of a lambda
    bound to a name, so it carries a docstring and a readable name in tracebacks.

    Args:
        possible_feature_list: Forwarded unchanged as the first argument.
        base_path: Forwarded unchanged as the second argument.
        count_obs: Forwarded unchanged as the third argument.
        scaler: Accepted only to keep the expected signature; it is ignored.
        features_path: Forwarded unchanged as the fourth argument.

    Returns:
        Whatever ``get_sample_2d_lgb`` returns for these arguments.
    """
    return get_sample_2d_lgb(
        possible_feature_list,
        base_path,
        count_obs,
        features_path,
        categoricals=[],       # a fresh empty list on every call
        experiment_name=None,
        keys=["time"],
    )

# Record in the notebook log which experiment this run belongs to.
logging.debug(f"experiment_name == {experiment_name}")
# Training settings passed to ModelLGB as `learning_setting`. The nested
# `params` mapping is the LightGBM parameter set; the outer keys control the
# training loop (the recorded output logs metrics every 50 rounds and runs
# up to 500 rounds).
# NOTE(review): with max_depth=6 a tree can hold at most 2**6 = 64 leaves, so
# num_leaves=131_072 never binds -- depth is the effective limit.
# NOTE(review): no seed is set here; the recorded log shows seeds being chosen
# per training run elsewhere.
curr_setting = dict(
    verbose_eval=50,
    num_boost_round=500,
    early_stopping_rounds=50,
    params=dict(
        num_leaves=131_072,
        max_bin=256,
        learning_rate=0.01,
        objective="regression",
        metric="rmse",
        max_depth=6,
        feature_fraction=1.0,
        feature_fraction_bynode=0.6,
        bagging_fraction=1.0,
    ),
)


def model_class(train_matrix_shape, name):
    """Build a new ``ModelLGB`` configured with this notebook's settings.

    Factory with the two-parameter signature that is handed to
    ``paired_test_greedy_fs``. Written as a named function instead of a lambda
    bound to a name, so it carries a docstring and a readable name in tracebacks.
    ``base`` and ``curr_setting`` are looked up when the factory is called, so
    later changes to those notebook variables take effect.

    Args:
        train_matrix_shape: Accepted only to keep the expected signature; it
            is ignored.
        name: Passed to ``ModelLGB`` as ``name``.

    Returns:
        The ``ModelLGB`` instance, saving under ``<base>/saved_models`` and
        scored with ``r2_score``.
    """
    return ModelLGB(
        save_path=Path(f'{base}') / Path("saved_models"),
        name=name,
        metric=r2_score,
        learning_setting=curr_setting,
    )

# Reference performance handed to the search: the `macro` value of the top-ranked
# row of the `set_0` analytics file (presumably the previous selection run --
# confirm). The file name is derived from `target_type`, like the directory it
# lives in and like `experiment_name`, so changing the target cannot leave a
# stale hard-coded name behind.
prev_run_analytics = pd.read_csv(
    f'{base}/{target_type}/analytics/block_vars_fs_fs_main_set_0_{target_type}.csv'
)

# Keep only rows flagged as best in their iteration, then rank them: most
# variables first, fewest `count_boot_le_one`, highest lower bootstrap bound.
best_prev_rows = (
    prev_run_analytics
    .query('best_in_iter == 1')
    .sort_values(
        by=['count_vars', 'count_boot_le_one', 'rel_diff_macro_lower_boot_95'],
        ascending=[False, True, False],
    )
    .reset_index(drop=True)
)

# Fail with a readable message instead of an opaque `KeyError: 0` from `.loc`.
if best_prev_rows.empty:
    raise ValueError("no row with best_in_iter == 1 in the set_0 analytics file")

# NOTE(security): `eval` executes whatever text is stored in the `macro` cell.
# This is only acceptable because the CSV is produced by this project; if the
# cell always holds a plain Python literal, `ast.literal_eval` is the safe
# replacement -- confirm the stored format before switching.
pred_iter_perf = eval(best_prev_rows.loc[0, "macro"])

2022-10-31 00:48:24,102 :: <module> :: experiment_name == fs_fs_main_set_1_ret_1_10_folds_exp


In [None]:
# Launch the greedy feature-selection search with the objects prepared in the
# cells above. The first 13 arguments are positional, so their order must match
# the signature of ccf.analytics_cluster.paired_test_greedy_fs exactly.
# NOTE(review): count_obs_val_train is passed after pred_iter_perf, not next to
# the other count_obs_* values -- confirm against the function definition.
# The return value is not captured.
# NOTE(review): results are presumably written under analytics_path -- confirm.
paired_test_greedy_fs(
    current_feature_list,
    list_candidates,
    dict_fields,
    folds,
    count_obs_train,
    count_obs_val,
    experiment_name,
    model_class,
    analytics_path,
    selection_rule,
    get_sample_func,
    pred_iter_perf,
    count_obs_val_train,
    print_iteration=True,
    count_iteration=5,
)

2022-11-01 16:31:37,766 :: prepare_output :: 20/21 :: stoch_k_size_14_1 :: 22
2022-11-01 16:31:37,767 :: get_specific_features :: data.columns Index(['stoch_k_price_21_1', 'Money', 'ask_size2', 'ask_spread',
       'bid_ask_spread2', 'bid_ask_w_spread2', 'bid_price2', 'bid_size2',
       'bid_spread', 'log_return_mean_price', 'rel_order_count_1_80',
       'rel_order_count_20_40', 'rel_order_count_40_80',
       'rel_order_count_5_80', 'rel_price_10_40', 'rel_price_1_5',
       'rel_price_40_80', 'rel_price_5_10', 'rel_size_10_80', 'rel_size_40_80',
       'rel_stoch_price_21_1_3', 'rel_volume_ask', 'rel_volume_ask_bid',
       'rel_volume_ask_bid1', 'stoch_d_size_42_3', 'stoch_k_size_14_1',
       'total_volume', 'volume_imbalance', 'wap_balance'],
      dtype='object'), possible_feature_list == ['stoch_k_price_21_1', 'wap_balance', 'volume_imbalance', 'rel_order_count_1_80', 'rel_price_5_10', 'log_return_mean_price', 'bid_spread', 'ask_spread', 'stoch_k_size_14_1']
2022-11-01 16:31:3

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2148
[LightGBM] [Info] Number of data points in the train set: 1500000, number of used features: 9
[LightGBM] [Info] Start training from score 0.000003
Training until validation scores don't improve for 50 rounds
[50]	training's rmse: 0.000742107	valid_1's rmse: 0.000743722
[100]	training's rmse: 0.000739332	valid_1's rmse: 0.000741296


[150]	training's rmse: 0.000737681	valid_1's rmse: 0.000740029
[200]	training's rmse: 0.000736562	valid_1's rmse: 0.000739331
[250]	training's rmse: 0.000735732	valid_1's rmse: 0.000738889


[300]	training's rmse: 0.000735007	valid_1's rmse: 0.00073858
[350]	training's rmse: 0.000734376	valid_1's rmse: 0.000738356


[400]	training's rmse: 0.000733788	valid_1's rmse: 0.000738217
[450]	training's rmse: 0.000733276	valid_1's rmse: 0.000738111


2022-11-01 16:31:51,799 :: paired_test_greedy_fs :: After train.
2022-11-01 16:31:51,800 :: get_specific_features :: data.columns Index(['stoch_k_price_21_1', 'Money', 'ask_size2', 'ask_spread',
       'bid_ask_spread2', 'bid_ask_w_spread2', 'bid_price2', 'bid_size2',
       'bid_spread', 'log_return_mean_price', 'rel_order_count_1_80',
       'rel_order_count_20_40', 'rel_order_count_40_80',
       'rel_order_count_5_80', 'rel_price_10_40', 'rel_price_1_5',
       'rel_price_40_80', 'rel_price_5_10', 'rel_size_10_80', 'rel_size_40_80',
       'rel_stoch_price_21_1_3', 'rel_volume_ask', 'rel_volume_ask_bid',
       'rel_volume_ask_bid1', 'stoch_d_size_42_3', 'stoch_k_size_14_1',
       'total_volume', 'volume_imbalance', 'wap_balance'],
      dtype='object'), possible_feature_list == ['stoch_k_price_21_1', 'wap_balance', 'volume_imbalance', 'rel_order_count_1_80', 'rel_price_5_10', 'log_return_mean_price', 'bid_spread', 'ask_spread', 'stoch_k_size_14_1']
2022-11-01 16:31:51,805 :: get_

[500]	training's rmse: 0.000732841	valid_1's rmse: 0.000738049
Did not meet early stopping. Best iteration is:
[500]	training's rmse: 0.000732841	valid_1's rmse: 0.000738049
(440000,) [-1.56270544e-04 -1.67965234e-04 -8.84271722e-05 -1.01523125e-04
 -3.32195123e-05  1.28685133e-04  7.48713501e-05  7.44034041e-05
  7.34675123e-05  7.34675123e-05  7.81469716e-05  8.00187554e-05
  8.09546473e-05  8.09546473e-05  1.37576106e-04  1.37576106e-04
  1.42255565e-04  1.42255565e-04  1.97941132e-04  1.97941132e-04
  1.99812916e-04  2.28825564e-04  2.28825564e-04  3.27562157e-04
  3.73901741e-04  4.27717401e-04  4.25844744e-04  4.23972117e-04
  4.24438680e-04  5.04926662e-04  4.95561457e-04  4.97432367e-04
  7.09879969e-04  7.24385085e-04  7.22980185e-04  7.27659673e-04
  6.47587469e-04  6.19495462e-04  6.22301712e-04  6.02168904e-04
  7.65970151e-04  2.02605515e-04  1.73124045e-05  3.60291888e-05
  4.67909376e-06  4.67909376e-06  2.80745098e-06  3.27535645e-06
  3.18177481e-05  3.18177481e-05 -6.

2022-11-01 16:31:52,538 :: paired_test_greedy_fs :: performances == [0.034872403282289355]
2022-11-01 16:31:52,539 :: __init__ :: 
Initial random seed for lgb == {'num_leaves': 131072, 'max_bin': 256, 'learning_rate': 0.01, 'objective': 'regression', 'metric': 'rmse', 'max_depth': 6, 'feature_fraction': 1.0, 'feature_fraction_bynode': 0.6, 'bagging_fraction': 1.0, 'seed': None, 'data_random_seed': 5116, 'feature_fraction_seed': 5116}
2022-11-01 16:31:52,539 :: paired_test_greedy_fs :: Before train.
2022-11-01 16:31:52,540 :: train :: 
Random seed for lgb == 3278,             data_random_seed == 3278,             feature_fraction_seed == 3278


delete model 1
delete model 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2152
[LightGBM] [Info] Number of data points in the train set: 1500000, number of used features: 9
[LightGBM] [Info] Start training from score 0.000003
Training until validation scores don't improve for 50 rounds
[50]	training's rmse: 0.00074212	valid_1's rmse: 0.000743735
[100]	training's rmse: 0.000739311	valid_1's rmse: 0.000741292


[150]	training's rmse: 0.000737677	valid_1's rmse: 0.000740023
[200]	training's rmse: 0.000736566	valid_1's rmse: 0.000739306


[250]	training's rmse: 0.000735687	valid_1's rmse: 0.000738846
[300]	training's rmse: 0.00073496	valid_1's rmse: 0.000738531


[350]	training's rmse: 0.000734338	valid_1's rmse: 0.00073832
[400]	training's rmse: 0.000733756	valid_1's rmse: 0.000738172
[450]	training's rmse: 0.000733219	valid_1's rmse: 0.000738049




2022-11-01 16:32:06,132 :: paired_test_greedy_fs :: After train.


[500]	training's rmse: 0.000732787	valid_1's rmse: 0.000737952
Did not meet early stopping. Best iteration is:
[500]	training's rmse: 0.000732787	valid_1's rmse: 0.000737952
(440000,) [-1.56270544e-04 -1.67965234e-04 -8.84271722e-05 -1.01523125e-04
 -3.32195123e-05  1.28685133e-04  7.48713501e-05  7.44034041e-05
  7.34675123e-05  7.34675123e-05  7.81469716e-05  8.00187554e-05
  8.09546473e-05  8.09546473e-05  1.37576106e-04  1.37576106e-04
  1.42255565e-04  1.42255565e-04  1.97941132e-04  1.97941132e-04
  1.99812916e-04  2.28825564e-04  2.28825564e-04  3.27562157e-04
  3.73901741e-04  4.27717401e-04  4.25844744e-04  4.23972117e-04
  4.24438680e-04  5.04926662e-04  4.95561457e-04  4.97432367e-04
  7.09879969e-04  7.24385085e-04  7.22980185e-04  7.27659673e-04
  6.47587469e-04  6.19495462e-04  6.22301712e-04  6.02168904e-04
  7.65970151e-04  2.02605515e-04  1.73124045e-05  3.60291888e-05
  4.67909376e-06  4.67909376e-06  2.80745098e-06  3.27535645e-06
  3.18177481e-05  3.18177481e-05 -6.

2022-11-01 16:32:06,765 :: paired_test_greedy_fs :: performances == [0.034872403282289355, 0.03493560476595747]
2022-11-01 16:32:06,766 :: __init__ :: 
Initial random seed for lgb == {'num_leaves': 131072, 'max_bin': 256, 'learning_rate': 0.01, 'objective': 'regression', 'metric': 'rmse', 'max_depth': 6, 'feature_fraction': 1.0, 'feature_fraction_bynode': 0.6, 'bagging_fraction': 1.0, 'seed': None, 'data_random_seed': 3278, 'feature_fraction_seed': 3278}
2022-11-01 16:32:06,766 :: paired_test_greedy_fs :: Before train.
2022-11-01 16:32:06,767 :: train :: 
Random seed for lgb == 8096,             data_random_seed == 8096,             feature_fraction_seed == 8096


delete model 1
delete model 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2151
[LightGBM] [Info] Number of data points in the train set: 1500000, number of used features: 9
[LightGBM] [Info] Start training from score 0.000003
Training until validation scores don't improve for 50 rounds
[50]	training's rmse: 0.000742098	valid_1's rmse: 0.000743713
[100]	training's rmse: 0.000739294	valid_1's rmse: 0.000741276


[150]	training's rmse: 0.000737669	valid_1's rmse: 0.000740032
[200]	training's rmse: 0.000736562	valid_1's rmse: 0.000739324


[250]	training's rmse: 0.000735722	valid_1's rmse: 0.000738868
[300]	training's rmse: 0.000735031	valid_1's rmse: 0.000738572
[350]	training's rmse: 0.000734419	valid_1's rmse: 0.000738349


[400]	training's rmse: 0.000733826	valid_1's rmse: 0.000738199
[450]	training's rmse: 0.000733293	valid_1's rmse: 0.000738074




2022-11-01 16:32:20,674 :: paired_test_greedy_fs :: After train.


[500]	training's rmse: 0.000732852	valid_1's rmse: 0.000737994
Did not meet early stopping. Best iteration is:
[500]	training's rmse: 0.000732852	valid_1's rmse: 0.000737994
(440000,) [-1.56270544e-04 -1.67965234e-04 -8.84271722e-05 -1.01523125e-04
 -3.32195123e-05  1.28685133e-04  7.48713501e-05  7.44034041e-05
  7.34675123e-05  7.34675123e-05  7.81469716e-05  8.00187554e-05
  8.09546473e-05  8.09546473e-05  1.37576106e-04  1.37576106e-04
  1.42255565e-04  1.42255565e-04  1.97941132e-04  1.97941132e-04
  1.99812916e-04  2.28825564e-04  2.28825564e-04  3.27562157e-04
  3.73901741e-04  4.27717401e-04  4.25844744e-04  4.23972117e-04
  4.24438680e-04  5.04926662e-04  4.95561457e-04  4.97432367e-04
  7.09879969e-04  7.24385085e-04  7.22980185e-04  7.27659673e-04
  6.47587469e-04  6.19495462e-04  6.22301712e-04  6.02168904e-04
  7.65970151e-04  2.02605515e-04  1.73124045e-05  3.60291888e-05
  4.67909376e-06  4.67909376e-06  2.80745098e-06  3.27535645e-06
  3.18177481e-05  3.18177481e-05 -6.

2022-11-01 16:32:21,353 :: paired_test_greedy_fs :: performances == [0.034872403282289355, 0.03493560476595747, 0.03482881084691358]
2022-11-01 16:32:21,354 :: __init__ :: 
Initial random seed for lgb == {'num_leaves': 131072, 'max_bin': 256, 'learning_rate': 0.01, 'objective': 'regression', 'metric': 'rmse', 'max_depth': 6, 'feature_fraction': 1.0, 'feature_fraction_bynode': 0.6, 'bagging_fraction': 1.0, 'seed': None, 'data_random_seed': 8096, 'feature_fraction_seed': 8096}
2022-11-01 16:32:21,354 :: paired_test_greedy_fs :: Before train.
2022-11-01 16:32:21,355 :: train :: 
Random seed for lgb == 3484,             data_random_seed == 3484,             feature_fraction_seed == 3484


delete model 1
delete model 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2151
[LightGBM] [Info] Number of data points in the train set: 1500000, number of used features: 9
[LightGBM] [Info] Start training from score 0.000003
Training until validation scores don't improve for 50 rounds
[50]	training's rmse: 0.000742109	valid_1's rmse: 0.000743735
[100]	training's rmse: 0.000739307	valid_1's rmse: 0.000741291


[150]	training's rmse: 0.000737642	valid_1's rmse: 0.000740003
[200]	training's rmse: 0.000736532	valid_1's rmse: 0.000739297


[250]	training's rmse: 0.000735687	valid_1's rmse: 0.000738829
[300]	training's rmse: 0.000734988	valid_1's rmse: 0.000738522
[350]	training's rmse: 0.000734344	valid_1's rmse: 0.000738306


[400]	training's rmse: 0.000733779	valid_1's rmse: 0.000738171
[450]	training's rmse: 0.00073325	valid_1's rmse: 0.00073805




2022-11-01 16:32:34,791 :: paired_test_greedy_fs :: After train.


[500]	training's rmse: 0.000732806	valid_1's rmse: 0.000737963
Did not meet early stopping. Best iteration is:
[500]	training's rmse: 0.000732806	valid_1's rmse: 0.000737963
(440000,) [-1.56270544e-04 -1.67965234e-04 -8.84271722e-05 -1.01523125e-04
 -3.32195123e-05  1.28685133e-04  7.48713501e-05  7.44034041e-05
  7.34675123e-05  7.34675123e-05  7.81469716e-05  8.00187554e-05
  8.09546473e-05  8.09546473e-05  1.37576106e-04  1.37576106e-04
  1.42255565e-04  1.42255565e-04  1.97941132e-04  1.97941132e-04
  1.99812916e-04  2.28825564e-04  2.28825564e-04  3.27562157e-04
  3.73901741e-04  4.27717401e-04  4.25844744e-04  4.23972117e-04
  4.24438680e-04  5.04926662e-04  4.95561457e-04  4.97432367e-04
  7.09879969e-04  7.24385085e-04  7.22980185e-04  7.27659673e-04
  6.47587469e-04  6.19495462e-04  6.22301712e-04  6.02168904e-04
  7.65970151e-04  2.02605515e-04  1.73124045e-05  3.60291888e-05
  4.67909376e-06  4.67909376e-06  2.80745098e-06  3.27535645e-06
  3.18177481e-05  3.18177481e-05 -6.

2022-11-01 16:32:35,478 :: paired_test_greedy_fs :: performances == [0.034872403282289355, 0.03493560476595747, 0.03482881084691358, 0.03486585517598895]
2022-11-01 16:32:35,479 :: __init__ :: 
Initial random seed for lgb == {'num_leaves': 131072, 'max_bin': 256, 'learning_rate': 0.01, 'objective': 'regression', 'metric': 'rmse', 'max_depth': 6, 'feature_fraction': 1.0, 'feature_fraction_bynode': 0.6, 'bagging_fraction': 1.0, 'seed': None, 'data_random_seed': 3484, 'feature_fraction_seed': 3484}
2022-11-01 16:32:35,480 :: paired_test_greedy_fs :: Before train.
2022-11-01 16:32:35,481 :: train :: 
Random seed for lgb == 7689,             data_random_seed == 7689,             feature_fraction_seed == 7689


delete model 1
delete model 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2151
[LightGBM] [Info] Number of data points in the train set: 1500000, number of used features: 9
[LightGBM] [Info] Start training from score 0.000003
Training until validation scores don't improve for 50 rounds
[50]	training's rmse: 0.000742109	valid_1's rmse: 0.000743742
[100]	training's rmse: 0.000739301	valid_1's rmse: 0.000741286


[150]	training's rmse: 0.000737658	valid_1's rmse: 0.00074002
[200]	training's rmse: 0.000736556	valid_1's rmse: 0.000739303
[250]	training's rmse: 0.000735705	valid_1's rmse: 0.000738844


[300]	training's rmse: 0.000735013	valid_1's rmse: 0.000738531
[350]	training's rmse: 0.000734388	valid_1's rmse: 0.000738337


[400]	training's rmse: 0.000733787	valid_1's rmse: 0.000738161
[450]	training's rmse: 0.00073326	valid_1's rmse: 0.000738025




2022-11-01 16:32:49,506 :: paired_test_greedy_fs :: After train.


[500]	training's rmse: 0.000732815	valid_1's rmse: 0.000737929
Did not meet early stopping. Best iteration is:
[500]	training's rmse: 0.000732815	valid_1's rmse: 0.000737929
(440000,) [-1.56270544e-04 -1.67965234e-04 -8.84271722e-05 -1.01523125e-04
 -3.32195123e-05  1.28685133e-04  7.48713501e-05  7.44034041e-05
  7.34675123e-05  7.34675123e-05  7.81469716e-05  8.00187554e-05
  8.09546473e-05  8.09546473e-05  1.37576106e-04  1.37576106e-04
  1.42255565e-04  1.42255565e-04  1.97941132e-04  1.97941132e-04
  1.99812916e-04  2.28825564e-04  2.28825564e-04  3.27562157e-04
  3.73901741e-04  4.27717401e-04  4.25844744e-04  4.23972117e-04
  4.24438680e-04  5.04926662e-04  4.95561457e-04  4.97432367e-04
  7.09879969e-04  7.24385085e-04  7.22980185e-04  7.27659673e-04
  6.47587469e-04  6.19495462e-04  6.22301712e-04  6.02168904e-04
  7.65970151e-04  2.02605515e-04  1.73124045e-05  3.60291888e-05
  4.67909376e-06  4.67909376e-06  2.80745098e-06  3.27535645e-06
  3.18177481e-05  3.18177481e-05 -6.

2022-11-01 16:32:50,140 :: paired_test_greedy_fs :: performances == [0.034872403282289355, 0.03493560476595747, 0.03482881084691358, 0.03486585517598895, 0.034961446779415706]
2022-11-01 16:32:50,141 :: get_specific_features :: data.columns Index(['stoch_k_price_21_1', 'Money', 'ask_size2', 'ask_spread',
       'bid_ask_spread2', 'bid_ask_w_spread2', 'bid_price2', 'bid_size2',
       'bid_spread', 'log_return_mean_price', 'rel_order_count_1_80',
       'rel_order_count_20_40', 'rel_order_count_40_80',
       'rel_order_count_5_80', 'rel_price_10_40', 'rel_price_1_5',
       'rel_price_40_80', 'rel_price_5_10', 'rel_size_10_80', 'rel_size_40_80',
       'rel_stoch_price_21_1_3', 'rel_volume_ask', 'rel_volume_ask_bid',
       'rel_volume_ask_bid1', 'stoch_d_size_42_3', 'stoch_k_size_14_1',
       'total_volume', 'volume_imbalance', 'wap_balance'],
      dtype='object'), possible_feature_list == ['stoch_k_price_21_1', 'wap_balance', 'volume_imbalance', 'rel_order_count_1_80', 'rel_price_5

delete model 1
delete model 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2149
[LightGBM] [Info] Number of data points in the train set: 1500000, number of used features: 9
[LightGBM] [Info] Start training from score 0.000002
Training until validation scores don't improve for 50 rounds
[50]	training's rmse: 0.000732316	valid_1's rmse: 0.000734552
[100]	training's rmse: 0.000729506	valid_1's rmse: 0.000732155


[150]	training's rmse: 0.00072782	valid_1's rmse: 0.000730933
[200]	training's rmse: 0.000726666	valid_1's rmse: 0.00073025


[250]	training's rmse: 0.000725792	valid_1's rmse: 0.000729821
[300]	training's rmse: 0.000725065	valid_1's rmse: 0.000729538


[350]	training's rmse: 0.000724447	valid_1's rmse: 0.000729358
[400]	training's rmse: 0.000723915	valid_1's rmse: 0.000729238


[450]	training's rmse: 0.00072343	valid_1's rmse: 0.000729147


2022-11-01 16:33:03,715 :: paired_test_greedy_fs :: After train.
2022-11-01 16:33:03,716 :: get_specific_features :: data.columns Index(['stoch_k_price_21_1', 'Money', 'ask_size2', 'ask_spread',
       'bid_ask_spread2', 'bid_ask_w_spread2', 'bid_price2', 'bid_size2',
       'bid_spread', 'log_return_mean_price', 'rel_order_count_1_80',
       'rel_order_count_20_40', 'rel_order_count_40_80',
       'rel_order_count_5_80', 'rel_price_10_40', 'rel_price_1_5',
       'rel_price_40_80', 'rel_price_5_10', 'rel_size_10_80', 'rel_size_40_80',
       'rel_stoch_price_21_1_3', 'rel_volume_ask', 'rel_volume_ask_bid',
       'rel_volume_ask_bid1', 'stoch_d_size_42_3', 'stoch_k_size_14_1',
       'total_volume', 'volume_imbalance', 'wap_balance'],
      dtype='object'), possible_feature_list == ['stoch_k_price_21_1', 'wap_balance', 'volume_imbalance', 'rel_order_count_1_80', 'rel_price_5_10', 'log_return_mean_price', 'bid_spread', 'ask_spread', 'stoch_k_size_14_1']
2022-11-01 16:33:03,721 :: get_

[500]	training's rmse: 0.000723033	valid_1's rmse: 0.00072905
Did not meet early stopping. Best iteration is:
[500]	training's rmse: 0.000723033	valid_1's rmse: 0.00072905
(440000,) [ 1.39920987e-04  1.39920987e-04  1.37949988e-04  1.37949988e-04
  1.35979019e-04  1.08878798e-04  1.06415158e-04 -4.77864778e-05
 -4.77864778e-05 -9.50758113e-05 -7.78340836e-05 -4.87708749e-05
 -4.87708749e-05 -6.50265210e-05  9.36057859e-06  4.60146111e-04
  8.24715884e-04  8.23729730e-04  1.03458879e-03  1.08588114e-03
  9.41472070e-04  9.19298327e-04  8.97620979e-04  7.59676623e-04
  7.12314562e-04  7.11821951e-04  7.19704083e-04  7.19704083e-04
  7.19704083e-04  7.82265619e-04  7.82265619e-04  7.29556312e-04
  8.45319766e-04  8.54186772e-04  8.54186772e-04  8.54186772e-04
  8.54186772e-04  8.53694160e-04  8.53694160e-04  8.59112886e-04
  8.59112886e-04  7.26108032e-04  7.26108032e-04  7.84236065e-04
  7.84236065e-04  7.84236065e-04  7.84236065e-04  7.84235657e-04
  7.84235657e-04  7.90146994e-04  7.90

2022-11-01 16:33:04,471 :: paired_test_greedy_fs :: performances == [0.034872403282289355, 0.03493560476595747, 0.03482881084691358, 0.03486585517598895, 0.034961446779415706, 0.032021796662922264]
2022-11-01 16:33:04,471 :: __init__ :: 
Initial random seed for lgb == {'num_leaves': 131072, 'max_bin': 256, 'learning_rate': 0.01, 'objective': 'regression', 'metric': 'rmse', 'max_depth': 6, 'feature_fraction': 1.0, 'feature_fraction_bynode': 0.6, 'bagging_fraction': 1.0, 'seed': None, 'data_random_seed': 9110, 'feature_fraction_seed': 9110}
2022-11-01 16:33:04,472 :: paired_test_greedy_fs :: Before train.
2022-11-01 16:33:04,472 :: train :: 
Random seed for lgb == 8161,             data_random_seed == 8161,             feature_fraction_seed == 8161


delete model 1
delete model 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2150
[LightGBM] [Info] Number of data points in the train set: 1500000, number of used features: 9
[LightGBM] [Info] Start training from score 0.000002
Training until validation scores don't improve for 50 rounds
[50]	training's rmse: 0.000732338	valid_1's rmse: 0.000734542
[100]	training's rmse: 0.000729521	valid_1's rmse: 0.000732142


[150]	training's rmse: 0.00072785	valid_1's rmse: 0.000730924
[200]	training's rmse: 0.000726713	valid_1's rmse: 0.000730248


[250]	training's rmse: 0.00072585	valid_1's rmse: 0.000729836
[300]	training's rmse: 0.000725114	valid_1's rmse: 0.000729576


[350]	training's rmse: 0.000724492	valid_1's rmse: 0.00072942
[400]	training's rmse: 0.000723959	valid_1's rmse: 0.000729301


[450]	training's rmse: 0.000723536	valid_1's rmse: 0.000729231
[500]	training's rmse: 0.000723135	valid_1's rmse: 0.000729154
Did not meet early stopping. Best iteration is:
[500]	training's rmse: 0.000723135	valid_1's rmse: 0.000729154


2022-11-01 16:33:19,375 :: paired_test_greedy_fs :: After train.


(440000,) [ 1.39920987e-04  1.39920987e-04  1.37949988e-04  1.37949988e-04
  1.35979019e-04  1.08878798e-04  1.06415158e-04 -4.77864778e-05
 -4.77864778e-05 -9.50758113e-05 -7.78340836e-05 -4.87708749e-05
 -4.87708749e-05 -6.50265210e-05  9.36057859e-06  4.60146111e-04
  8.24715884e-04  8.23729730e-04  1.03458879e-03  1.08588114e-03
  9.41472070e-04  9.19298327e-04  8.97620979e-04  7.59676623e-04
  7.12314562e-04  7.11821951e-04  7.19704083e-04  7.19704083e-04
  7.19704083e-04  7.82265619e-04  7.82265619e-04  7.29556312e-04
  8.45319766e-04  8.54186772e-04  8.54186772e-04  8.54186772e-04
  8.54186772e-04  8.53694160e-04  8.53694160e-04  8.59112886e-04
  8.59112886e-04  7.26108032e-04  7.26108032e-04  7.84236065e-04
  7.84236065e-04  7.84236065e-04  7.84236065e-04  7.84235657e-04
  7.84235657e-04  7.90146994e-04  7.90146994e-04  9.06508416e-04
  9.70555702e-04  9.53295443e-04  9.53295443e-04  9.48857283e-04
  9.40975209e-04  9.40975209e-04  9.41468345e-04  9.61174665e-04
  4.89973056e-0

2022-11-01 16:33:20,147 :: paired_test_greedy_fs :: performances == [0.034872403282289355, 0.03493560476595747, 0.03482881084691358, 0.03486585517598895, 0.034961446779415706, 0.032021796662922264, 0.032221437354181814]
2022-11-01 16:33:20,148 :: __init__ :: 
Initial random seed for lgb == {'num_leaves': 131072, 'max_bin': 256, 'learning_rate': 0.01, 'objective': 'regression', 'metric': 'rmse', 'max_depth': 6, 'feature_fraction': 1.0, 'feature_fraction_bynode': 0.6, 'bagging_fraction': 1.0, 'seed': None, 'data_random_seed': 8161, 'feature_fraction_seed': 8161}
2022-11-01 16:33:20,148 :: paired_test_greedy_fs :: Before train.
2022-11-01 16:33:20,148 :: train :: 
Random seed for lgb == 4381,             data_random_seed == 4381,             feature_fraction_seed == 4381


delete model 1
delete model 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2152
[LightGBM] [Info] Number of data points in the train set: 1500000, number of used features: 9
[LightGBM] [Info] Start training from score 0.000002
Training until validation scores don't improve for 50 rounds
[50]	training's rmse: 0.000732337	valid_1's rmse: 0.00073455
[100]	training's rmse: 0.000729519	valid_1's rmse: 0.000732158


[150]	training's rmse: 0.000727842	valid_1's rmse: 0.000730924
[200]	training's rmse: 0.000726698	valid_1's rmse: 0.000730242


[250]	training's rmse: 0.000725803	valid_1's rmse: 0.000729817
[300]	training's rmse: 0.000725098	valid_1's rmse: 0.00072955


[350]	training's rmse: 0.000724484	valid_1's rmse: 0.000729362
[400]	training's rmse: 0.000723935	valid_1's rmse: 0.000729238
[450]	training's rmse: 0.000723481	valid_1's rmse: 0.000729142




2022-11-01 16:33:33,601 :: paired_test_greedy_fs :: After train.


[500]	training's rmse: 0.000723081	valid_1's rmse: 0.000729052
Did not meet early stopping. Best iteration is:
[500]	training's rmse: 0.000723081	valid_1's rmse: 0.000729052
(440000,) [ 1.39920987e-04  1.39920987e-04  1.37949988e-04  1.37949988e-04
  1.35979019e-04  1.08878798e-04  1.06415158e-04 -4.77864778e-05
 -4.77864778e-05 -9.50758113e-05 -7.78340836e-05 -4.87708749e-05
 -4.87708749e-05 -6.50265210e-05  9.36057859e-06  4.60146111e-04
  8.24715884e-04  8.23729730e-04  1.03458879e-03  1.08588114e-03
  9.41472070e-04  9.19298327e-04  8.97620979e-04  7.59676623e-04
  7.12314562e-04  7.11821951e-04  7.19704083e-04  7.19704083e-04
  7.19704083e-04  7.82265619e-04  7.82265619e-04  7.29556312e-04
  8.45319766e-04  8.54186772e-04  8.54186772e-04  8.54186772e-04
  8.54186772e-04  8.53694160e-04  8.53694160e-04  8.59112886e-04
  8.59112886e-04  7.26108032e-04  7.26108032e-04  7.84236065e-04
  7.84236065e-04  7.84236065e-04  7.84236065e-04  7.84235657e-04
  7.84235657e-04  7.90146994e-04  7.

2022-11-01 16:33:34,253 :: paired_test_greedy_fs :: performances == [0.034872403282289355, 0.03493560476595747, 0.03482881084691358, 0.03486585517598895, 0.034961446779415706, 0.032021796662922264, 0.032221437354181814, 0.032031297902929134]
2022-11-01 16:33:34,253 :: __init__ :: 
Initial random seed for lgb == {'num_leaves': 131072, 'max_bin': 256, 'learning_rate': 0.01, 'objective': 'regression', 'metric': 'rmse', 'max_depth': 6, 'feature_fraction': 1.0, 'feature_fraction_bynode': 0.6, 'bagging_fraction': 1.0, 'seed': None, 'data_random_seed': 4381, 'feature_fraction_seed': 4381}
2022-11-01 16:33:34,254 :: paired_test_greedy_fs :: Before train.
2022-11-01 16:33:34,255 :: train :: 
Random seed for lgb == 5107,             data_random_seed == 5107,             feature_fraction_seed == 5107


delete model 1
delete model 2
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2151
[LightGBM] [Info] Number of data points in the train set: 1500000, number of used features: 9
[LightGBM] [Info] Start training from score 0.000002
Training until validation scores don't improve for 50 rounds
[50]	training's rmse: 0.000732313	valid_1's rmse: 0.000734519
[100]	training's rmse: 0.000729499	valid_1's rmse: 0.000732108


[150]	training's rmse: 0.000727818	valid_1's rmse: 0.000730877
[200]	training's rmse: 0.000726704	valid_1's rmse: 0.000730229


[250]	training's rmse: 0.00072584	valid_1's rmse: 0.000729839
[300]	training's rmse: 0.000725105	valid_1's rmse: 0.000729563


[350]	training's rmse: 0.000724493	valid_1's rmse: 0.000729386
[400]	training's rmse: 0.000723962	valid_1's rmse: 0.00072927


[450]	training's rmse: 0.000723529	valid_1's rmse: 0.000729179


2022-11-01 16:34:00,109 :: paired_test_greedy_fs :: After train.


[500]	training's rmse: 0.000723145	valid_1's rmse: 0.000729102
Did not meet early stopping. Best iteration is:
[500]	training's rmse: 0.000723145	valid_1's rmse: 0.000729102
(440000,) [ 1.39920987e-04  1.39920987e-04  1.37949988e-04  1.37949988e-04
  1.35979019e-04  1.08878798e-04  1.06415158e-04 -4.77864778e-05
 -4.77864778e-05 -9.50758113e-05 -7.78340836e-05 -4.87708749e-05
 -4.87708749e-05 -6.50265210e-05  9.36057859e-06  4.60146111e-04
  8.24715884e-04  8.23729730e-04  1.03458879e-03  1.08588114e-03
  9.41472070e-04  9.19298327e-04  8.97620979e-04  7.59676623e-04
  7.12314562e-04  7.11821951e-04  7.19704083e-04  7.19704083e-04
  7.19704083e-04  7.82265619e-04  7.82265619e-04  7.29556312e-04
  8.45319766e-04  8.54186772e-04  8.54186772e-04  8.54186772e-04
  8.54186772e-04  8.53694160e-04  8.53694160e-04  8.59112886e-04
  8.59112886e-04  7.26108032e-04  7.26108032e-04  7.84236065e-04
  7.84236065e-04  7.84236065e-04  7.84236065e-04  7.84235657e-04
  7.84235657e-04  7.90146994e-04  7.

2022-11-01 16:34:00,708 :: paired_test_greedy_fs :: performances == [0.034872403282289355, 0.03493560476595747, 0.03482881084691358, 0.03486585517598895, 0.034961446779415706, 0.032021796662922264, 0.032221437354181814, 0.032031297902929134, 0.032053347339843]
2022-11-01 16:34:00,708 :: __init__ :: 
Initial random seed for lgb == {'num_leaves': 131072, 'max_bin': 256, 'learning_rate': 0.01, 'objective': 'regression', 'metric': 'rmse', 'max_depth': 6, 'feature_fraction': 1.0, 'feature_fraction_bynode': 0.6, 'bagging_fraction': 1.0, 'seed': None, 'data_random_seed': 5107, 'feature_fraction_seed': 5107}
2022-11-01 16:34:00,709 :: paired_test_greedy_fs :: Before train.
2022-11-01 16:34:00,709 :: train :: 
Random seed for lgb == 2571,             data_random_seed == 2571,             feature_fraction_seed == 2571


delete model 1
delete model 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2156
[LightGBM] [Info] Number of data points in the train set: 1500000, number of used features: 9
[LightGBM] [Info] Start training from score 0.000002
Training until validation scores don't improve for 50 rounds
[50]	training's rmse: 0.00073233	valid_1's rmse: 0.000734548
[100]	training's rmse: 0.000729499	valid_1's rmse: 0.000732157


[150]	training's rmse: 0.000727827	valid_1's rmse: 0.000730935
[200]	training's rmse: 0.000726703	valid_1's rmse: 0.000730258


[250]	training's rmse: 0.000725827	valid_1's rmse: 0.000729839
[300]	training's rmse: 0.000725112	valid_1's rmse: 0.00072957


[350]	training's rmse: 0.000724504	valid_1's rmse: 0.000729405
[400]	training's rmse: 0.000723975	valid_1's rmse: 0.000729277
[450]	training's rmse: 0.000723538	valid_1's rmse: 0.000729191


[500]	training's rmse: 0.000723156	valid_1's rmse: 0.000729104
Did not meet early stopping. Best iteration is:
[500]	training's rmse: 0.000723156	valid_1's rmse: 0.000729104


2022-11-01 16:34:14,510 :: paired_test_greedy_fs :: After train.


(440000,) [ 1.39920987e-04  1.39920987e-04  1.37949988e-04  1.37949988e-04
  1.35979019e-04  1.08878798e-04  1.06415158e-04 -4.77864778e-05
 -4.77864778e-05 -9.50758113e-05 -7.78340836e-05 -4.87708749e-05
 -4.87708749e-05 -6.50265210e-05  9.36057859e-06  4.60146111e-04
  8.24715884e-04  8.23729730e-04  1.03458879e-03  1.08588114e-03
  9.41472070e-04  9.19298327e-04  8.97620979e-04  7.59676623e-04
  7.12314562e-04  7.11821951e-04  7.19704083e-04  7.19704083e-04
  7.19704083e-04  7.82265619e-04  7.82265619e-04  7.29556312e-04
  8.45319766e-04  8.54186772e-04  8.54186772e-04  8.54186772e-04
  8.54186772e-04  8.53694160e-04  8.53694160e-04  8.59112886e-04
  8.59112886e-04  7.26108032e-04  7.26108032e-04  7.84236065e-04
  7.84236065e-04  7.84236065e-04  7.84236065e-04  7.84235657e-04
  7.84235657e-04  7.90146994e-04  7.90146994e-04  9.06508416e-04
  9.70555702e-04  9.53295443e-04  9.53295443e-04  9.48857283e-04
  9.40975209e-04  9.40975209e-04  9.41468345e-04  9.61174665e-04
  4.89973056e-0

2022-11-01 16:34:15,225 :: paired_test_greedy_fs :: performances == [0.034872403282289355, 0.03493560476595747, 0.03482881084691358, 0.03486585517598895, 0.034961446779415706, 0.032021796662922264, 0.032221437354181814, 0.032031297902929134, 0.032053347339843, 0.032176084165249086]
2022-11-01 16:34:15,226 :: get_specific_features :: data.columns Index(['stoch_k_price_21_1', 'Money', 'ask_size2', 'ask_spread',
       'bid_ask_spread2', 'bid_ask_w_spread2', 'bid_price2', 'bid_size2',
       'bid_spread', 'log_return_mean_price', 'rel_order_count_1_80',
       'rel_order_count_20_40', 'rel_order_count_40_80',
       'rel_order_count_5_80', 'rel_price_10_40', 'rel_price_1_5',
       'rel_price_40_80', 'rel_price_5_10', 'rel_size_10_80', 'rel_size_40_80',
       'rel_stoch_price_21_1_3', 'rel_volume_ask', 'rel_volume_ask_bid',
       'rel_volume_ask_bid1', 'stoch_d_size_42_3', 'stoch_k_size_14_1',
       'total_volume', 'volume_imbalance', 'wap_balance'],
      dtype='object'), possible_fea

delete model 1
delete model 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2152
[LightGBM] [Info] Number of data points in the train set: 1500000, number of used features: 9
[LightGBM] [Info] Start training from score 0.000002
Training until validation scores don't improve for 50 rounds
[50]	training's rmse: 0.000721543	valid_1's rmse: 0.000723993
[100]	training's rmse: 0.000718736	valid_1's rmse: 0.000721636


[150]	training's rmse: 0.000717118	valid_1's rmse: 0.000720453
[200]	training's rmse: 0.000716003	valid_1's rmse: 0.000719822


[250]	training's rmse: 0.000715151	valid_1's rmse: 0.000719467
[300]	training's rmse: 0.000714472	valid_1's rmse: 0.000719256


[350]	training's rmse: 0.000713898	valid_1's rmse: 0.00071911
[400]	training's rmse: 0.000713395	valid_1's rmse: 0.00071902
[450]	training's rmse: 0.00071295	valid_1's rmse: 0.000718927




2022-11-01 16:34:29,407 :: paired_test_greedy_fs :: After train.
2022-11-01 16:34:29,408 :: get_specific_features :: data.columns Index(['stoch_k_price_21_1', 'Money', 'ask_size2', 'ask_spread',
       'bid_ask_spread2', 'bid_ask_w_spread2', 'bid_price2', 'bid_size2',
       'bid_spread', 'log_return_mean_price', 'rel_order_count_1_80',
       'rel_order_count_20_40', 'rel_order_count_40_80',
       'rel_order_count_5_80', 'rel_price_10_40', 'rel_price_1_5',
       'rel_price_40_80', 'rel_price_5_10', 'rel_size_10_80', 'rel_size_40_80',
       'rel_stoch_price_21_1_3', 'rel_volume_ask', 'rel_volume_ask_bid',
       'rel_volume_ask_bid1', 'stoch_d_size_42_3', 'stoch_k_size_14_1',
       'total_volume', 'volume_imbalance', 'wap_balance'],
      dtype='object'), possible_feature_list == ['stoch_k_price_21_1', 'wap_balance', 'volume_imbalance', 'rel_order_count_1_80', 'rel_price_5_10', 'log_return_mean_price', 'bid_spread', 'ask_spread', 'stoch_k_size_14_1']
2022-11-01 16:34:29,413 :: get_

[500]	training's rmse: 0.000712582	valid_1's rmse: 0.000718877
Did not meet early stopping. Best iteration is:
[500]	training's rmse: 0.000712582	valid_1's rmse: 0.000718877
(440000,) [ 3.40202445e-04  3.39719059e-04  3.51781579e-04  3.69176996e-04
  3.69176996e-04  3.69176996e-04  3.81257210e-04  4.04460181e-04
  4.04460181e-04  4.95833752e-04  4.95350221e-04  5.34011691e-04
  5.34011691e-04  5.34011691e-04  5.34011691e-04  5.34011691e-04
  4.45535028e-04  3.93345021e-04  8.16458632e-05  7.72975254e-05
 -1.15946286e-05  1.93249780e-05 -5.07280674e-05 -4.68583428e-04
 -4.68583428e-04 -4.90311184e-04 -5.05286211e-04 -3.86932225e-04
 -4.89836384e-04 -4.89836384e-04 -4.86454868e-04 -4.84039745e-04
 -4.82107425e-04 -4.82107425e-04 -5.45390067e-04 -5.33802493e-04
 -5.20759379e-04 -5.20759379e-04 -5.23656316e-04 -5.23656316e-04
 -5.85452362e-04 -5.85452362e-04 -5.85452362e-04 -5.40519133e-04
 -5.42934344e-04 -5.55486302e-04 -5.57417399e-04 -5.59830922e-04
 -5.59830922e-04 -5.62246074e-04 -5.

2022-11-01 16:34:30,189 :: paired_test_greedy_fs :: performances == [0.034872403282289355, 0.03493560476595747, 0.03482881084691358, 0.03486585517598895, 0.034961446779415706, 0.032021796662922264, 0.032221437354181814, 0.032031297902929134, 0.032053347339843, 0.032176084165249086, 0.015111930226916592]
2022-11-01 16:34:30,190 :: __init__ :: 
Initial random seed for lgb == {'num_leaves': 131072, 'max_bin': 256, 'learning_rate': 0.01, 'objective': 'regression', 'metric': 'rmse', 'max_depth': 6, 'feature_fraction': 1.0, 'feature_fraction_bynode': 0.6, 'bagging_fraction': 1.0, 'seed': None, 'data_random_seed': 5083, 'feature_fraction_seed': 5083}
2022-11-01 16:34:30,191 :: paired_test_greedy_fs :: Before train.
2022-11-01 16:34:30,191 :: train :: 
Random seed for lgb == 1494,             data_random_seed == 1494,             feature_fraction_seed == 1494


delete model 1
delete model 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2148
[LightGBM] [Info] Number of data points in the train set: 1500000, number of used features: 9
[LightGBM] [Info] Start training from score 0.000002
Training until validation scores don't improve for 50 rounds
[50]	training's rmse: 0.000721534	valid_1's rmse: 0.000723979
[100]	training's rmse: 0.000718746	valid_1's rmse: 0.000721635


[150]	training's rmse: 0.000717105	valid_1's rmse: 0.000720465
[200]	training's rmse: 0.000716006	valid_1's rmse: 0.000719841


[250]	training's rmse: 0.000715152	valid_1's rmse: 0.000719466
[300]	training's rmse: 0.000714473	valid_1's rmse: 0.000719236


[350]	training's rmse: 0.000713926	valid_1's rmse: 0.00071908
[400]	training's rmse: 0.000713466	valid_1's rmse: 0.000718973
[450]	training's rmse: 0.000713035	valid_1's rmse: 0.000718886




2022-11-01 16:34:43,828 :: paired_test_greedy_fs :: After train.


[500]	training's rmse: 0.000712635	valid_1's rmse: 0.0007188
Did not meet early stopping. Best iteration is:
[500]	training's rmse: 0.000712635	valid_1's rmse: 0.0007188
(440000,) [ 3.40202445e-04  3.39719059e-04  3.51781579e-04  3.69176996e-04
  3.69176996e-04  3.69176996e-04  3.81257210e-04  4.04460181e-04
  4.04460181e-04  4.95833752e-04  4.95350221e-04  5.34011691e-04
  5.34011691e-04  5.34011691e-04  5.34011691e-04  5.34011691e-04
  4.45535028e-04  3.93345021e-04  8.16458632e-05  7.72975254e-05
 -1.15946286e-05  1.93249780e-05 -5.07280674e-05 -4.68583428e-04
 -4.68583428e-04 -4.90311184e-04 -5.05286211e-04 -3.86932225e-04
 -4.89836384e-04 -4.89836384e-04 -4.86454868e-04 -4.84039745e-04
 -4.82107425e-04 -4.82107425e-04 -5.45390067e-04 -5.33802493e-04
 -5.20759379e-04 -5.20759379e-04 -5.23656316e-04 -5.23656316e-04
 -5.85452362e-04 -5.85452362e-04 -5.85452362e-04 -5.40519133e-04
 -5.42934344e-04 -5.55486302e-04 -5.57417399e-04 -5.59830922e-04
 -5.59830922e-04 -5.62246074e-04 -5.6224

2022-11-01 16:34:44,407 :: paired_test_greedy_fs :: performances == [0.034872403282289355, 0.03493560476595747, 0.03482881084691358, 0.03486585517598895, 0.034961446779415706, 0.032021796662922264, 0.032221437354181814, 0.032031297902929134, 0.032053347339843, 0.032176084165249086, 0.015111930226916592, 0.015184198288046957]
2022-11-01 16:34:44,408 :: __init__ :: 
Initial random seed for lgb == {'num_leaves': 131072, 'max_bin': 256, 'learning_rate': 0.01, 'objective': 'regression', 'metric': 'rmse', 'max_depth': 6, 'feature_fraction': 1.0, 'feature_fraction_bynode': 0.6, 'bagging_fraction': 1.0, 'seed': None, 'data_random_seed': 1494, 'feature_fraction_seed': 1494}
2022-11-01 16:34:44,408 :: paired_test_greedy_fs :: Before train.
2022-11-01 16:34:44,408 :: train :: 
Random seed for lgb == 6630,             data_random_seed == 6630,             feature_fraction_seed == 6630


delete model 1
delete model 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2152
[LightGBM] [Info] Number of data points in the train set: 1500000, number of used features: 9
[LightGBM] [Info] Start training from score 0.000002
Training until validation scores don't improve for 50 rounds
[50]	training's rmse: 0.000721552	valid_1's rmse: 0.000723994
[100]	training's rmse: 0.000718755	valid_1's rmse: 0.000721634


[150]	training's rmse: 0.000717122	valid_1's rmse: 0.000720446
[200]	training's rmse: 0.000716025	valid_1's rmse: 0.000719822


[250]	training's rmse: 0.000715193	valid_1's rmse: 0.000719456
[300]	training's rmse: 0.000714501	valid_1's rmse: 0.000719222
[350]	training's rmse: 0.000713936	valid_1's rmse: 0.000719098


[400]	training's rmse: 0.000713439	valid_1's rmse: 0.000719012
[450]	training's rmse: 0.00071301	valid_1's rmse: 0.000718941




2022-11-01 16:34:58,092 :: paired_test_greedy_fs :: After train.


[500]	training's rmse: 0.000712607	valid_1's rmse: 0.00071887
Did not meet early stopping. Best iteration is:
[500]	training's rmse: 0.000712607	valid_1's rmse: 0.00071887
(440000,) [ 3.40202445e-04  3.39719059e-04  3.51781579e-04  3.69176996e-04
  3.69176996e-04  3.69176996e-04  3.81257210e-04  4.04460181e-04
  4.04460181e-04  4.95833752e-04  4.95350221e-04  5.34011691e-04
  5.34011691e-04  5.34011691e-04  5.34011691e-04  5.34011691e-04
  4.45535028e-04  3.93345021e-04  8.16458632e-05  7.72975254e-05
 -1.15946286e-05  1.93249780e-05 -5.07280674e-05 -4.68583428e-04
 -4.68583428e-04 -4.90311184e-04 -5.05286211e-04 -3.86932225e-04
 -4.89836384e-04 -4.89836384e-04 -4.86454868e-04 -4.84039745e-04
 -4.82107425e-04 -4.82107425e-04 -5.45390067e-04 -5.33802493e-04
 -5.20759379e-04 -5.20759379e-04 -5.23656316e-04 -5.23656316e-04
 -5.85452362e-04 -5.85452362e-04 -5.85452362e-04 -5.40519133e-04
 -5.42934344e-04 -5.55486302e-04 -5.57417399e-04 -5.59830922e-04
 -5.59830922e-04 -5.62246074e-04 -5.62

2022-11-01 16:34:58,748 :: paired_test_greedy_fs :: performances == [0.034872403282289355, 0.03493560476595747, 0.03482881084691358, 0.03486585517598895, 0.034961446779415706, 0.032021796662922264, 0.032221437354181814, 0.032031297902929134, 0.032053347339843, 0.032176084165249086, 0.015111930226916592, 0.015184198288046957, 0.014914443569628344]
2022-11-01 16:34:58,749 :: __init__ :: 
Initial random seed for lgb == {'num_leaves': 131072, 'max_bin': 256, 'learning_rate': 0.01, 'objective': 'regression', 'metric': 'rmse', 'max_depth': 6, 'feature_fraction': 1.0, 'feature_fraction_bynode': 0.6, 'bagging_fraction': 1.0, 'seed': None, 'data_random_seed': 6630, 'feature_fraction_seed': 6630}
2022-11-01 16:34:58,749 :: paired_test_greedy_fs :: Before train.
2022-11-01 16:34:58,750 :: train :: 
Random seed for lgb == 9666,             data_random_seed == 9666,             feature_fraction_seed == 9666


delete model 1
delete model 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2147
[LightGBM] [Info] Number of data points in the train set: 1500000, number of used features: 9
[LightGBM] [Info] Start training from score 0.000002
Training until validation scores don't improve for 50 rounds
[50]	training's rmse: 0.000721559	valid_1's rmse: 0.000723996
[100]	training's rmse: 0.00071875	valid_1's rmse: 0.000721622


[150]	training's rmse: 0.000717113	valid_1's rmse: 0.000720435
[200]	training's rmse: 0.000715998	valid_1's rmse: 0.000719811
[250]	training's rmse: 0.000715168	valid_1's rmse: 0.000719454


[300]	training's rmse: 0.000714487	valid_1's rmse: 0.000719233
[350]	training's rmse: 0.000713921	valid_1's rmse: 0.0007191


[400]	training's rmse: 0.000713412	valid_1's rmse: 0.000719006
[450]	training's rmse: 0.000712973	valid_1's rmse: 0.000718939




2022-11-01 16:35:12,289 :: paired_test_greedy_fs :: After train.


[500]	training's rmse: 0.000712577	valid_1's rmse: 0.000718868
Did not meet early stopping. Best iteration is:
[500]	training's rmse: 0.000712577	valid_1's rmse: 0.000718868
(440000,) [ 3.40202445e-04  3.39719059e-04  3.51781579e-04  3.69176996e-04
  3.69176996e-04  3.69176996e-04  3.81257210e-04  4.04460181e-04
  4.04460181e-04  4.95833752e-04  4.95350221e-04  5.34011691e-04
  5.34011691e-04  5.34011691e-04  5.34011691e-04  5.34011691e-04
  4.45535028e-04  3.93345021e-04  8.16458632e-05  7.72975254e-05
 -1.15946286e-05  1.93249780e-05 -5.07280674e-05 -4.68583428e-04
 -4.68583428e-04 -4.90311184e-04 -5.05286211e-04 -3.86932225e-04
 -4.89836384e-04 -4.89836384e-04 -4.86454868e-04 -4.84039745e-04
 -4.82107425e-04 -4.82107425e-04 -5.45390067e-04 -5.33802493e-04
 -5.20759379e-04 -5.20759379e-04 -5.23656316e-04 -5.23656316e-04
 -5.85452362e-04 -5.85452362e-04 -5.85452362e-04 -5.40519133e-04
 -5.42934344e-04 -5.55486302e-04 -5.57417399e-04 -5.59830922e-04
 -5.59830922e-04 -5.62246074e-04 -5.

2022-11-01 16:35:12,978 :: paired_test_greedy_fs :: performances == [0.034872403282289355, 0.03493560476595747, 0.03482881084691358, 0.03486585517598895, 0.034961446779415706, 0.032021796662922264, 0.032221437354181814, 0.032031297902929134, 0.032053347339843, 0.032176084165249086, 0.015111930226916592, 0.015184198288046957, 0.014914443569628344, 0.015057863044976694]
2022-11-01 16:35:12,979 :: __init__ :: 
Initial random seed for lgb == {'num_leaves': 131072, 'max_bin': 256, 'learning_rate': 0.01, 'objective': 'regression', 'metric': 'rmse', 'max_depth': 6, 'feature_fraction': 1.0, 'feature_fraction_bynode': 0.6, 'bagging_fraction': 1.0, 'seed': None, 'data_random_seed': 9666, 'feature_fraction_seed': 9666}
2022-11-01 16:35:12,980 :: paired_test_greedy_fs :: Before train.
2022-11-01 16:35:12,981 :: train :: 
Random seed for lgb == 442,             data_random_seed == 442,             feature_fraction_seed == 442


delete model 1
delete model 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2146
[LightGBM] [Info] Number of data points in the train set: 1500000, number of used features: 9
[LightGBM] [Info] Start training from score 0.000002
Training until validation scores don't improve for 50 rounds
[50]	training's rmse: 0.000721533	valid_1's rmse: 0.000723989
[100]	training's rmse: 0.000718756	valid_1's rmse: 0.000721637


[150]	training's rmse: 0.000717109	valid_1's rmse: 0.000720439
[200]	training's rmse: 0.00071601	valid_1's rmse: 0.000719804


[250]	training's rmse: 0.00071517	valid_1's rmse: 0.000719438
[300]	training's rmse: 0.000714489	valid_1's rmse: 0.000719217


[350]	training's rmse: 0.000713936	valid_1's rmse: 0.000719059
[400]	training's rmse: 0.000713456	valid_1's rmse: 0.000718969
[450]	training's rmse: 0.000713011	valid_1's rmse: 0.000718901




2022-11-01 16:35:27,588 :: paired_test_greedy_fs :: After train.


[500]	training's rmse: 0.000712602	valid_1's rmse: 0.000718839
Did not meet early stopping. Best iteration is:
[500]	training's rmse: 0.000712602	valid_1's rmse: 0.000718839
(440000,) [ 3.40202445e-04  3.39719059e-04  3.51781579e-04  3.69176996e-04
  3.69176996e-04  3.69176996e-04  3.81257210e-04  4.04460181e-04
  4.04460181e-04  4.95833752e-04  4.95350221e-04  5.34011691e-04
  5.34011691e-04  5.34011691e-04  5.34011691e-04  5.34011691e-04
  4.45535028e-04  3.93345021e-04  8.16458632e-05  7.72975254e-05
 -1.15946286e-05  1.93249780e-05 -5.07280674e-05 -4.68583428e-04
 -4.68583428e-04 -4.90311184e-04 -5.05286211e-04 -3.86932225e-04
 -4.89836384e-04 -4.89836384e-04 -4.86454868e-04 -4.84039745e-04
 -4.82107425e-04 -4.82107425e-04 -5.45390067e-04 -5.33802493e-04
 -5.20759379e-04 -5.20759379e-04 -5.23656316e-04 -5.23656316e-04
 -5.85452362e-04 -5.85452362e-04 -5.85452362e-04 -5.40519133e-04
 -5.42934344e-04 -5.55486302e-04 -5.57417399e-04 -5.59830922e-04
 -5.59830922e-04 -5.62246074e-04 -5.

2022-11-01 16:35:28,224 :: paired_test_greedy_fs :: performances == [0.034872403282289355, 0.03493560476595747, 0.03482881084691358, 0.03486585517598895, 0.034961446779415706, 0.032021796662922264, 0.032221437354181814, 0.032031297902929134, 0.032053347339843, 0.032176084165249086, 0.015111930226916592, 0.015184198288046957, 0.014914443569628344, 0.015057863044976694, 0.015230519308718216]
2022-11-01 16:35:28,225 :: get_specific_features :: data.columns Index(['stoch_k_price_21_1', 'Money', 'ask_size2', 'ask_spread',
       'bid_ask_spread2', 'bid_ask_w_spread2', 'bid_price2', 'bid_size2',
       'bid_spread', 'log_return_mean_price', 'rel_order_count_1_80',
       'rel_order_count_20_40', 'rel_order_count_40_80',
       'rel_order_count_5_80', 'rel_price_10_40', 'rel_price_1_5',
       'rel_price_40_80', 'rel_price_5_10', 'rel_size_10_80', 'rel_size_40_80',
       'rel_stoch_price_21_1_3', 'rel_volume_ask', 'rel_volume_ask_bid',
       'rel_volume_ask_bid1', 'stoch_d_size_42_3', 'stoc

delete model 1
delete model 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2147
[LightGBM] [Info] Number of data points in the train set: 1500000, number of used features: 9
[LightGBM] [Info] Start training from score 0.000002
Training until validation scores don't improve for 50 rounds
[50]	training's rmse: 0.000712258	valid_1's rmse: 0.000712682
[100]	training's rmse: 0.00070958	valid_1's rmse: 0.000710362


[150]	training's rmse: 0.00070804	valid_1's rmse: 0.000709206
[200]	training's rmse: 0.000707022	valid_1's rmse: 0.000708542


[250]	training's rmse: 0.00070625	valid_1's rmse: 0.000708121
[300]	training's rmse: 0.000705616	valid_1's rmse: 0.000707843
[350]	training's rmse: 0.000705043	valid_1's rmse: 0.000707665


[400]	training's rmse: 0.000704527	valid_1's rmse: 0.000707537
[450]	training's rmse: 0.000704062	valid_1's rmse: 0.000707448




2022-11-01 16:35:42,675 :: paired_test_greedy_fs :: After train.
2022-11-01 16:35:42,676 :: get_specific_features :: data.columns Index(['stoch_k_price_21_1', 'Money', 'ask_size2', 'ask_spread',
       'bid_ask_spread2', 'bid_ask_w_spread2', 'bid_price2', 'bid_size2',
       'bid_spread', 'log_return_mean_price', 'rel_order_count_1_80',
       'rel_order_count_20_40', 'rel_order_count_40_80',
       'rel_order_count_5_80', 'rel_price_10_40', 'rel_price_1_5',
       'rel_price_40_80', 'rel_price_5_10', 'rel_size_10_80', 'rel_size_40_80',
       'rel_stoch_price_21_1_3', 'rel_volume_ask', 'rel_volume_ask_bid',
       'rel_volume_ask_bid1', 'stoch_d_size_42_3', 'stoch_k_size_14_1',
       'total_volume', 'volume_imbalance', 'wap_balance'],
      dtype='object'), possible_feature_list == ['stoch_k_price_21_1', 'wap_balance', 'volume_imbalance', 'rel_order_count_1_80', 'rel_price_5_10', 'log_return_mean_price', 'bid_spread', 'ask_spread', 'stoch_k_size_14_1']
2022-11-01 16:35:42,681 :: get_

[500]	training's rmse: 0.000703638	valid_1's rmse: 0.000707365
Did not meet early stopping. Best iteration is:
[500]	training's rmse: 0.000703638	valid_1's rmse: 0.000707365
(440000,) [ 5.35734696e-04  5.20889647e-04  5.95075369e-04  5.95075369e-04
  5.95075369e-04  5.95075369e-04  5.74409321e-04  5.72819961e-04
  5.72819961e-04  6.15783152e-04  6.15783152e-04  3.37569276e-04
  3.37569276e-04  3.42870480e-04  3.44990956e-04  4.75886889e-04
  4.75886889e-04  5.38420223e-04  6.01483218e-04  6.01483218e-04
  4.79066803e-04  4.79066803e-04  4.79066803e-04  4.75887413e-04
  4.50450199e-04  4.50450199e-04  4.44618723e-04  4.44618723e-04
  2.92481796e-04  3.82027851e-04  1.95481261e-04 -2.64852952e-06
 -2.64852952e-06 -4.50230946e-05 -8.68662464e-05 -8.68662464e-05
 -1.02754784e-04 -1.04343773e-04 -2.54729035e-04 -2.80664011e-04
 -2.84900598e-04 -3.60151171e-05 -5.08447629e-05 -5.08447629e-05
 -1.33466092e-04 -1.33466092e-04 -1.95430533e-04 -2.58985179e-04
 -3.64882522e-04 -3.33637145e-04 -3.

2022-11-01 16:35:43,405 :: paired_test_greedy_fs :: performances == [0.034872403282289355, 0.03493560476595747, 0.03482881084691358, 0.03486585517598895, 0.034961446779415706, 0.032021796662922264, 0.032221437354181814, 0.032031297902929134, 0.032053347339843, 0.032176084165249086, 0.015111930226916592, 0.015184198288046957, 0.014914443569628344, 0.015057863044976694, 0.015230519308718216, 0.012078072890632674]
2022-11-01 16:35:43,406 :: __init__ :: 
Initial random seed for lgb == {'num_leaves': 131072, 'max_bin': 256, 'learning_rate': 0.01, 'objective': 'regression', 'metric': 'rmse', 'max_depth': 6, 'feature_fraction': 1.0, 'feature_fraction_bynode': 0.6, 'bagging_fraction': 1.0, 'seed': None, 'data_random_seed': 8394, 'feature_fraction_seed': 8394}
2022-11-01 16:35:43,406 :: paired_test_greedy_fs :: Before train.
2022-11-01 16:35:43,406 :: train :: 
Random seed for lgb == 5681,             data_random_seed == 5681,             feature_fraction_seed == 5681


delete model 1
delete model 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2145
[LightGBM] [Info] Number of data points in the train set: 1500000, number of used features: 9
[LightGBM] [Info] Start training from score 0.000002
Training until validation scores don't improve for 50 rounds
[50]	training's rmse: 0.00071227	valid_1's rmse: 0.000712714
[100]	training's rmse: 0.000709599	valid_1's rmse: 0.000710401


