**About :** Trains XGBoost models.

**TODO**:
- Try a better negative-sampling technique?

In [None]:
cd ../src

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os
# Expose only CUDA device 0 to this kernel. This cell runs before the cell that
# imports cudf / xgboost — presumably so the setting is in place before CUDA is initialised.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [4]:
import os
import gc
import cudf
import json
import glob
import numba
import xgboost
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm
from pandarallel import pandarallel
from numerize.numerize import numerize

# Hide FutureWarning messages for the rest of the session.
warnings.simplefilter("ignore", FutureWarning)

# Allow wide / long DataFrames to be displayed without truncation (up to 500 columns / rows).
pd.set_option("display.max_columns", 500)
pd.set_option("display.max_rows", 500)

# Start the pandarallel worker pool (32 workers, no progress bars).
pandarallel.initialize(progress_bar=False, nb_workers=32)

INFO: Pandarallel will run on 32 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


In [5]:
from params import *

from utils.metrics import get_coverage
from utils.plot import plot_importances
from utils.load import *
from utils.logger import save_config, prepare_log_folder, create_logger
from utils.torch import seed_everything

from training.xgb import kfold, optimize

### Load

In [6]:
# Feature-set version tag; interpolated into the fts_val_* / fts_test_* glob patterns
# (REGEX, TEST_REGEX) in later cells and stored as Config.version.
VERSION = "clicks_cv3-tv5.10"
# VERSION = "cv3-tv5.10"

# Version tag interpolated into GT_REGEX in a later cell.
GT_VERSION = "gt.7"

#### Train data
- neg sampling could use candidates from lower versions

In [7]:
# Stored as Config.pos_ratio; presumably the positive/negative ratio targeted when
# sampling negatives — confirm in the data loader.
POS_RATIO = 0.5
# Stored as Config.target and used to pick the feature folder (REGEX) in a later cell;
# any value other than "gt_clicks" adds a `_{TARGET}` suffix to that folder name.
TARGET = "gt_clicks"   # "gt_clicks", "gt_carts", "gt_orders"

In [8]:
# Log folders of earlier runs, keyed by target name.
# There is no "gt_clicks" entry, so PROBS_PATHS[TARGET] would raise KeyError for that
# target; in the cells visible here it is only referenced from commented-out code.
PROBS_PATHS = dict(
    gt_orders="../logs/2023-01-14/9/",
    gt_carts="../logs/2023-01-14/8/",
)

In [9]:
# def load_sessions(regex):
#     dfs = []
#     for idx, chunk_file in enumerate(glob.glob(regex)):
#         df = cudf.read_parquet(chunk_file, columns=["session"])
#         dfs.append(df.drop_duplicates(keep="first"))

#     return cudf.concat(dfs).reset_index(drop=True)

# sessions = load_sessions( f"../output/features/fts_val_{VERSION}/*")

# from sklearn.model_selection import KFold
# K = 4

# kf = KFold(n_splits=K, shuffle=True, random_state=42)
# splits = kf.split(sessions)

# sessions['fold'] = -1
# for i, (_, val_idx) in enumerate(splits):
#     sessions.loc[val_idx, "fold"] = i

# sessions.to_csv(f"../input/folds_{K}.csv", index=False)

In [10]:
# df_train = load_parquets_cudf_folds(
#     f"../output/features/fts_val_{VERSION}/*",
#     "../input/folds_4.csv",
#     fold=0,
#     pos_ratio=POS_RATIO,
#     target=TARGET,
#     max_n=1,
#     train_only=True,
#     probs_file=PROBS_PATHS[TARGET]
# )

In [11]:
# df_train = load_parquets_cudf_folds(
#     f"../output/features/fts_val_{VERSION}/*",
#     "../input/folds_4.csv",
#     fold=0,
#     pos_ratio=0.1,
#     target=TARGET,
#     train_only=True,
#     columns=['session', 'candidates', 'gt_clicks', 'gt_carts', 'gt_orders'],
#     max_n=1,
# )

In [12]:
# df_train = df_train.sort_values('session', ignore_index=True)
# group = df_train[['session', 'candidates']].groupby('session').size().values

In [13]:
# REMOVE CORRELATED
# The feature names follow a regular naming grid, so they are generated instead of being
# spelled out one by one. Order matters: it is exactly the order of the former literal list.
_EVENT_TYPES = ("clicks", "carts", "orders")
_WEIGHTINGS = ("pos-log", "type-163", "lastday", "time", "recsys")
_AGGS = ("mean", "sum", "max")
_MATRICES = (
    "123_temporal_20", "123_type136_20", "12__20", "123_type0.590.5_20",
    "cpu-90", "cpu-95", "cpu-99",
    "gpu-116", "gpu-115", "gpu-93",
    "gpu-217", "gpu-226", "gpu-232",
    "gpu-239", "gpu-700", "gpu-701",
)
# "" gives the plain aggregate name, "<weighting>_" the per-weighting ones.
_MATRIX_PREFIXES = ("",) + tuple(f"{w}_" for w in _WEIGHTINGS)

FEATURES = [
    # <type>_popularity_w_<weighting>  (no "time" variant in this group)
    *(
        f"{t}_popularity_w_{w}"
        for t in _EVENT_TYPES
        for w in ("pos-log", "type-163", "lastday", "recsys")
    ),
    # <type>_popularity_w_<weighting>_w  (no "lastday" / "time" variant in this group)
    *(
        f"{t}_popularity_w_{w}_w"
        for t in _EVENT_TYPES
        for w in ("pos-log", "type-163", "recsys")
    ),
    # w_<weighting>
    *(f"w_{w}" for w in _WEIGHTINGS),
    # matrix_<name>_[<weighting>_]<agg> : 16 matrices x 6 prefixes x 3 aggregations
    *(
        f"matrix_{m}_{prefix}{agg}"
        for m in _MATRICES
        for prefix in _MATRIX_PREFIXES
        for agg in _AGGS
    ),
    # Count-style features, listed explicitly.
    'candidate_clicks_before', 'candidate_carts_before', 'candidate_orders_before', 'candidate_*_before',
    'n_views', 'n_clicks', 'n_carts', 'n_orders',
    # Rank variants of the popularity features (all five weightings).
    *(f"{t}_popularity_w_{w}_rank" for t in _EVENT_TYPES for w in _WEIGHTINGS),
    *(f"{t}_popularity_w_{w}_w_rank" for t in _EVENT_TYPES for w in _WEIGHTINGS),
    # Rank variants of the w_<weighting> features.
    *(f"w_{w}_rank" for w in _WEIGHTINGS),
    # Rank variants of the matrix features ("mean" aggregation only).
    *(
        f"matrix_{m}_{prefix}mean_rank"
        for m in _MATRICES
        for prefix in _MATRIX_PREFIXES
    ),
]

In [14]:
# Append 75 more names, in the same order as the former literal list:
#   popularity_<window>_<type>                       (3 types x 5 windows)
#   embed_<name>_last_<0..4>                         (3 embeddings x 5)
#   embed_<name>_<weighting>_<mean|sum|max>          (3 embeddings x 5 weightings x 3)
FEATURES += [
    *(
        f"popularity_{window}_{event}"
        for event in ("clicks", "carts", "orders")
        for window in ("week", "day", "hour", "hour/day", "day/week")
    ),
    *(
        f"embed_{embed}_{suffix}"
        for embed in ("1-9_64_cartbuy", "1_64", "1-5_64")
        for suffix in (
            [f"last_{i}" for i in range(5)]
            + [
                f"{weighting}_{agg}"
                for weighting in ("pos-log", "type-163", "lastday", "time", "recsys")
                for agg in ("mean", "sum", "max")
            ]
        )
    ),
]


In [15]:
# Append the 45 "_rank" variants, in the same order as the former literal list:
#   popularity_<window>_<type>_rank                  (3 types x 5 windows)
#   embed_<name>_last_<0..4>_rank                    (3 embeddings x 5)
#   embed_<name>_<weighting>_mean_rank               (3 embeddings x 5 weightings)
FEATURES += [
    *(
        f"popularity_{window}_{event}_rank"
        for event in ("clicks", "carts", "orders")
        for window in ("week", "day", "hour", "hour/day", "day/week")
    ),
    *(
        f"embed_{embed}_{suffix}_rank"
        for embed in ("1-9_64_cartbuy", "1_64", "1-5_64")
        for suffix in (
            [f"last_{i}" for i in range(5)]
            + [
                f"{weighting}_mean"
                for weighting in ("pos-log", "type-163", "lastday", "time", "recsys")
            ]
        )
    ),
]

In [16]:
# Substrings identifying features to drop. The matching loop below goes pattern by
# pattern, so TO_REMOVE keeps the same order as building it with one `+=` per pattern.
_DROP_PATTERNS = ("popularity_w_time", "popularity_w_lastday_w")

TO_REMOVE = [
    name
    for pattern in _DROP_PATTERNS
    for name in FEATURES
    if pattern in name
]

# Keep every feature that was not flagged above (original order preserved).
FEATURES = [name for name in FEATURES if name not in TO_REMOVE]

In [17]:
len(FEATURES)

564

In [18]:
# df_train = cudf.from_pandas(df_train)
# corr = df_train[FEATURES].corr()
# corr = corr.to_pandas()
# corr = corr.values

# mask = np.zeros_like(corr, dtype=bool)
# mask[np.triu_indices_from(mask)] = True
# corr[mask] = 0

In [19]:
# TH = 0.99

# for i in range(len(corr)):
#     for j in range(len(corr)):
#         if corr[i, j] > TH:
#             if FEATURES[i] in TO_REMOVE or FEATURES[j] in TO_REMOVE:
#                 continue
#             print(FEATURES[i], FEATURES[j], f'{corr[i, j] :.3f}')

In [20]:
# df = cudf.read_parquet(glob.glob(REGEX)[0])
# df = df.rename(columns={"clicks_popularity_w_pos-log_rank" : "clicks_popularity_w_pos-log_rank_ref"})
# df = cudf.read_parquet(glob.glob(TEST_REGEX)[0])

# from data.fe import add_rank_feature
# for c in ['clicks_popularity_w_pos-log', 'clicks_popularity_w_type-163', 'clicks_popularity_w_lastday']:
#     if c + "_rank" not in df.columns:
#         print(f'Add rank ft for {c}')
#         df = df.reset_index(drop=True)
#         add_rank_feature(df, c)
# (df['clicks_popularity_w_pos-log_rank'] == df['clicks_popularity_w_pos-log_rank_ref']).all()

# for f in tqdm(glob.glob(TEST_REGEX)):
#     dft = cudf.read_parquet(f, columns=['clicks_popularity_w_pos-log_rank'])

In [21]:
# Glob pattern for the validation feature files: the default folder for "gt_clicks",
# a target-specific folder (suffix `_{TARGET}`) for any other target.
REGEX = (
    f"../output/features/fts_val_{VERSION}/*"
    if TARGET == "gt_clicks"
    else f"../output/features/fts_val_{VERSION}_{TARGET}/*"
)
# Displayed as the cell output: number of files matching the pattern.
len(glob.glob(REGEX))

91

In [22]:
# Glob pattern for the `fts_test_{VERSION}` feature files (passed to `kfold` in the
# training cell); the bare `len(...)` displays how many files match.
TEST_REGEX = f"../output/features/fts_test_{VERSION}/*"
len(glob.glob(TEST_REGEX))

85

In [23]:
# Glob pattern built from GT_VERSION; handed to training through Config.gt_regex.
# NOTE(review): the recorded output of this cell is 0, i.e. no file matched this
# pattern when it was last run — confirm the folder / GT_VERSION is correct.
GT_REGEX = f"../output/features/fts_val_{GT_VERSION}/*"
len(glob.glob(GT_REGEX))

0

### Params

In [24]:
# Hyper-parameters keyed by model name; Config selects its entry with PARAMS[Config.model].
PARAMS = {
    "xgb": dict(
        learning_rate=0.01,
        max_depth=8,
        subsample=0.9,  # 0.7 / 0.8 / 0.9
        colsample_bytree=0.7,  # 0.7 / 0.8 / 0.9
        reg_alpha=0.01,
        reg_lambda=0.1,
        min_child_weight=0,
        # gamma=0.01,
        # scale_pos_weight=1,
        eval_metric="auc",
        objective="binary:logistic",  # 'binary:logistic',
        tree_method="gpu_hist",
        predictor="gpu_predictor",
        random_state=42,
    ),
}

In [25]:
# TO_REMOVE = [
#     'candidate_*_before', 'matrix_gpu-700_lastday_max', 'matrix_12__20_lastday_max', 'matrix_gpu-226_lastday_max', 'matrix_cpu-90_lastday_max', 'matrix_gpu-700_sum', 'matrix_gpu-700_pos-log_sum', 'matrix_12__20_lastday_sum',
#     'matrix_gpu-700_pos-log_max', 'matrix_gpu-226_lastday_sum', 'matrix_gpu-700_max', 'matrix_gpu-700_time_sum', 'matrix_123_type136_20_lastday_max', 'matrix_cpu-90_lastday_sum', 'matrix_cpu-90_type-163_max', 'matrix_gpu-700_time_max',
#     'matrix_12__20_time_sum', 'matrix_gpu-700_type-163_sum', 'matrix_gpu-700_lastday_sum', 'matrix_gpu-700_type-163_max', 'matrix_cpu-90_time_sum', 'matrix_123_type136_20_time_sum', 'matrix_gpu-217_lastday_max', 'matrix_12__20_pos-log_sum',
#     'matrix_12__20_type-163_max', 'matrix_12__20_time_max', 'matrix_cpu-90_max', 'matrix_cpu-90_type-163_sum', 'matrix_cpu-99_lastday_max', 'matrix_cpu-90_sum', 'matrix_gpu-226_sum', 'matrix_gpu-226_time_sum', 'matrix_12__20_time_mean',
#     'matrix_12__20_type-163_mean', 'matrix_gpu-700_pos-log_mean', 'matrix_123_type0.590.5_20_lastday_max', 'matrix_gpu-700_time_mean', 'matrix_12__20_type-163_sum', 'matrix_12__20_pos-log_max', 'matrix_123_type136_20_lastday_sum',
#     'matrix_cpu-90_time_mean', 'matrix_gpu-226_max', 'matrix_123_type136_20_type-163_max', 'matrix_gpu-226_type-163_max', 'matrix_gpu-226_lastday_mean', 'matrix_gpu-226_type-163_sum', 'matrix_cpu-99_time_sum', 'matrix_12__20_lastday_mean',
#     'matrix_gpu-700_type-163_mean','matrix_123_type136_20_type-163_sum'
# ][:50]

In [26]:
class Config:
    # Experiment configuration: a plain namespace of class attributes that is saved with
    # `save_config(Config, ...)` and passed to `kfold(...)` (and to `optimize(...)` in the
    # commented-out cell). How each field is consumed is defined in `training.xgb`.
    seed = 42
    version = VERSION  # feature-set version tag defined in the "Load" section
    
    # Fold-assignment CSV. The file name encodes the number of folds, so keep it in sync with `k`.
    folds_file = "../input/folds_4.csv"
    k = 4
    mode = ""

    features = FEATURES  # same list object as FEATURES (not a copy)
#     features = [ft for ft in features if ft not in TO_REMOVE]

    cat_features = []

    target = TARGET
    pos_ratio = POS_RATIO

    use_gt_sessions = True  # filter out sessions with no gt
    use_gt_pos = False  # add candidates from gt
    gt_regex = GT_REGEX
    
    model = "xgb"

    # Same dict object as PARAMS["xgb"]; mutating one is visible through the other.
    params = PARAMS[model]

    use_es = True  # presumably "early stopping" — confirm in training.xgb
    num_boost_round = 10000
    
    probs_file = None  # PROBS_PATHS[target]
    probs_mode = ""  # "head"  "rank_40"
    restrict_all = False
    
    selected_folds = [0]
    folds_optimize = [0, 1, 2, 3]
    n_trials = 20

### Main

In [27]:
# No log folder yet: the training cell creates one with `prepare_log_folder(LOG_PATH)`
# the first time it runs with DEBUG = False, and reuses it on later runs.
log_folder = None

#### Optimize

In [28]:
# DEBUG = False
# DEBUG_MORE = True

In [29]:
# %%time

# if not DEBUG:
#     log_folder = prepare_log_folder(LOG_PATH)
#     print(f'Logging results to {log_folder}')
#     save_config(Config, log_folder + 'config')
#     create_logger(directory=log_folder, name="logs.txt")

# study = optimize(REGEX, Config, log_folder=log_folder, n_trials=1, debug=DEBUG_MORE)

In [30]:
# PARAMS['xgb'].update(study.best_params)
# Config.params = PARAMS['xgb']

### Train

In [31]:
# When True, the training cell skips log-folder creation and `save_config`.
DEBUG = False
# Forwarded to `kfold(..., debug=DEBUG_MORE)` in the training cell.
DEBUG_MORE = False

In [None]:
%%time

# First non-debug run only: create the log folder and attach the file logger to it.
if not DEBUG and log_folder is None:
    log_folder = prepare_log_folder(LOG_PATH)
    print(f'Logging results to {log_folder}')
    create_logger(directory=log_folder, name="logs.txt")

# Every non-debug run: (re)write the current configuration into the log folder.
if not DEBUG:
    save_config(Config, log_folder + 'config')

# Run the k-fold training; `log_folder` stays None when DEBUG is True and no folder exists.
df_val, ft_imp = kfold(
    REGEX,
    TEST_REGEX,
    Config,
    log_folder=log_folder,
    debug=DEBUG_MORE,
)

Logging results to ../logs/2023-01-20/34/


Train / val session inter 0

-------------  Optimizing XGB Model  -------------


    -> 2.51M training candidates
    -> 3.22M validation candidates



[32m[I 2023-01-20 19:08:53,258][0m A new study created in memory with name: no-name-469fb83a-e256-46d7-9596-e356377e0445[0m


[0]	val-auc:0.88244
[100]	val-auc:0.89146
[200]	val-auc:0.89478
[300]	val-auc:0.89781
[400]	val-auc:0.90004
[500]	val-auc:0.90165
[600]	val-auc:0.90283
[700]	val-auc:0.90362
[800]	val-auc:0.90424
[900]	val-auc:0.90471
[1000]	val-auc:0.90513
[1100]	val-auc:0.90547
[1200]	val-auc:0.90577
[1300]	val-auc:0.90606
[1400]	val-auc:0.90629
[1500]	val-auc:0.90649
[1600]	val-auc:0.90669
[1700]	val-auc:0.90686
[1800]	val-auc:0.90701
[1900]	val-auc:0.90715
[2000]	val-auc:0.90729
[2100]	val-auc:0.90741
[2200]	val-auc:0.90752
[2300]	val-auc:0.90763
[2400]	val-auc:0.90774
[2500]	val-auc:0.90783
[2600]	val-auc:0.90791
[2700]	val-auc:0.90798
[2800]	val-auc:0.90806
[2900]	val-auc:0.90813
[3000]	val-auc:0.90819
[3100]	val-auc:0.90826
[3200]	val-auc:0.90832
[3300]	val-auc:0.90839
[3400]	val-auc:0.90844
[3500]	val-auc:0.90850
[3600]	val-auc:0.90856
[3700]	val-auc:0.90861
[3800]	val-auc:0.90865
[3900]	val-auc:0.90870
[4000]	val-auc:0.90875
[4100]	val-auc:0.90879
[4200]	val-auc:0.90883
[4300]	val-auc:0.90886


[32m[I 2023-01-20 19:26:39,789][0m Trial 0 finished with value: 0.5578270095142486 and parameters: {'max_depth': 6, 'subsample': 0.7853550380143092, 'colsample_bytree': 0.5133383081370366, 'reg_alpha': 0.0001563898055592192, 'reg_lambda': 0.018465748971833115}. Best is trial 0 with value: 0.5578270095142486.[0m


[0]	val-auc:0.88546
[100]	val-auc:0.89378
[200]	val-auc:0.89699
[300]	val-auc:0.89960
[400]	val-auc:0.90166
[500]	val-auc:0.90322
[600]	val-auc:0.90422
[700]	val-auc:0.90494
[800]	val-auc:0.90548
[900]	val-auc:0.90591
[1000]	val-auc:0.90627
[1100]	val-auc:0.90658
[1200]	val-auc:0.90685
[1300]	val-auc:0.90709
[1400]	val-auc:0.90730
[1500]	val-auc:0.90749
[1600]	val-auc:0.90764
[1700]	val-auc:0.90780
[1800]	val-auc:0.90794
[1900]	val-auc:0.90807
[2000]	val-auc:0.90818
[2100]	val-auc:0.90828
[2200]	val-auc:0.90837
[2300]	val-auc:0.90847
[2400]	val-auc:0.90855
[2500]	val-auc:0.90862
[2600]	val-auc:0.90869
[2700]	val-auc:0.90877
[2800]	val-auc:0.90883
[2900]	val-auc:0.90890
[3000]	val-auc:0.90896
[3100]	val-auc:0.90901
[3200]	val-auc:0.90907
[3300]	val-auc:0.90912
[3400]	val-auc:0.90916
[3500]	val-auc:0.90920
[3600]	val-auc:0.90924
[3700]	val-auc:0.90928
[3800]	val-auc:0.90933
[3900]	val-auc:0.90937
[4000]	val-auc:0.90940
[4100]	val-auc:0.90942
[4200]	val-auc:0.90946
[4300]	val-auc:0.90949


[32m[I 2023-01-20 19:44:53,414][0m Trial 1 finished with value: 0.5577357457391225 and parameters: {'max_depth': 7, 'subsample': 0.8760105579274585, 'colsample_bytree': 0.8850552081831782, 'reg_alpha': 0.0005583356664726724, 'reg_lambda': 0.001035868665215054}. Best is trial 0 with value: 0.5578270095142486.[0m


[0]	val-auc:0.88896
[100]	val-auc:0.89956
[200]	val-auc:0.90186
[300]	val-auc:0.90372
[400]	val-auc:0.90510
[500]	val-auc:0.90616
[600]	val-auc:0.90687
[700]	val-auc:0.90737
[800]	val-auc:0.90778
[900]	val-auc:0.90812
[1000]	val-auc:0.90838
[1100]	val-auc:0.90861
[1200]	val-auc:0.90879
[1300]	val-auc:0.90895
[1400]	val-auc:0.90910
[1500]	val-auc:0.90922
[1600]	val-auc:0.90933
[1700]	val-auc:0.90942
[1800]	val-auc:0.90951
[1900]	val-auc:0.90959
[2000]	val-auc:0.90964
[2100]	val-auc:0.90969
[2200]	val-auc:0.90974
[2300]	val-auc:0.90977
[2400]	val-auc:0.90980
[2500]	val-auc:0.90982
[2600]	val-auc:0.90986
[2700]	val-auc:0.90989
[2800]	val-auc:0.90990
[2900]	val-auc:0.90993
[3000]	val-auc:0.90994
[3100]	val-auc:0.90997
[3200]	val-auc:0.90999
[3300]	val-auc:0.91002
[3400]	val-auc:0.91004
[3500]	val-auc:0.91005
[3600]	val-auc:0.91007
[3700]	val-auc:0.91008
[3800]	val-auc:0.91009
[3859]	val-auc:0.91009

-> gt_clicks  -  Recall : 0.5590

Params : {'max_depth': 10, 'subsample': '0.639', 'colsamp

[32m[I 2023-01-20 20:00:33,404][0m Trial 2 finished with value: 0.5589678067033242 and parameters: {'max_depth': 10, 'subsample': 0.6385207163111063, 'colsample_bytree': 0.776505044639317, 'reg_alpha': 0.0018413627504815383, 'reg_lambda': 1.1315085942425098e-05}. Best is trial 2 with value: 0.5589678067033242.[0m


[0]	val-auc:0.88177
[100]	val-auc:0.89107
[200]	val-auc:0.89465
[300]	val-auc:0.89774
[400]	val-auc:0.90002
[500]	val-auc:0.90167
[600]	val-auc:0.90286
[700]	val-auc:0.90368
[800]	val-auc:0.90429
[900]	val-auc:0.90478
[1000]	val-auc:0.90518
[1100]	val-auc:0.90554
[1200]	val-auc:0.90586
[1300]	val-auc:0.90613
[1400]	val-auc:0.90637
[1500]	val-auc:0.90658
[1600]	val-auc:0.90676
[1700]	val-auc:0.90695
[1800]	val-auc:0.90711
[1900]	val-auc:0.90726
[2000]	val-auc:0.90740
[2100]	val-auc:0.90752
[2200]	val-auc:0.90763
[2300]	val-auc:0.90774
[2400]	val-auc:0.90783
[2500]	val-auc:0.90792
[2600]	val-auc:0.90801
[2700]	val-auc:0.90809
[2800]	val-auc:0.90817
[2900]	val-auc:0.90824
[3000]	val-auc:0.90831
[3100]	val-auc:0.90837
[3200]	val-auc:0.90844
[3300]	val-auc:0.90850
[3400]	val-auc:0.90856
[3500]	val-auc:0.90862
[3600]	val-auc:0.90868
[3700]	val-auc:0.90873
[3800]	val-auc:0.90878
[3900]	val-auc:0.90882
[4000]	val-auc:0.90887
[4100]	val-auc:0.90891
[4200]	val-auc:0.90896
[4300]	val-auc:0.90900


[32m[I 2023-01-20 20:18:05,676][0m Trial 3 finished with value: 0.5576216660202149 and parameters: {'max_depth': 6, 'subsample': 0.687117153312204, 'colsample_bytree': 0.8119906509953599, 'reg_alpha': 0.00021525738448087233, 'reg_lambda': 0.8694657457270097}. Best is trial 2 with value: 0.5589678067033242.[0m


[0]	val-auc:0.88941
[100]	val-auc:0.89777
[200]	val-auc:0.90041
[300]	val-auc:0.90254
[400]	val-auc:0.90419
[500]	val-auc:0.90543
[600]	val-auc:0.90625
[700]	val-auc:0.90681
[800]	val-auc:0.90724
[900]	val-auc:0.90760
[1000]	val-auc:0.90790
[1100]	val-auc:0.90814
[1200]	val-auc:0.90835
[1300]	val-auc:0.90856
[1400]	val-auc:0.90870
[1500]	val-auc:0.90885
[1600]	val-auc:0.90897
[1700]	val-auc:0.90908
[1800]	val-auc:0.90917
[1900]	val-auc:0.90928
[2000]	val-auc:0.90934
[2100]	val-auc:0.90942
[2200]	val-auc:0.90947
[2300]	val-auc:0.90953
[2400]	val-auc:0.90958
[2500]	val-auc:0.90962
[2600]	val-auc:0.90966
[2700]	val-auc:0.90969
[2800]	val-auc:0.90974
[2900]	val-auc:0.90978
[3000]	val-auc:0.90982
[3100]	val-auc:0.90985
[3200]	val-auc:0.90989
[3300]	val-auc:0.90992
[3400]	val-auc:0.90996
[3500]	val-auc:0.90998
[3600]	val-auc:0.91001
[3700]	val-auc:0.91003
[3800]	val-auc:0.91006
[3900]	val-auc:0.91009
[4000]	val-auc:0.91009
[4034]	val-auc:0.91009

-> gt_clicks  -  Recall : 0.5582

Params : {'

[32m[I 2023-01-20 20:30:58,402][0m Trial 4 finished with value: 0.5581692486709713 and parameters: {'max_depth': 9, 'subsample': 0.7863213458103926, 'colsample_bytree': 0.9329001546449827, 'reg_alpha': 0.04358135322804268, 'reg_lambda': 0.0003160794024366034}. Best is trial 2 with value: 0.5589678067033242.[0m


[0]	val-auc:0.88280
[100]	val-auc:0.89149
[200]	val-auc:0.89475
[300]	val-auc:0.89780
[400]	val-auc:0.89999
[500]	val-auc:0.90163
[600]	val-auc:0.90278
[700]	val-auc:0.90356
[800]	val-auc:0.90419
[900]	val-auc:0.90467
[1000]	val-auc:0.90509
[1100]	val-auc:0.90545
[1200]	val-auc:0.90575
[1300]	val-auc:0.90602
[1400]	val-auc:0.90625
[1500]	val-auc:0.90647
[1600]	val-auc:0.90666
[1700]	val-auc:0.90683
[1800]	val-auc:0.90699
[1900]	val-auc:0.90714
[2000]	val-auc:0.90727
[2100]	val-auc:0.90741
[2200]	val-auc:0.90752
[2300]	val-auc:0.90764
[2400]	val-auc:0.90774
[2500]	val-auc:0.90783
[2600]	val-auc:0.90792
[2700]	val-auc:0.90799
[2800]	val-auc:0.90808
[2900]	val-auc:0.90815
[3000]	val-auc:0.90823
[3100]	val-auc:0.90830
[3200]	val-auc:0.90836
[3300]	val-auc:0.90842
[3400]	val-auc:0.90848
[3500]	val-auc:0.90852
[3600]	val-auc:0.90858
[3700]	val-auc:0.90862
[3800]	val-auc:0.90867
[3900]	val-auc:0.90872
[4000]	val-auc:0.90877
[4100]	val-auc:0.90881
[4200]	val-auc:0.90884
[4300]	val-auc:0.90888


[32m[I 2023-01-20 20:48:11,808][0m Trial 5 finished with value: 0.5578954573455931 and parameters: {'max_depth': 6, 'subsample': 0.8791578686043338, 'colsample_bytree': 0.5136646784249935, 'reg_alpha': 0.0007205795937555965, 'reg_lambda': 7.103064871325849e-06}. Best is trial 2 with value: 0.5589678067033242.[0m


[0]	val-auc:0.88532
[100]	val-auc:0.89387
[200]	val-auc:0.89695
[300]	val-auc:0.89965
[400]	val-auc:0.90169
[500]	val-auc:0.90322
[600]	val-auc:0.90423
[700]	val-auc:0.90494
[800]	val-auc:0.90548
[900]	val-auc:0.90590
[1000]	val-auc:0.90630
[1100]	val-auc:0.90663
[1200]	val-auc:0.90688
[1300]	val-auc:0.90711
[1400]	val-auc:0.90732
[1500]	val-auc:0.90750
[1600]	val-auc:0.90768
[1700]	val-auc:0.90784
[1800]	val-auc:0.90797
[1900]	val-auc:0.90808
[2000]	val-auc:0.90819
[2100]	val-auc:0.90830
[2200]	val-auc:0.90840
[2300]	val-auc:0.90849
[2400]	val-auc:0.90857
[2500]	val-auc:0.90864
[2600]	val-auc:0.90872
[2700]	val-auc:0.90879
[2800]	val-auc:0.90886
[2900]	val-auc:0.90892
[3000]	val-auc:0.90897
[3100]	val-auc:0.90903
[3200]	val-auc:0.90907
[3300]	val-auc:0.90912
[3400]	val-auc:0.90917
[3500]	val-auc:0.90920
[3600]	val-auc:0.90925
[3700]	val-auc:0.90929
[3800]	val-auc:0.90933
[3900]	val-auc:0.90937
[4000]	val-auc:0.90940
[4100]	val-auc:0.90943
[4200]	val-auc:0.90946
[4300]	val-auc:0.90950


[32m[I 2023-01-20 21:07:13,188][0m Trial 6 finished with value: 0.5579182732893746 and parameters: {'max_depth': 7, 'subsample': 0.9386504406954178, 'colsample_bytree': 0.6609467700877397, 'reg_alpha': 0.006254301632233858, 'reg_lambda': 0.015232967859509734}. Best is trial 2 with value: 0.5589678067033242.[0m


[0]	val-auc:0.88272
[100]	val-auc:0.89149
[200]	val-auc:0.89471
[300]	val-auc:0.89775
[400]	val-auc:0.90000
[500]	val-auc:0.90164
[600]	val-auc:0.90277
[700]	val-auc:0.90356
[800]	val-auc:0.90418
[900]	val-auc:0.90467
[1000]	val-auc:0.90507
[1100]	val-auc:0.90544
[1200]	val-auc:0.90574
[1300]	val-auc:0.90602
[1400]	val-auc:0.90625
[1500]	val-auc:0.90645
[1600]	val-auc:0.90664
[1700]	val-auc:0.90681
[1800]	val-auc:0.90697
[1900]	val-auc:0.90711
[2000]	val-auc:0.90724
[2100]	val-auc:0.90737
[2200]	val-auc:0.90748
[2300]	val-auc:0.90759
[2400]	val-auc:0.90770
[2500]	val-auc:0.90779
[2600]	val-auc:0.90787
[2700]	val-auc:0.90796
[2800]	val-auc:0.90804
[2900]	val-auc:0.90811
[3000]	val-auc:0.90818
[3100]	val-auc:0.90825
[3200]	val-auc:0.90831
[3300]	val-auc:0.90837
[3400]	val-auc:0.90842
[3500]	val-auc:0.90848
[3600]	val-auc:0.90854
[3700]	val-auc:0.90860
[3800]	val-auc:0.90865
[3900]	val-auc:0.90869
[4000]	val-auc:0.90874
[4100]	val-auc:0.90878
[4200]	val-auc:0.90882
[4300]	val-auc:0.90886


[32m[I 2023-01-20 21:24:28,926][0m Trial 7 finished with value: 0.5574847703575259 and parameters: {'max_depth': 6, 'subsample': 0.8022895120339223, 'colsample_bytree': 0.5394880616783784, 'reg_alpha': 0.00017543436186804792, 'reg_lambda': 0.0001667222907570356}. Best is trial 2 with value: 0.5589678067033242.[0m


[0]	val-auc:0.88658
[100]	val-auc:0.89614
[200]	val-auc:0.89890
[300]	val-auc:0.90132
[400]	val-auc:0.90312
[500]	val-auc:0.90449
[600]	val-auc:0.90533
[700]	val-auc:0.90598
[800]	val-auc:0.90649
[900]	val-auc:0.90687
[1000]	val-auc:0.90720
[1100]	val-auc:0.90748
[1200]	val-auc:0.90771
[1300]	val-auc:0.90791
[1400]	val-auc:0.90810
[1500]	val-auc:0.90827
[1600]	val-auc:0.90840
[1700]	val-auc:0.90853
[1800]	val-auc:0.90864
[1900]	val-auc:0.90876
[2000]	val-auc:0.90885
[2100]	val-auc:0.90894
[2200]	val-auc:0.90902
[2300]	val-auc:0.90910
[2400]	val-auc:0.90916
[2500]	val-auc:0.90922
[2600]	val-auc:0.90928
[2700]	val-auc:0.90933
[2800]	val-auc:0.90937
[2900]	val-auc:0.90942
[3000]	val-auc:0.90946
[3100]	val-auc:0.90951
[3200]	val-auc:0.90956
[3300]	val-auc:0.90959
[3400]	val-auc:0.90962
[3500]	val-auc:0.90965
[3600]	val-auc:0.90967
[3700]	val-auc:0.90971
[3800]	val-auc:0.90973
[3900]	val-auc:0.90975
[4000]	val-auc:0.90978
[4100]	val-auc:0.90980
[4200]	val-auc:0.90982
[4300]	val-auc:0.90983


[32m[I 2023-01-20 21:38:42,062][0m Trial 8 finished with value: 0.5580551689520638 and parameters: {'max_depth': 8, 'subsample': 0.5871346658327417, 'colsample_bytree': 0.7580192326866726, 'reg_alpha': 0.06857013243368781, 'reg_lambda': 5.602110999973888e-06}. Best is trial 2 with value: 0.5589678067033242.[0m


[0]	val-auc:0.88603
[100]	val-auc:0.89610
[200]	val-auc:0.89889
[300]	val-auc:0.90129
[400]	val-auc:0.90307
[500]	val-auc:0.90446
[600]	val-auc:0.90534
[700]	val-auc:0.90596
[800]	val-auc:0.90646
[900]	val-auc:0.90686
[1000]	val-auc:0.90718
[1100]	val-auc:0.90747
[1200]	val-auc:0.90770
[1300]	val-auc:0.90792
[1400]	val-auc:0.90809
[1500]	val-auc:0.90826
[1600]	val-auc:0.90840
[1700]	val-auc:0.90853
[1800]	val-auc:0.90864
[1900]	val-auc:0.90874
[2000]	val-auc:0.90884
[2100]	val-auc:0.90893
[2200]	val-auc:0.90901
[2300]	val-auc:0.90908
[2400]	val-auc:0.90916
[2500]	val-auc:0.90921
[2600]	val-auc:0.90926
[2700]	val-auc:0.90930
[2800]	val-auc:0.90936
[2900]	val-auc:0.90940
[3000]	val-auc:0.90944
[3100]	val-auc:0.90949
[3200]	val-auc:0.90953
[3300]	val-auc:0.90957
[3400]	val-auc:0.90961
[3500]	val-auc:0.90965
[3600]	val-auc:0.90968
[3700]	val-auc:0.90971
[3800]	val-auc:0.90973
[3900]	val-auc:0.90976
[4000]	val-auc:0.90978
[4100]	val-auc:0.90980
[4200]	val-auc:0.90982
[4300]	val-auc:0.90985


[32m[I 2023-01-20 21:56:19,570][0m Trial 9 finished with value: 0.5579639051769376 and parameters: {'max_depth': 8, 'subsample': 0.7279403618625049, 'colsample_bytree': 0.7146550257547484, 'reg_alpha': 6.508964345356784e-05, 'reg_lambda': 2.8336278476622605e-06}. Best is trial 2 with value: 0.5589678067033242.[0m


[0]	val-auc:0.89017
[100]	val-auc:0.89951
[200]	val-auc:0.90188
[300]	val-auc:0.90369
[400]	val-auc:0.90508
[500]	val-auc:0.90612
[600]	val-auc:0.90683
[700]	val-auc:0.90734
[800]	val-auc:0.90774
[900]	val-auc:0.90804
[1000]	val-auc:0.90831
[1100]	val-auc:0.90851
[1200]	val-auc:0.90869
[1300]	val-auc:0.90886
[1400]	val-auc:0.90898
[1500]	val-auc:0.90908
[1600]	val-auc:0.90919
[1700]	val-auc:0.90928
[1800]	val-auc:0.90936
[1900]	val-auc:0.90943
[2000]	val-auc:0.90950
[2100]	val-auc:0.90954
[2200]	val-auc:0.90957
[2300]	val-auc:0.90961
[2400]	val-auc:0.90964
[2500]	val-auc:0.90967
[2600]	val-auc:0.90971
[2700]	val-auc:0.90974
[2800]	val-auc:0.90976
[2900]	val-auc:0.90978
[3000]	val-auc:0.90982
[3100]	val-auc:0.90984
[3200]	val-auc:0.90987
[3300]	val-auc:0.90990
[3400]	val-auc:0.90992
[3500]	val-auc:0.90993
[3529]	val-auc:0.90993

-> gt_clicks  -  Recall : 0.5578

Params : {'max_depth': 10, 'subsample': '0.501', 'colsample_bytree': '0.969', 'reg_alpha': '1.24e-05', 'reg_lambda': '1.58e-06

[32m[I 2023-01-20 22:11:23,713][0m Trial 10 finished with value: 0.5577813776266856 and parameters: {'max_depth': 10, 'subsample': 0.5008012089995554, 'colsample_bytree': 0.9688843347230611, 'reg_alpha': 1.244111884373806e-05, 'reg_lambda': 1.5804202061722094e-06}. Best is trial 2 with value: 0.5589678067033242.[0m


[0]	val-auc:0.88993
[100]	val-auc:0.89940
[200]	val-auc:0.90181
[300]	val-auc:0.90370
[400]	val-auc:0.90512
[500]	val-auc:0.90619
[600]	val-auc:0.90689
[700]	val-auc:0.90742
[800]	val-auc:0.90787
[900]	val-auc:0.90820
[1000]	val-auc:0.90848
[1100]	val-auc:0.90869
[1200]	val-auc:0.90889
[1300]	val-auc:0.90903
[1400]	val-auc:0.90918
[1500]	val-auc:0.90929
[1600]	val-auc:0.90939
[1700]	val-auc:0.90948
[1800]	val-auc:0.90958
[1900]	val-auc:0.90965
[2000]	val-auc:0.90970
[2100]	val-auc:0.90975
[2200]	val-auc:0.90980
[2300]	val-auc:0.90983
[2400]	val-auc:0.90985
[2500]	val-auc:0.90988
[2600]	val-auc:0.90992
[2700]	val-auc:0.90993
[2800]	val-auc:0.90995
[2900]	val-auc:0.90997
[3000]	val-auc:0.91000
[3100]	val-auc:0.91003
[3200]	val-auc:0.91004
[3300]	val-auc:0.91006
[3400]	val-auc:0.91008
[3500]	val-auc:0.91009
[3600]	val-auc:0.91011
[3700]	val-auc:0.91013


In [None]:
# Emit a newline first so the path stands apart from the training output,
# then show the value of `log_folder`.
print("\n", log_folder, sep=" ")

In [None]:
# Disabled: feature-importance plot via `plot_importances` (imported from utils.plot).
# NOTE(review): `ft_imp` is not defined in the visible cells — presumably produced by a
# training run; confirm it exists before re-enabling this call.
# plot_importances(ft_imp)

**Fold 0**
- orders	-  Found 52.04K GTs	-  Recall : 0.6672

Done