In [1]:
%load_ext autoreload
%autoreload 2

import functools
import gc
import logging
import pickle
import sys
from pathlib import Path
from typing import Dict, List, Tuple, Callable

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import tensorflow_addons as tfa
from IPython.display import clear_output
from keras.regularizers import L1L2
from multiprocess import Pool
from multiprocess.dummy import Pool as ThreadPool
from tqdm import tqdm


logging.basicConfig(level=logging.DEBUG, format='%(asctime)s :: %(funcName)s :: %(message)s')

from ccf.datasets import get_sample, get_initial_setting, get_left_right_name, get_std
from ccf.models import ModelLSTM, ModelCNN, ModelLSTM_FCN, ModelRF, ModelDNN, ModelLGB
from ccf.metrics import get_gini
from ccf.utils import delete_objs, cuttoff_ts_in_df, natural_reindex, sort_df, create_block_vars_from_separate
from ccf.analytics import greedy_feature_selection, search_architecture, get_analytics_row
from ccf.ZOO import *
from ccf.callbacks import FrequencyCallback, FrequencyEpoch
from ccf.preprocess import get_sample_2d_lgb
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
current_feature_list = []
count_folds = 1

base_path_train = Path('../data/all_features_ns_full_25/X.csv')
base_path_val_train = Path('../data/all_features_ns_full_25/val.csv')
base_path_val = Path('../data/all_features_ns_full_25/val.csv')

count_obs_train = None
count_obs_val = None
count_obs_val_train = None

features_path = Path('../data/all_features_ns_full_25/features_m.csv')
features = pd.read_csv(features_path, nrows = 1)
list_candidates = list(set(features.columns) - set(current_feature_list) - set(['id']))
current_feature_list = list_candidates[:-1]
list_candidates = list_candidates[-1:]
list_candidates.sort()

selection_rule = {'field_name': 'confidence_lower', 'ascending': False}

dict_fields, _, _ = get_initial_setting(
    features_path,
    count_cuttoff = 0
)

experiment_name = 'lgb_ns_full_single'

analytics_path = Path('../data/all_features_ns_full/analytics/')



learning_setting = {
    "batch_size": 1024, 
    "epochs": 100,
    "custom": False,
    "callbacks": EarlyStopping(
        monitor="val_loss",
        restore_best_weights = True,
        #verbose = 1,
        patience = 3
    )
}




get_sample_func = lambda possible_feature_list, base_path, count_obs, scaler: get_sample_2d_lgb(
    possible_feature_list,
    base_path,
    count_obs,
    features_path,
    categoricals = [
        'part', 
        'prior_question_had_explanation', 
        'kmean_cluster', 
        'content_id',
        'lag_part_bool', 
        'lag_clu_bool', 
        'lag_answ_corr',
        'lag_answ_and_part_bool', 
        'lag_answ_and_clu_bool', 
        'lag_expl_and_part_bool', 
        'lag_expl_and_clu_bool', 
        'lag_expl_and_part_bool_not_corr', 
        'lag_expl_and_clu_bool_not_corr'
    ],
    experiment_name = 'ns_full_25'
)

In [3]:
experiments = {
    f'lgb_12_07_256': {
        "max_depth": 12,
        "feature_fraction": 0.75,
        'max_bin': 256,
    },
}

In [4]:
for experiment_name, params in experiments.items():
    logging.debug(f"experiment_name == {experiment_name}")
    curr_setting = {
        "verbose_eval": 50,
        "num_boost_round": 4_000,
        "early_stopping_rounds": 50,
        "params": {
            'num_leaves': 131_072,
            'max_bin': 256,
            'learning_rate': 0.1,
            "objective": "binary",
            "metric": "auc",
            'force_row_wise': True,
            #'min_data_in_leaf': 100
        },
    }
    for param_name, value in params.items():
        curr_setting["params"][param_name] = value
    
    model_class = lambda train_matrix_shape, name: ModelLGB(
        save_path = Path('../saved_models/'), 
        name = name,
        metric = get_gini,
        learning_setting = curr_setting
    )
    greedy_feature_selection(
        current_feature_list,
        list_candidates,
        dict_fields,
        count_folds,
        count_obs_train,
        count_obs_val,
        experiment_name, 
        model_class,
        analytics_path,
        selection_rule,
        get_sample_func,
        base_path_train,
        base_path_val,
        base_path_val_train,
        count_obs_val_train,
        print_iteration = False
    )
    logging.debug("\n")

2021-01-05 20:34:06,043 :: <module> :: experiment_name == lgb_12_07_256
2021-01-05 20:34:06,044 :: get_sample_2d_lgb :: 
2021-01-05 20:34:06,044 :: get_sample_2d_lgb :: ns_full_25_X_dataset.pkl always exist
2021-01-05 20:38:36,515 :: get_sample_2d_lgb :: 0. ns_full_25_X_dataset.pkl load - complete.
2021-01-05 20:38:36,515 :: get_sample_2d_lgb :: 
2021-01-05 20:38:36,516 :: get_sample_2d_lgb :: ns_full_25_val_dataset.pkl always exist
2021-01-05 20:38:55,377 :: get_sample_2d_lgb :: 0. ns_full_25_val_dataset.pkl load - complete.


[LightGBM] [Info] Number of positive: 58781780, number of negative: 30562390
[LightGBM] [Info] Total Bins 4515
[LightGBM] [Info] Number of data points in the train set: 89344170, number of used features: 25




[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.657925 -> initscore=0.654062
[LightGBM] [Info] Start training from score 0.654062
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.779305	valid_1's auc: 0.780761
[100]	training's auc: 0.78337	valid_1's auc: 0.783428


[150]	training's auc: 0.785831	valid_1's auc: 0.784693
[200]	training's auc: 0.787659	valid_1's auc: 0.785443


[250]	training's auc: 0.789108	valid_1's auc: 0.785936
[300]	training's auc: 0.790503	valid_1's auc: 0.786373


[350]	training's auc: 0.791559	valid_1's auc: 0.786621
[400]	training's auc: 0.792746	valid_1's auc: 0.786863


[450]	training's auc: 0.793692	valid_1's auc: 0.787014
[500]	training's auc: 0.794691	valid_1's auc: 0.787183


[550]	training's auc: 0.795607	valid_1's auc: 0.787304
[600]	training's auc: 0.796423	valid_1's auc: 0.787418


[650]	training's auc: 0.79722	valid_1's auc: 0.787484
[700]	training's auc: 0.797999	valid_1's auc: 0.787554
[750]	training's auc: 0.798794	valid_1's auc: 0.787611


[800]	training's auc: 0.799496	valid_1's auc: 0.787648
[850]	training's auc: 0.800226	valid_1's auc: 0.787701


[900]	training's auc: 0.80096	valid_1's auc: 0.787733
[950]	training's auc: 0.801704	valid_1's auc: 0.787759


[1000]	training's auc: 0.8024	valid_1's auc: 0.787781
[1050]	training's auc: 0.803068	valid_1's auc: 0.787793


[1100]	training's auc: 0.803747	valid_1's auc: 0.787808
[1150]	training's auc: 0.804401	valid_1's auc: 0.78781


Early stopping, best iteration is:
[1137]	training's auc: 0.804237	valid_1's auc: 0.787815


100%|██████████| 1000/1000 [16:07<00:00,  1.03it/s]
2021-01-06 03:15:13,603 :: <module> :: 



In [5]:
res = create_block_vars_from_separate(list(experiments.keys()), parent_path = analytics_path)
res

Unnamed: 0,count,confidence_lower,confidence_upper,mean,std,min,25%,50%,75%,max,...,macro,macro_mean,macro_std,macro_lower_bound,macro_upper_bound,curr_features,new_var,count_vars,best_in_iter,experiment_name
0,1000.0,57.017274,58.171615,57.575853,0.295697,56.675055,57.362218,57.571753,57.790829,58.635208,...,[57.56302704021397],57.563027,1.0,55.603027,59.523027,"['part_bundle_id', 'attempt_no_mean', 'trend_u...",strike,25,1,lgb_12_07_256


In [6]:
experiments = {
    f'lgb_12_06_256': {
        "max_depth": 12,
        "feature_fraction": 0.6,
        'max_bin': 256,
    },
    
    f'lgb_10_06_256': {
        "max_depth": 10,
        "feature_fraction": 0.6,
        'max_bin': 256,
    },
}
for experiment_name, params in experiments.items():
    logging.debug(f"experiment_name == {experiment_name}")
    curr_setting = {
        "verbose_eval": 50,
        "num_boost_round": 4_000,
        "early_stopping_rounds": 50,
        "params": {
            'num_leaves': 131_072,
            'max_bin': 256,
            'learning_rate': 0.1,
            "objective": "binary",
            "metric": "auc",
            'force_row_wise': True,
            #'min_data_in_leaf': 100
        },
    }
    for param_name, value in params.items():
        curr_setting["params"][param_name] = value
    
    model_class = lambda train_matrix_shape, name: ModelLGB(
        save_path = Path('../saved_models/'), 
        name = name,
        metric = get_gini,
        learning_setting = curr_setting
    )
    greedy_feature_selection(
        current_feature_list,
        list_candidates,
        dict_fields,
        count_folds,
        count_obs_train,
        count_obs_val,
        experiment_name, 
        model_class,
        analytics_path,
        selection_rule,
        get_sample_func,
        base_path_train,
        base_path_val,
        base_path_val_train,
        count_obs_val_train,
        print_iteration = False
    )
    logging.debug("\n")
res = create_block_vars_from_separate(list(experiments.keys()), parent_path = analytics_path)
res

2021-01-06 03:15:14,464 :: <module> :: experiment_name == lgb_12_06_256
2021-01-06 03:15:14,465 :: get_sample_2d_lgb :: 
2021-01-06 03:15:14,465 :: get_sample_2d_lgb :: ns_full_25_X_dataset.pkl always exist
2021-01-06 03:19:42,276 :: get_sample_2d_lgb :: 0. ns_full_25_X_dataset.pkl load - complete.
2021-01-06 03:19:42,277 :: get_sample_2d_lgb :: 
2021-01-06 03:19:42,278 :: get_sample_2d_lgb :: ns_full_25_val_dataset.pkl always exist
2021-01-06 03:20:23,796 :: get_sample_2d_lgb :: 0. ns_full_25_val_dataset.pkl load - complete.


[LightGBM] [Info] Number of positive: 58781780, number of negative: 30562390
[LightGBM] [Info] Total Bins 4515
[LightGBM] [Info] Number of data points in the train set: 89344170, number of used features: 25




[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.657925 -> initscore=0.654062
[LightGBM] [Info] Start training from score 0.654062
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.779127	valid_1's auc: 0.780593
[100]	training's auc: 0.783282	valid_1's auc: 0.783304


[150]	training's auc: 0.785867	valid_1's auc: 0.784644
[200]	training's auc: 0.787488	valid_1's auc: 0.785277


[250]	training's auc: 0.789005	valid_1's auc: 0.785826
[300]	training's auc: 0.790333	valid_1's auc: 0.786214


[350]	training's auc: 0.791588	valid_1's auc: 0.786513
[400]	training's auc: 0.792633	valid_1's auc: 0.786751


[450]	training's auc: 0.793633	valid_1's auc: 0.786929
[500]	training's auc: 0.794603	valid_1's auc: 0.787096


[550]	training's auc: 0.795507	valid_1's auc: 0.787237
[600]	training's auc: 0.796365	valid_1's auc: 0.787352


[650]	training's auc: 0.797122	valid_1's auc: 0.787434
[700]	training's auc: 0.797914	valid_1's auc: 0.787505
[750]	training's auc: 0.798687	valid_1's auc: 0.787566


[800]	training's auc: 0.79936	valid_1's auc: 0.787594
[850]	training's auc: 0.800049	valid_1's auc: 0.787629


[900]	training's auc: 0.800689	valid_1's auc: 0.787639
[950]	training's auc: 0.801349	valid_1's auc: 0.787651


[1000]	training's auc: 0.802075	valid_1's auc: 0.787661
[1050]	training's auc: 0.802734	valid_1's auc: 0.787667


[1100]	training's auc: 0.80339	valid_1's auc: 0.787695
[1150]	training's auc: 0.804076	valid_1's auc: 0.78773


[1200]	training's auc: 0.804755	valid_1's auc: 0.787728
[1250]	training's auc: 0.805371	valid_1's auc: 0.787741
Early stopping, best iteration is:
[1234]	training's auc: 0.805189	valid_1's auc: 0.787743


100%|██████████| 1000/1000 [16:08<00:00,  1.03it/s]
2021-01-06 10:29:41,922 :: <module> :: 

2021-01-06 10:29:41,923 :: <module> :: experiment_name == lgb_10_06_256
2021-01-06 10:29:41,924 :: get_sample_2d_lgb :: 
2021-01-06 10:29:41,941 :: get_sample_2d_lgb :: ns_full_25_X_dataset.pkl always exist
2021-01-06 10:33:53,266 :: get_sample_2d_lgb :: 0. ns_full_25_X_dataset.pkl load - complete.
2021-01-06 10:33:53,266 :: get_sample_2d_lgb :: 
2021-01-06 10:33:53,267 :: get_sample_2d_lgb :: ns_full_25_val_dataset.pkl always exist
2021-01-06 10:34:37,624 :: get_sample_2d_lgb :: 0. ns_full_25_val_dataset.pkl load - complete.


[LightGBM] [Info] Number of positive: 58781780, number of negative: 30562390
[LightGBM] [Info] Total Bins 4515
[LightGBM] [Info] Number of data points in the train set: 89344170, number of used features: 25




[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.657925 -> initscore=0.654062
[LightGBM] [Info] Start training from score 0.654062
Training until validation scores don't improve for 50 rounds
[50]	training's auc: 0.775812	valid_1's auc: 0.778413
[100]	training's auc: 0.7792	valid_1's auc: 0.78125


[150]	training's auc: 0.781146	valid_1's auc: 0.782793
[200]	training's auc: 0.78241	valid_1's auc: 0.783674


[250]	training's auc: 0.783307	valid_1's auc: 0.784234
[300]	training's auc: 0.784059	valid_1's auc: 0.784663


[350]	training's auc: 0.784709	valid_1's auc: 0.784991
[400]	training's auc: 0.785365	valid_1's auc: 0.785322


[450]	training's auc: 0.785952	valid_1's auc: 0.785594
[500]	training's auc: 0.786451	valid_1's auc: 0.785791


[550]	training's auc: 0.786951	valid_1's auc: 0.786003
[600]	training's auc: 0.787438	valid_1's auc: 0.786193


[650]	training's auc: 0.787874	valid_1's auc: 0.786339
[700]	training's auc: 0.788312	valid_1's auc: 0.786499
[750]	training's auc: 0.788719	valid_1's auc: 0.78662


[800]	training's auc: 0.789118	valid_1's auc: 0.786743
[850]	training's auc: 0.789499	valid_1's auc: 0.786842


[900]	training's auc: 0.789841	valid_1's auc: 0.786926
[950]	training's auc: 0.790189	valid_1's auc: 0.787001


[1000]	training's auc: 0.79054	valid_1's auc: 0.787069
[1050]	training's auc: 0.790854	valid_1's auc: 0.787128


[1100]	training's auc: 0.791174	valid_1's auc: 0.787187
[1150]	training's auc: 0.7915	valid_1's auc: 0.787254


[1200]	training's auc: 0.79181	valid_1's auc: 0.7873
[1250]	training's auc: 0.792136	valid_1's auc: 0.787359


[1300]	training's auc: 0.792461	valid_1's auc: 0.787421
[1350]	training's auc: 0.792765	valid_1's auc: 0.787463
[1400]	training's auc: 0.793055	valid_1's auc: 0.787505


[1450]	training's auc: 0.793347	valid_1's auc: 0.787532
[1500]	training's auc: 0.793608	valid_1's auc: 0.787568


[1550]	training's auc: 0.793885	valid_1's auc: 0.787595
[1600]	training's auc: 0.794153	valid_1's auc: 0.787618


[1650]	training's auc: 0.794436	valid_1's auc: 0.787654
[1700]	training's auc: 0.794672	valid_1's auc: 0.787666


[1750]	training's auc: 0.794939	valid_1's auc: 0.787693
[1800]	training's auc: 0.795196	valid_1's auc: 0.787716


[1850]	training's auc: 0.795447	valid_1's auc: 0.787733
[1900]	training's auc: 0.795702	valid_1's auc: 0.787752


[1950]	training's auc: 0.795969	valid_1's auc: 0.787769
[2000]	training's auc: 0.796245	valid_1's auc: 0.787784
[2050]	training's auc: 0.796495	valid_1's auc: 0.7878


[2100]	training's auc: 0.796744	valid_1's auc: 0.78781
[2150]	training's auc: 0.796987	valid_1's auc: 0.787821


[2200]	training's auc: 0.797242	valid_1's auc: 0.787833
[2250]	training's auc: 0.7975	valid_1's auc: 0.787845


[2300]	training's auc: 0.797734	valid_1's auc: 0.787851
[2350]	training's auc: 0.797982	valid_1's auc: 0.787858


[2400]	training's auc: 0.798224	valid_1's auc: 0.787865
[2450]	training's auc: 0.798469	valid_1's auc: 0.787876


[2500]	training's auc: 0.7987	valid_1's auc: 0.787884
[2550]	training's auc: 0.798962	valid_1's auc: 0.787894


[2600]	training's auc: 0.799198	valid_1's auc: 0.787894
Early stopping, best iteration is:
[2562]	training's auc: 0.799021	valid_1's auc: 0.787897


100%|██████████| 1000/1000 [16:12<00:00,  1.03it/s]
2021-01-06 23:10:40,147 :: <module> :: 



Unnamed: 0,count,confidence_lower,confidence_upper,mean,std,min,25%,50%,75%,max,...,macro,macro_mean,macro_std,macro_lower_bound,macro_upper_bound,curr_features,new_var,count_vars,best_in_iter,experiment_name
0,1000.0,57.009378,58.092832,57.556939,0.282636,56.712249,57.365216,57.557652,57.756011,58.647545,...,[57.548675446635315],57.548675,1.0,55.588675,59.508675,"['part_bundle_id', 'attempt_no_mean', 'trend_u...",strike,25,1,lgb_12_06_256
1,1000.0,57.011426,58.093403,57.573561,0.288136,56.663221,57.373789,57.556126,57.778308,58.5202,...,[57.579368981730106],57.579369,1.0,55.619369,59.539369,"['part_bundle_id', 'attempt_no_mean', 'trend_u...",strike,25,1,lgb_10_06_256


In [7]:
with open(Path('../data/all_features_ns_full_25/ns_full_25_val_dataset.pkl'), "rb") as f:
    val_data = pickle.load(f)
val_data = val_data.construct()
val_data = val_data.get_data()
val_data.head()

Unnamed: 0,strike,attempt_no_mean,attempt_no_count,lag_expl_and_part_bool,part_bundle_id,mul_user_content_ema_answ,abs_chng_timestamp_3,trend_user_he_mean_answ,rel_user_he_mean_answ,abs_chng_timestamp_1,...,user_he_mean_prior_question_elapsed_time,rel_lect_part,prior_question_elapsed_time,user_ucount_part,content_he_mean_answered_correctly,user_he_ucount_part,content_sum_answered_correctly,rel_chng_timestamp_1,rel_user_he_ema_pqet,rel_strike_part
0,0,0.0,0,1,505958,0.334414,74289.0,0.997683,0.997099,26731.0,...,14994.465,0.769231,14000.0,7,0.481985,2,5147,2e-06,1.001887,0.0
1,3,0.0,0,1,200989,0.587776,58522.0,1.008947,1.002797,17964.0,...,38496.43,0.142857,16000.0,6,0.864626,6,7250,7e-06,0.9985,0.002017
2,1,0.0,1,1,200475,0.264242,65453.0,0.923994,0.844444,16117.0,...,19500.0,0.0,17000.0,3,0.643768,2,6436,0.000434,0.839652,0.052632
3,0,0.0,0,1,504516,0.305414,153582.0,1.007653,0.972383,45260.0,...,21117.977,1.0,33000.0,2,0.613362,2,4134,0.000173,0.994465,0.0
4,0,0.0,1,1,201373,0.42765,1614281.0,0.982908,1.008545,25402.0,...,26952.146,0.375,18000.0,7,0.628989,7,13487,2.7e-05,0.997647,0.0


In [8]:
list(val_data.columns)

['strike',
 'attempt_no_mean',
 'attempt_no_count',
 'lag_expl_and_part_bool',
 'part_bundle_id',
 'mul_user_content_ema_answ',
 'abs_chng_timestamp_3',
 'trend_user_he_mean_answ',
 'rel_user_he_mean_answ',
 'abs_chng_timestamp_1',
 'content_he_part_count_answered_correctly',
 'rel_user_content_he_mean_answ',
 'abs_chng_timestamp_2',
 'strike_bundle',
 'user_sum_answered_correctly',
 'user_he_mean_prior_question_elapsed_time',
 'rel_lect_part',
 'prior_question_elapsed_time',
 'user_ucount_part',
 'content_he_mean_answered_correctly',
 'user_he_ucount_part',
 'content_sum_answered_correctly',
 'rel_chng_timestamp_1',
 'rel_user_he_ema_pqet',
 'rel_strike_part']

In [10]:
del val_data

In [9]:
len(['strike',
 'attempt_no_mean',
 'attempt_no_count',
 'lag_expl_and_part_bool',
 'part_bundle_id',
 'mul_user_content_ema_answ',
 'abs_chng_timestamp_3',
 'trend_user_he_mean_answ',
 'rel_user_he_mean_answ',
 'abs_chng_timestamp_1',
 'content_he_part_count_answered_correctly',
 'rel_user_content_he_mean_answ',
 'abs_chng_timestamp_2',
 'strike_bundle',
 'user_sum_answered_correctly',
 'user_he_mean_prior_question_elapsed_time',
 'rel_lect_part',
 'prior_question_elapsed_time',
 'user_ucount_part',
 'content_he_mean_answered_correctly',
 'user_he_ucount_part',
 'content_sum_answered_correctly',
 'rel_chng_timestamp_1',
 'rel_user_he_ema_pqet',
 'rel_strike_part'])

25

In [5]:
res = create_block_vars_from_separate(list(experiments.keys()), parent_path = analytics_path)
res
# "max_depth": 12,"feature_fraction": 0.6,'max_bin': 512,'reg_lambda': 0.1,
# [1300]	training's auc: 0.803773	valid_1's auc: 0.787495

Unnamed: 0,count,confidence_lower,confidence_upper,mean,std,min,25%,50%,75%,max,...,macro,macro_mean,macro_std,macro_lower_bound,macro_upper_bound,curr_features,new_var,count_vars,best_in_iter,experiment_name
0,1000.0,56.921119,58.068913,57.512761,0.289949,56.585005,57.324579,57.52375,57.70889,58.419282,...,[57.4990845257052],57.499085,1.0,55.539085,59.459085,"['prior_question_elapsed_time', 'abs_chng_time...",user_he_mean_prior_question_elapsed_time,20,1,lgb_12_06_512_l2_01


In [12]:
res = create_block_vars_from_separate(list(experiments.keys()), parent_path = analytics_path)
res # 12_08_512 0.1
#Early stopping, best iteration is:
#[1211]	training's auc: 0.80234	valid_1's auc: 0.787436

Unnamed: 0,count,confidence_lower,confidence_upper,mean,std,min,25%,50%,75%,max,...,macro,macro_mean,macro_std,macro_lower_bound,macro_upper_bound,curr_features,new_var,count_vars,best_in_iter,experiment_name
0,1000.0,56.911479,58.059301,57.478873,0.294608,56.423711,57.28282,57.478398,57.664749,58.426247,...,[57.487280755184834],57.487281,1.0,55.527281,59.447281,"['part_bundle_id', 'lag_expl_and_part_bool', '...",rel_user_he_mean_answ,20,1,lgb_12_08_512


In [7]:
res = create_block_vars_from_separate(list(experiments.keys()), parent_path = analytics_path)
res# 12_06_512 0.1, 
#Early stopping, best iteration is:
#[1286]	training's auc: 0.803301	valid_1's auc: 0.787463

Unnamed: 0,count,confidence_lower,confidence_upper,mean,std,min,25%,50%,75%,max,...,macro,macro_mean,macro_std,macro_lower_bound,macro_upper_bound,curr_features,new_var,count_vars,best_in_iter,experiment_name
0,1000.0,56.907554,58.059679,57.489701,0.284514,56.615365,57.28853,57.498903,57.683893,58.30086,...,[57.49268384927695],57.492684,1.0,55.532684,59.452684,"['part_bundle_id', 'lag_expl_and_part_bool', '...",rel_user_he_mean_answ,20,1,lgb_12_06_512


In [8]:
res = create_block_vars_from_separate(list(experiments.keys()), parent_path = analytics_path)
res

Unnamed: 0,count,confidence_lower,confidence_upper,mean,std,min,25%,50%,75%,max,...,macro,macro_mean,macro_std,macro_lower_bound,macro_upper_bound,curr_features,new_var,count_vars,best_in_iter,experiment_name
0,1000.0,56.889738,58.063423,57.475097,0.295379,56.37741,57.273219,57.48046,57.66749,58.379394,...,[57.48074117981934],57.480741,1.0,55.520741,59.440741,"['attempt_no_count', 'prior_question_elapsed_t...",content_sum_answered_correctly,20,1,lgb_md_12_bin_256


In [34]:
res # [382]	training's auc: 0.809864	valid_1's auc: 0.779955 number of used features: 20

Unnamed: 0,count,confidence_lower,confidence_upper,mean,std,min,25%,50%,75%,max,...,macro,macro_mean,macro_std,macro_lower_bound,macro_upper_bound,curr_features,new_var,count_vars,best_in_iter,experiment_name
0,1000.0,55.482818,56.471182,55.979293,0.261653,55.150542,55.800032,55.975478,56.156608,56.771764,...,[55.99096606190233],55.990966,1.0,54.030966,57.950966,"['content_he_mean_answered_correctly', 'mul_us...",strike_bundle,20,1,lgb_max_depth_10_ff_06


In [None]:
"""
'min_child_weight': 0.03454472573214212,
"boosting_type": 'gbdt',
"bagging_seed": 11,
'reg_alpha': 0.39,
'reg_lambda': 0.65,
'random_state': 47,
'bagging_fraction': 1.0,
'feature_fraction': 1.0,


f'lgb_max_depth_10_ff_06': {
        "max_depth": 10,
        "feature_fraction": 0.6,
    },
    f'lgb_max_depth_10_ff_06_bf_06': {
        "max_depth": 10,
        "feature_fraction": 0.6,
        'bagging_fraction': 0.6,
    },
"""