In [8]:
import re
import json
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
import sklearn
import sklearn.metrics
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score, accuracy_score
from sklearn.model_selection import KFold
import optuna.integration.lightgbm as lgb

In [2]:
all_new = pd.read_csv('data/feature_df.csv')
wordvec_df = pd.read_csv('sub_notebook/data/wordvectorized_df.csv')
bert_embeded = pd.read_csv('kaggle-notebook/data/bert_embeded.csv')
#tfidf_1000 = pd.read_csv('data/tfidf_1000.csv')
tfidf_svd_raw_64 = pd.read_csv("data/tfidf_svd_raw_64.csv")
#tfidf_svd_content_64 = pd.read_csv("data/tfidf_svd_content_64.csv")
#tfidf_svd_compiled_64 = pd.read_csv("data/tfidf_svd_compiled_64.csv")
mfw_df = pd.read_csv('data/mfw.csv')
miw_df = pd.read_csv('data/miw.csv')

In [3]:
all_new = pd.merge(all_new, bert_embeded, on="id", how="outer")
all_new = pd.merge(all_new, tfidf_svd_raw_64, on="id", how="outer")
all_new = pd.merge(all_new, wordvec_df, on="id", how="outer")
all_new = pd.merge(all_new, miw_df, on="id", how="outer")
all_new = pd.merge(all_new, mfw_df.drop(["mfw", "sfw"], axis=1), on="id", how="outer")

In [37]:
all_new["data_type"][all_new["data_type"] == "train"] = int(0)
all_new["data_type"][all_new["data_type"] == "test"] = int(1)
all_new["data_type"] = all_new["data_type"].astype(int)
all_new.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  all_new["data_type"][all_new["data_type"] == "train"] = int(0)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  all_new["data_type"][all_new["data_type"] == "test"] = int(1)


Unnamed: 0,id,country,duration,category1,category2,state,data_type,number_of_chars,number_of_words,number_of_sentences,...,wordvec_15_y,wordvec_16_y,wordvec_17_y,wordvec_18_y,wordvec_19_y,wordvec_20_y,wordvec_21_y,wordvec_22_y,wordvec_23_y,wordvec_24_y
0,0,4,29,12,143,0.0,0,5289,961,40,...,0.10287,-0.17618,-1.2881,-0.59801,0.26131,-1.2619,0.39202,0.59309,-0.55232,0.005087
1,1,16,34,5,108,0.0,0,1144,202,9,...,0.35207,0.16624,-0.91364,0.6933,-0.65035,0.77353,-0.087782,-0.43228,-0.99982,-0.68077
2,2,21,30,7,122,0.0,0,3316,549,25,...,0.355,0.16465,0.32006,-0.045544,-0.31911,-1.0138,-0.054359,0.20074,0.32061,-0.45584
3,3,21,41,13,0,0.0,0,1670,293,11,...,-0.9108,0.69415,-0.32473,1.1841,0.45249,-1.0321,-1.2659,-0.74085,0.26441,-0.49707
4,4,9,29,13,33,1.0,0,7560,1211,67,...,-0.24938,1.0264,-0.10366,-0.37982,-0.82619,-0.20513,0.57208,0.43497,0.32462,-0.33293


In [46]:
cv = KFold(n_splits=5, shuffle=True, random_state=1234)
for fold_id, (train_index, valid_index) in enumerate(cv.split(all_new)):
    test_idx = valid_index
test_set = all_new[all_new.index.isin(test_idx)]
train_set = all_new[~(all_new.index.isin(test_idx))]

In [47]:
FEATURE_COLS = list(all_new.columns.drop(["id", "state","data_type"]))


TARGET_COLS = ["data_type"]
CATEGORICAL_COLS = [
    "country",
    "category1",
    "category2",
    "country+category1",
    "country+category2",
    "category1+category2",
]

params = {
        'objective': 'binary',
        'metric': 'binary_logloss',
    }

for categorical_col in set(CATEGORICAL_COLS):
    if categorical_col not in FEATURE_COLS:
        CATEGORICAL_COLS.remove(categorical_col)
        print(f"{categorical_col} removed")

In [48]:
def run_optuna_lgbm(test, train, target_cols, feature_cols, categorical_cols):
    """
    import optuna.integration.lightgbm as lgb
    """
    X_train = train[feature_cols]
    y_train = train[target_cols]
    X_test = test[feature_cols]
    
    y_preds = []
    models = []
    oof_train = np.zeros((len(X_train),))
    importances = []
    
    cv = KFold(n_splits=5, shuffle=True, random_state=0)
    for fold_id, (train_index, valid_index) in enumerate(cv.split(X_train)):
        X_tr = X_train.iloc[train_index, :]
        X_val = X_train.iloc[valid_index, :]
        y_tr = y_train.iloc[train_index]
        y_val = y_train.iloc[valid_index]

        lgb_train = lgb.Dataset(X_tr,
                                y_tr,
                                categorical_feature=categorical_cols)

        lgb_eval = lgb.Dataset(X_val,
                               y_val,
                               reference=lgb_train,
                               categorical_feature=categorical_cols)
        
        model = lgb.train(params,
                          lgb_train,
                          valid_sets=[lgb_train, lgb_eval],
                          verbose_eval=False,
                          num_boost_round=1000,
                          early_stopping_rounds=5,
                          )
        
        oof_train[valid_index] = model.predict(X_val,
                                               num_iteration=model.best_iteration)
        y_pred = model.predict(X_test,
                               num_iteration=model.best_iteration)

        y_preds.append(y_pred)
        models.append(model)
        
        
        # display importance
        importance = pd.DataFrame(model.feature_importance(), index=feature_cols, columns=['importance'])
        display(importance)
        importances.append(importance)

    return oof_train, sum(y_preds) / len(y_preds), importances

In [49]:
oof, preds, importances = run_optuna_lgbm(test_set, train_set, TARGET_COLS, FEATURE_COLS, CATEGORICAL_COLS)

[32m[I 2021-01-25 11:44:16,317][0m A new study created in memory with name: no-name-244cfc71-2ed8-40d4-8ec8-8d0f8fa76764[0m



  0%|          | 0/7 [00:00<?, ?it/s][A[A[A




[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721




[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261





feature_fraction, val_score: 0.000000:   0%|          | 0/7 [00:11<?, ?it/s][A[A[A


feature_fraction, val_score: 0.000000:  14%|#4        | 1/7 [00:11<01:10, 11.76s/it][A[A[A[32m[I 2021-01-25 11:44:28,091][0m Trial 0 finished with value: 1.4757886134419414e-07 and parameters: {'feature_fraction': 0.5}. Best is trial 0 with value: 1.4757886134419414e-07.[0m



feature_fraction, val_score: 0.000000:  14%|#4        | 1/7 [00:11<01:10, 11.76s/it][A[A[A

[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261





feature_fraction, val_score: 0.000000:  14%|#4        | 1/7 [00:26<01:10, 11.76s/it][A[A[A


feature_fraction, val_score: 0.000000:  29%|##8       | 2/7 [00:26<01:02, 12.53s/it][A[A[A



[32m[I 2021-01-25 11:44:42,442][0m Trial 1 finished with value: 1.4757886134419414e-07 and parameters: {'feature_fraction': 0.8999999999999999}. Best is trial 0 with value: 1.4757886134419414e-07.[0m



feature_fraction, val_score: 0.000000:  29%|##8       | 2/7 [00:26<01:02, 12.53s/it][A[A[A

[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261





feature_fraction, val_score: 0.000000:  29%|##8       | 2/7 [00:38<01:02, 12.53s/it][A[A[A


feature_fraction, val_score: 0.000000:  43%|####2     | 3/7 [00:38<00:49, 12.43s/it][A[A[A[32m[I 2021-01-25 11:44:54,616][0m Trial 2 finished with value: 1.4757886134419411e-07 and parameters: {'feature_fraction': 0.6}. Best is trial 2 with value: 1.4757886134419411e-07.[0m



feature_fraction, val_score: 0.000000:  43%|####2     | 3/7 [00:38<00:49, 12.43s/it][A[A[A

[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261





feature_fraction, val_score: 0.000000:  43%|####2     | 3/7 [00:51<00:49, 12.43s/it][A[A[A


feature_fraction, val_score: 0.000000:  57%|#####7    | 4/7 [00:51<00:38, 12.75s/it][A[A[A[32m[I 2021-01-25 11:45:08,099][0m Trial 3 finished with value: 1.4757886134419414e-07 and parameters: {'feature_fraction': 0.8}. Best is trial 2 with value: 1.4757886134419411e-07.[0m



feature_fraction, val_score: 0.000000:  57%|#####7    | 4/7 [00:51<00:38, 12.75s/it][A[A[A

[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261





feature_fraction, val_score: 0.000000:  57%|#####7    | 4/7 [01:03<00:38, 12.75s/it][A[A[A


feature_fraction, val_score: 0.000000:  71%|#######1  | 5/7 [01:03<00:24, 12.33s/it][A[A[A[32m[I 2021-01-25 11:45:19,463][0m Trial 4 finished with value: 1.4757886134419414e-07 and parameters: {'feature_fraction': 0.4}. Best is trial 2 with value: 1.4757886134419411e-07.[0m



feature_fraction, val_score: 0.000000:  71%|#######1  | 5/7 [01:03<00:24, 12.33s/it][A[A[A

[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261





feature_fraction, val_score: 0.000000:  71%|#######1  | 5/7 [01:17<00:24, 12.33s/it][A[A[A


feature_fraction, val_score: 0.000000:  86%|########5 | 6/7 [01:17<00:12, 12.82s/it][A[A[A[32m[I 2021-01-25 11:45:33,414][0m Trial 5 finished with value: 1.4757886134419414e-07 and parameters: {'feature_fraction': 1.0}. Best is trial 2 with value: 1.4757886134419411e-07.[0m



feature_fraction, val_score: 0.000000:  86%|########5 | 6/7 [01:17<00:12, 12.82s/it][A[A[A



feature_fraction, val_score: inf:   0%|          | 0/7 [15:34<?, ?it/s]
feature_fraction, val_score: inf:   0%|          | 0/7 [13:34<?, ?it/s]
regularization_factors, val_score: 0.200818:  65%|######5   | 13/20 [10:50<05:50, 50.04s/it]


[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261





feature_fraction, val_score: 0.000000:  86%|########5 | 6/7 [01:29<00:12, 12.82s/it][A[A[A


feature_fraction, val_score: 0.000000: 100%|##########| 7/7 [01:29<00:00, 12.79s/it][A[A[A[32m[I 2021-01-25 11:45:46,160][0m Trial 6 finished with value: 1.4757886134419411e-07 and parameters: {'feature_fraction': 0.7}. Best is trial 2 with value: 1.4757886134419411e-07.[0m
feature_fraction, val_score: 0.000000: 100%|##########| 7/7 [01:29<00:00, 12.83s/it]
num_leaves, val_score: 0.000000:   0%|          | 0/20 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261


num_leaves, val_score: 0.000000:   5%|5         | 1/20 [00:12<03:49, 12.09s/it][32m[I 2021-01-25 11:45:58,268][0m Trial 7 finished with value: 1.4757886134419414e-07 and parameters: {'num_leaves': 113}. Best is trial 7 with value: 1.4757886134419414e-07.[0m
num_leaves, val_score: 0.000000:   5%|5         | 1/20 [00:12<03:49, 12.09s/it]

[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261


num_leaves, val_score: 0.000000:  10%|#         | 2/20 [00:24<03:38, 12.14s/it][32m[I 2021-01-25 11:46:10,532][0m Trial 8 finished with value: 1.4757886134419414e-07 and parameters: {'num_leaves': 236}. Best is trial 7 with value: 1.4757886134419414e-07.[0m
num_leaves, val_score: 0.000000:  10%|#         | 2/20 [00:24<03:38, 12.14s/it]

[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261


num_leaves, val_score: 0.000000:  15%|#5        | 3/20 [00:36<03:28, 12.28s/it][32m[I 2021-01-25 11:46:23,124][0m Trial 9 finished with value: 1.4757886134419414e-07 and parameters: {'num_leaves': 12}. Best is trial 7 with value: 1.4757886134419414e-07.[0m
num_leaves, val_score: 0.000000:  15%|#5        | 3/20 [00:36<03:28, 12.28s/it]

[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261


num_leaves, val_score: 0.000000:  20%|##        | 4/20 [00:49<03:18, 12.42s/it][32m[I 2021-01-25 11:46:35,887][0m Trial 10 finished with value: 1.4757886134419414e-07 and parameters: {'num_leaves': 243}. Best is trial 7 with value: 1.4757886134419414e-07.[0m
num_leaves, val_score: 0.000000:  20%|##        | 4/20 [00:49<03:18, 12.42s/it]

[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261


num_leaves, val_score: 0.000000:  25%|##5       | 5/20 [01:02<03:08, 12.57s/it][32m[I 2021-01-25 11:46:48,804][0m Trial 11 finished with value: 1.4757886134419414e-07 and parameters: {'num_leaves': 8}. Best is trial 7 with value: 1.4757886134419414e-07.[0m
num_leaves, val_score: 0.000000:  25%|##5       | 5/20 [01:02<03:08, 12.57s/it]

[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261

num_leaves, val_score: 0.000000:  30%|###       | 6/20 [01:16<03:00, 12.86s/it][32m[I 2021-01-25 11:47:02,345][0m Trial 12 finished with value: 1.4757886134419414e-07 and parameters: {'num_leaves': 142}. Best is trial 7 with value: 1.4757886134419414e-07.[0m
num_leaves, val_score: 0.000000:  30%|###       | 6/20 [01:16<03:00, 12.86s/it]

No further splits with positive gain, best gain: -inf
[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261


num_leaves, val_score: 0.000000:  35%|###5      | 7/20 [01:28<02:44, 12.65s/it][32m[I 2021-01-25 11:47:14,503][0m Trial 13 finished with value: 1.4757886134419414e-07 and parameters: {'num_leaves': 86}. Best is trial 7 with value: 1.4757886134419414e-07.[0m
num_leaves, val_score: 0.000000:  35%|###5      | 7/20 [01:28<02:44, 12.65s/it]

[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261


num_leaves, val_score: 0.000000:  40%|####      | 8/20 [01:40<02:30, 12.50s/it][32m[I 2021-01-25 11:47:26,657][0m Trial 14 finished with value: 1.4757886134419414e-07 and parameters: {'num_leaves': 159}. Best is trial 7 with value: 1.4757886134419414e-07.[0m
num_leaves, val_score: 0.000000:  40%|####      | 8/20 [01:40<02:30, 12.50s/it]

[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261


num_leaves, val_score: 0.000000:  45%|####5     | 9/20 [01:53<02:19, 12.70s/it][32m[I 2021-01-25 11:47:39,820][0m Trial 15 finished with value: 1.4757886134419414e-07 and parameters: {'num_leaves': 197}. Best is trial 7 with value: 1.4757886134419414e-07.[0m
num_leaves, val_score: 0.000000:  45%|####5     | 9/20 [01:53<02:19, 12.70s/it]

[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261


num_leaves, val_score: 0.000000:  50%|#####     | 10/20 [02:05<02:05, 12.57s/it]



[32m[I 2021-01-25 11:47:52,076][0m Trial 16 finished with value: 1.4757886134419414e-07 and parameters: {'num_leaves': 54}. Best is trial 7 with value: 1.4757886134419414e-07.[0m
num_leaves, val_score: 0.000000:  50%|#####     | 10/20 [02:05<02:05, 12.57s/it]

[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261


num_leaves, val_score: 0.000000:  55%|#####5    | 11/20 [02:17<01:51, 12.39s/it][32m[I 2021-01-25 11:48:04,052][0m Trial 17 finished with value: 1.4757886134419411e-07 and parameters: {'num_leaves': 53}. Best is trial 17 with value: 1.4757886134419411e-07.[0m
num_leaves, val_score: 0.000000:  55%|#####5    | 11/20 [02:17<01:51, 12.39s/it]

[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261


num_leaves, val_score: 0.000000:  60%|######    | 12/20 [02:29<01:38, 12.27s/it][32m[I 2021-01-25 11:48:16,039][0m Trial 18 finished with value: 1.4757886134419414e-07 and parameters: {'num_leaves': 56}. Best is trial 17 with value: 1.4757886134419411e-07.[0m
num_leaves, val_score: 0.000000:  60%|######    | 12/20 [02:29<01:38, 12.27s/it]

[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261

num_leaves, val_score: 0.000000:  65%|######5   | 13/20 [02:43<01:28, 12.65s/it][32m[I 2021-01-25 11:48:29,582][0m Trial 19 finished with value: 1.4757886134419414e-07 and parameters: {'num_leaves': 195}. Best is trial 17 with value: 1.4757886134419411e-07.[0m
num_leaves, val_score: 0.000000:  65%|######5   | 13/20 [02:43<01:28, 12.65s/it]

No further splits with positive gain, best gain: -inf
[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261


num_leaves, val_score: 0.000000:  70%|#######   | 14/20 [02:55<01:15, 12.53s/it][32m[I 2021-01-25 11:48:41,824][0m Trial 20 finished with value: 1.4757886134419411e-07 and parameters: {'num_leaves': 58}. Best is trial 17 with value: 1.4757886134419411e-07.[0m
num_leaves, val_score: 0.000000:  70%|#######   | 14/20 [02:55<01:15, 12.53s/it]

[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261


num_leaves, val_score: 0.000000:  75%|#######5  | 15/20 [03:07<01:02, 12.40s/it][32m[I 2021-01-25 11:48:53,931][0m Trial 21 finished with value: 1.4757886134419414e-07 and parameters: {'num_leaves': 48}. Best is trial 17 with value: 1.4757886134419411e-07.[0m
num_leaves, val_score: 0.000000:  75%|#######5  | 15/20 [03:07<01:02, 12.40s/it]

[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261


num_leaves, val_score: 0.000000:  80%|########  | 16/20 [03:19<00:49, 12.35s/it][32m[I 2021-01-25 11:49:06,163][0m Trial 22 finished with value: 1.4757886134419414e-07 and parameters: {'num_leaves': 109}. Best is trial 17 with value: 1.4757886134419411e-07.[0m
num_leaves, val_score: 0.000000:  80%|########  | 16/20 [03:20<00:49, 12.35s/it]

[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261


num_leaves, val_score: 0.000000:  85%|########5 | 17/20 [03:32<00:36, 12.30s/it][32m[I 2021-01-25 11:49:18,361][0m Trial 23 finished with value: 1.4757886134419414e-07 and parameters: {'num_leaves': 29}. Best is trial 17 with value: 1.4757886134419411e-07.[0m
num_leaves, val_score: 0.000000:  85%|########5 | 17/20 [03:32<00:36, 12.30s/it]

[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261


num_leaves, val_score: 0.000000:  90%|######### | 18/20 [03:44<00:24, 12.25s/it][32m[I 2021-01-25 11:49:30,480][0m Trial 24 finished with value: 1.4757886134419414e-07 and parameters: {'num_leaves': 95}. Best is trial 17 with value: 1.4757886134419411e-07.[0m
num_leaves, val_score: 0.000000:  90%|######### | 18/20 [03:44<00:24, 12.25s/it]

[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261


num_leaves, val_score: 0.000000:  95%|#########5| 19/20 [03:56<00:12, 12.24s/it][32m[I 2021-01-25 11:49:42,678][0m Trial 25 finished with value: 1.4757886134419411e-07 and parameters: {'num_leaves': 77}. Best is trial 17 with value: 1.4757886134419411e-07.[0m
num_leaves, val_score: 0.000000:  95%|#########5| 19/20 [03:56<00:12, 12.24s/it]

[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261


num_leaves, val_score: 0.000000: 100%|##########| 20/20 [04:08<00:00, 12.21s/it][32m[I 2021-01-25 11:49:54,842][0m Trial 26 finished with value: 1.4757886134419414e-07 and parameters: {'num_leaves': 83}. Best is trial 17 with value: 1.4757886134419411e-07.[0m
num_leaves, val_score: 0.000000: 100%|##########| 20/20 [04:08<00:00, 12.43s/it]
bagging, val_score: 0.000000:   0%|          | 0/10 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261


bagging, val_score: 0.000000:  10%|#         | 1/10 [00:11<01:47, 11.91s/it][32m[I 2021-01-25 11:50:06,767][0m Trial 27 finished with value: 1.6309986731332756e-07 and parameters: {'bagging_fraction': 0.8368325116588962, 'bagging_freq': 7}. Best is trial 27 with value: 1.6309986731332756e-07.[0m
bagging, val_score: 0.000000:  10%|#         | 1/10 [00:11<01:47, 11.91s/it]

[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261


bagging, val_score: 0.000000:  20%|##        | 2/10 [00:23<01:33, 11.74s/it][32m[I 2021-01-25 11:50:18,122][0m Trial 28 finished with value: 3.629845126232672e-07 and parameters: {'bagging_fraction': 0.40072951406360274, 'bagging_freq': 1}. Best is trial 27 with value: 1.6309986731332756e-07.[0m
bagging, val_score: 0.000000:  20%|##        | 2/10 [00:23<01:33, 11.74s/it]

[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261


bagging, val_score: 0.000000:  30%|###       | 3/10 [00:34<01:21, 11.66s/it][32m[I 2021-01-25 11:50:29,578][0m Trial 29 finished with value: 3.284419607433499e-07 and parameters: {'bagging_fraction': 0.45030432948119226, 'bagging_freq': 4}. Best is trial 27 with value: 1.6309986731332756e-07.[0m
bagging, val_score: 0.000000:  30%|###       | 3/10 [00:34<01:21, 11.66s/it]

[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261


bagging, val_score: 0.000000:  40%|####      | 4/10 [00:47<01:11, 11.85s/it][32m[I 2021-01-25 11:50:41,881][0m Trial 30 finished with value: 1.4757886134419414e-07 and parameters: {'bagging_fraction': 0.9816185031053948, 'bagging_freq': 7}. Best is trial 30 with value: 1.4757886134419414e-07.[0m
bagging, val_score: 0.000000:  40%|####      | 4/10 [00:47<01:11, 11.85s/it]

[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261


bagging, val_score: 0.000000:  50%|#####     | 5/10 [00:58<00:59, 11.84s/it][32m[I 2021-01-25 11:50:53,700][0m Trial 31 finished with value: 2.201610892182512e-07 and parameters: {'bagging_fraction': 0.6170629731853758, 'bagging_freq': 1}. Best is trial 30 with value: 1.4757886134419414e-07.[0m
bagging, val_score: 0.000000:  50%|#####     | 5/10 [00:58<00:59, 11.84s/it]

[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261


bagging, val_score: 0.000000:  60%|######    | 6/10 [01:10<00:47, 11.90s/it][32m[I 2021-01-25 11:51:05,732][0m Trial 32 finished with value: 2.201617972664476e-07 and parameters: {'bagging_fraction': 0.6555800151882399, 'bagging_freq': 4}. Best is trial 30 with value: 1.4757886134419414e-07.[0m
bagging, val_score: 0.000000:  60%|######    | 6/10 [01:10<00:47, 11.90s/it]

[LightGBM] [Info] Number of positive: 6773, number of negative: 6724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 731072
[LightGBM] [Info] Number of data points in the train set: 13497, number of used features: 4721
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501815 -> initscore=0.007261
[LightGBM] [Info] Start training from score 0.007261


bagging, val_score: 0.000000:  70%|#######   | 7/10 [01:24<00:37, 12.42s/it][32m[I 2021-01-25 11:51:19,371][0m Trial 33 finished with value: 1.4757886134419414e-07 and parameters: {'bagging_fraction': 0.9915042036265491, 'bagging_freq': 6}. Best is trial 30 with value: 1.4757886134419414e-07.[0m
bagging, val_score: 0.000000:  70%|#######   | 7/10 [01:24<00:37, 12.42s/it]



KeyboardInterrupt: 