In [1]:
import numpy as np
import pandas as pd
import gc
import time
import lightgbm as lgb
import matplotlib.pyplot as plt
from bayes_opt import BayesianOptimization

In [2]:
df = pd.read_pickle("/Users/chienan/Pycon/github/SportLottery/input/FEATURE_AGG_2012TO2019.pkl")

In [3]:
[col for col in df.columns if "LAST_" not in col]

['GAME_ID',
 'TEAM_ID_HOME',
 'TEAM_ID_AWAY',
 'PLUS_MINUS',
 'GAME_DATE',
 'TEAM_ID_H',
 'TEAM_ID_A']

In [4]:
len([col for col in df.columns if "_H" in col])

362

In [5]:
len([col for col in df.columns if ("_A" in col) & ("_H" not in col)])

362

In [6]:
set(df.columns) - set([col for col in df.columns if "_H" in col]) - set([col for col in df.columns if ("_A" in col) & ("_H" not in col)])

{'GAME_DATE', 'GAME_ID', 'PLUS_MINUS'}

In [7]:
df["TEAM_ID_AWAY"] = df["TEAM_ID_AWAY"].astype("category")
df["TEAM_ID_HOME"] = df["TEAM_ID_HOME"].astype("category")

In [8]:
np.percentile(df.loc[:, "PLUS_MINUS"].values, [15, 50, 75]), np.mean(df.loc[:, "PLUS_MINUS"].values)

(array([-11.,   4.,  11.]), 2.690338713343942)

In [9]:
column = ["_".join(col.split("_")[2:-1]) for col in df.columns if ("LAST_1_" in col)&("_H" in col)]

In [None]:
# 修改極大或極小
df.loc[df[df.loc[:,"PLUS_MINUS"]>=11].index,"PLUS_MINUS"] = 11
df.loc[df[df.loc[:,"PLUS_MINUS"]<=-11].index,"PLUS_MINUS"] = -11

In [10]:
new_col = []
for col_i in column:
    new_col += [col for col in df.columns if col_i in col]

In [11]:
new_col = list(set(df.columns)-set(new_col))+list(new_col)

In [12]:
len(set(new_col))

727

In [13]:
df = df[new_col]

In [14]:
df.head()

Unnamed: 0,PLUS_MINUS,TEAM_ID_AWAY,TEAM_ID_A,GAME_ID,GAME_DATE,TEAM_ID_H,TEAM_ID_HOME,LAST_1_GAME_FGM_H,LAST_2_GAME_FGM_H,LAST_3_GAME_FGM_H,...,LAST_1_GAME_PLAYER_PIE_A,LAST_2_GAME_PLAYER_PIE_A,LAST_3_GAME_PLAYER_PIE_A,LAST_4_GAME_PLAYER_PIE_A,LAST_5_GAME_PLAYER_PIE_A,LAST_6_GAME_PLAYER_PIE_A,LAST_7_GAME_PLAYER_PIE_A,LAST_8_GAME_PLAYER_PIE_A,LAST_9_GAME_PLAYER_PIE_A,LAST_10_GAME_PLAYER_PIE_A
0,-39.0,1610612741,1610612741,21100222,2012-01-20,1610612739,1610612739,35.0,37.0,35.333333,...,0.099561,0.098817,0.088188,0.102924,0.100449,0.108826,0.090536,0.088891,0.089388,0.085876
1,15.0,1610612751,1610612751,21100244,2012-01-23,1610612741,1610612741,40.0,43.0,44.333333,...,0.090257,0.09135,0.081796,0.077719,0.077584,0.07804,0.074292,0.07265,0.073911,0.069259
2,12.0,1610612758,1610612758,21100250,2012-01-23,1610612757,1610612757,34.0,33.5,34.0,...,0.091012,0.083085,0.07305,0.06063,0.063579,0.061998,0.064544,0.067435,0.067592,0.065412
3,7.0,1610612739,1610612739,21100254,2012-01-24,1610612748,1610612748,25.0,34.5,35.666667,...,0.044851,0.044384,0.059031,0.06761,0.071996,0.067467,0.069797,0.076624,0.075287,0.078071
4,-3.0,1610612761,1610612761,21100255,2012-01-24,1610612756,1610612756,31.0,31.0,32.666667,...,0.105249,0.091096,0.09999,0.089473,0.09912,0.101186,0.099608,0.100829,0.093281,0.09067


In [15]:
df_train = df[df.GAME_ID<"0021700000"].reset_index(drop=True)
df_val = df[(df.GAME_ID<"0021800000")&(df.GAME_ID>"0021700000")].reset_index(drop=True)

In [16]:
x_train = df_train.loc[:, [col for col in df_train.columns if (col not in ["GAME_ID","GAME_DATE","PLUS_MINUS"])&
                          ("TEAM_ID" not in col)]]
x_val = df_val.loc[:, [col for col in df_val.columns if (col not in ["GAME_ID","GAME_DATE","PLUS_MINUS"])&
                        ("TEAM_ID" not in col)]]

In [17]:
y_train = df_train.loc[:, "PLUS_MINUS"]
y_val = df_val.loc[:, "PLUS_MINUS"]

In [18]:
train_dataset = lgb.Dataset(x_train, y_train)
test_dataset = lgb.Dataset(x_val, y_val)

In [19]:
def lgb_eval(learning_rate, num_leaves, feature_fraction, bagging_fraction, max_depth, lambda_l1, lambda_l2, min_split_gain, min_child_weight):
    evals_result = {}
    params = {'application':'regression_l2','num_iterations': 1000, 'early_stopping_round':100, 'metric':'l2'}
    
    params["num_leaves"] = int(round(num_leaves))
    params["learning_rate"] = max(learning_rate, 0)
    params['feature_fraction'] = max(min(feature_fraction, 1), 0)
    params['bagging_fraction'] = max(min(bagging_fraction, 1), 0)
    params['max_depth'] = int(round(max_depth))
    params['lambda_l1'] = max(lambda_l1, 0)
    params['lambda_l2'] = max(lambda_l2, 0)
    params['min_split_gain'] = min_split_gain
    params['min_child_weight'] = min_child_weight
    result = lgb.train(train_set=train_dataset,valid_sets=[train_dataset, test_dataset] ,evals_result=evals_result,
                       params=params,verbose_eval =200)
    return -min(evals_result["valid_1"]["l2"])

lgbBO = BayesianOptimization(lgb_eval, {'learning_rate': (0.001, 0.1),
                                        'num_leaves': (24, 45),
                                        'feature_fraction': (0.1, 0.9),
                                        'bagging_fraction': (0.5, 1),
                                        'max_depth': (4, 9),
                                        'lambda_l1': (0, 5),
                                        'lambda_l2': (0, 3),
                                        'min_split_gain': (0.001, 0.1),
                                        'min_child_weight': (5, 50)}, random_state=7)

In [20]:
%%time
lgbBO.maximize(init_points=25, n_iter=75)

|   iter    |  target   | baggin... | featur... | lambda_l1 | lambda_l2 | learni... | max_depth | min_ch... | min_sp... | num_le... |
-------------------------------------------------------------------------------------------------------------------------------------




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[31]	training's l2: 107.093	valid_1's l2: 168.675
| [0m 1       [0m | [0m-168.7   [0m | [0m 0.5382  [0m | [0m 0.7239  [0m | [0m 2.192   [0m | [0m 2.17    [0m | [0m 0.09782 [0m | [0m 6.692   [0m | [0m 27.55   [0m | [0m 0.008133[0m | [0m 29.64   [0m |
Training until validation scores don't improve for 100 rounds.
[200]	training's l2: 131.685	valid_1's l2: 169.483
[400]	training's l2: 117.074	valid_1's l2: 167.862
Early stopping, best iteration is:
[424]	training's l2: 115.963	valid_1's l2: 167.777
| [95m 2       [0m | [95m-167.8   [0m | [95m 0.7499  [0m | [95m 0.6434  [0m | [95m 4.019   [0m | [95m 1.143   [0m | [95m 0.007528[0m | [95m 5.441   [0m | [95m 45.93   [0m | [95m 0.02213 [0m | [95m 33.49   [0m |
Training until validation scores don't improve for 100 rounds.
[200]	training's l2: 96.0262	valid_1's l2: 167.493
Early stopping, best iteration i

Early stopping, best iteration is:
[160]	training's l2: 99.3094	valid_1's l2: 167.151
| [0m 21      [0m | [0m-167.2   [0m | [0m 0.7675  [0m | [0m 0.3536  [0m | [0m 3.685   [0m | [0m 0.4806  [0m | [0m 0.02006 [0m | [0m 5.773   [0m | [0m 22.03   [0m | [0m 0.02142 [0m | [0m 43.29   [0m |
Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[70]	training's l2: 117.377	valid_1's l2: 165.672
| [0m 22      [0m | [0m-165.7   [0m | [0m 0.914   [0m | [0m 0.1855  [0m | [0m 1.847   [0m | [0m 0.698   [0m | [0m 0.04566 [0m | [0m 5.382   [0m | [0m 27.58   [0m | [0m 0.09234 [0m | [0m 32.03   [0m |
Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[21]	training's l2: 121.192	valid_1's l2: 170.254
| [0m 23      [0m | [0m-170.3   [0m | [0m 0.8251  [0m | [0m 0.5765  [0m | [0m 3.76    [0m | [0m 0.185   [0m | [0m 0.07474 [0m | [0m 8.731   [0m | [0m 32.16  



Training until validation scores don't improve for 100 rounds.
[200]	training's l2: 110.096	valid_1's l2: 166.861
Early stopping, best iteration is:
[207]	training's l2: 109.33	valid_1's l2: 166.83
| [0m 26      [0m | [0m-166.8   [0m | [0m 0.7668  [0m | [0m 0.1076  [0m | [0m 0.01577 [0m | [0m 1.369   [0m | [0m 0.02235 [0m | [0m 4.623   [0m | [0m 6.57    [0m | [0m 0.007741[0m | [0m 38.76   [0m |




Training until validation scores don't improve for 100 rounds.
[200]	training's l2: 163.975	valid_1's l2: 178.177
[400]	training's l2: 153.162	valid_1's l2: 173.119
[600]	training's l2: 145.119	valid_1's l2: 170.148
[800]	training's l2: 138.807	valid_1's l2: 168.395
[1000]	training's l2: 133.622	valid_1's l2: 167.316
Did not meet early stopping. Best iteration is:
[1000]	training's l2: 133.622	valid_1's l2: 167.316
| [0m 27      [0m | [0m-167.3   [0m | [0m 0.9629  [0m | [0m 0.1086  [0m | [0m 0.04122 [0m | [0m 1.064   [0m | [0m 0.001606[0m | [0m 4.83    [0m | [0m 5.898   [0m | [0m 0.06683 [0m | [0m 38.91   [0m |




Training until validation scores don't improve for 100 rounds.
[200]	training's l2: 111.072	valid_1's l2: 168.129
[400]	training's l2: 94.1287	valid_1's l2: 167.865
Early stopping, best iteration is:
[382]	training's l2: 95.7197	valid_1's l2: 167.7
| [0m 28      [0m | [0m-167.7   [0m | [0m 0.7772  [0m | [0m 0.5933  [0m | [0m 0.2853  [0m | [0m 1.421   [0m | [0m 0.0171  [0m | [0m 4.812   [0m | [0m 6.177   [0m | [0m 0.07857 [0m | [0m 38.67   [0m |




Training until validation scores don't improve for 100 rounds.
[200]	training's l2: 85.866	valid_1's l2: 167.097
Early stopping, best iteration is:
[129]	training's l2: 100.324	valid_1's l2: 165.988
| [0m 29      [0m | [0m-166.0   [0m | [0m 0.8871  [0m | [0m 0.1497  [0m | [0m 2.794   [0m | [0m 2.785   [0m | [0m 0.03066 [0m | [0m 6.658   [0m | [0m 45.76   [0m | [0m 0.02014 [0m | [0m 34.96   [0m |




Training until validation scores don't improve for 100 rounds.
[200]	training's l2: 90.4128	valid_1's l2: 168.833
Early stopping, best iteration is:
[123]	training's l2: 104.391	valid_1's l2: 168.184
| [0m 30      [0m | [0m-168.2   [0m | [0m 0.6153  [0m | [0m 0.411   [0m | [0m 1.66    [0m | [0m 0.8049  [0m | [0m 0.02855 [0m | [0m 5.827   [0m | [0m 27.93   [0m | [0m 0.04505 [0m | [0m 31.88   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[46]	training's l2: 95.7198	valid_1's l2: 169.236
| [0m 31      [0m | [0m-169.2   [0m | [0m 0.8036  [0m | [0m 0.3077  [0m | [0m 4.426   [0m | [0m 2.347   [0m | [0m 0.07415 [0m | [0m 8.071   [0m | [0m 9.356   [0m | [0m 0.04301 [0m | [0m 38.86   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[48]	training's l2: 108.059	valid_1's l2: 168.42
| [0m 32      [0m | [0m-168.4   [0m | [0m 0.7619  [0m | [0m 0.3426  [0m | [0m 1.101   [0m | [0m 2.092   [0m | [0m 0.06825 [0m | [0m 6.07    [0m | [0m 40.73   [0m | [0m 0.005085[0m | [0m 34.52   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[57]	training's l2: 106.854	valid_1's l2: 166.488
| [0m 33      [0m | [0m-166.5   [0m | [0m 0.8409  [0m | [0m 0.1     [0m | [0m 1.844   [0m | [0m 2.447   [0m | [0m 0.0528  [0m | [0m 6.751   [0m | [0m 37.81   [0m | [0m 0.08223 [0m | [0m 39.81   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[83]	training's l2: 113.446	valid_1's l2: 164.954
| [95m 34      [0m | [95m-165.0   [0m | [95m 1.0     [0m | [95m 0.1063  [0m | [95m 1.912   [0m | [95m 0.6611  [0m | [95m 0.05151 [0m | [95m 5.229   [0m | [95m 27.46   [0m | [95m 0.1     [0m | [95m 32.08   [0m |




Training until validation scores don't improve for 100 rounds.
[200]	training's l2: 126.651	valid_1's l2: 165.6
[400]	training's l2: 110.118	valid_1's l2: 164.765
Early stopping, best iteration is:
[331]	training's l2: 114.422	valid_1's l2: 164.616
| [95m 35      [0m | [95m-164.6   [0m | [95m 0.8706  [0m | [95m 0.1     [0m | [95m 0.000199[0m | [95m 1.083   [0m | [95m 0.01095 [0m | [95m 4.673   [0m | [95m 6.177   [0m | [95m 0.03504 [0m | [95m 38.48   [0m |




Training until validation scores don't improve for 100 rounds.
[200]	training's l2: 131.266	valid_1's l2: 166.186
[400]	training's l2: 114.765	valid_1's l2: 165.246
Early stopping, best iteration is:
[369]	training's l2: 116.572	valid_1's l2: 165.083
| [0m 36      [0m | [0m-165.1   [0m | [0m 0.8924  [0m | [0m 0.1     [0m | [0m 0.07497 [0m | [0m 1.034   [0m | [0m 0.008995[0m | [0m 4.729   [0m | [0m 5.992   [0m | [0m 0.05923 [0m | [0m 38.36   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[72]	training's l2: 110.701	valid_1's l2: 167.962
| [0m 37      [0m | [0m-168.0   [0m | [0m 1.0     [0m | [0m 0.3273  [0m | [0m 1.884   [0m | [0m 0.6676  [0m | [0m 0.05666 [0m | [0m 5.082   [0m | [0m 27.41   [0m | [0m 0.1     [0m | [0m 32.13   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[68]	training's l2: 116.974	valid_1's l2: 166.625
| [0m 38      [0m | [0m-166.6   [0m | [0m 0.7245  [0m | [0m 0.2466  [0m | [0m 0.5589  [0m | [0m 2.13    [0m | [0m 0.04368 [0m | [0m 7.606   [0m | [0m 47.88   [0m | [0m 0.05543 [0m | [0m 25.17   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[37]	training's l2: 124.36	valid_1's l2: 165.739
| [0m 39      [0m | [0m-165.7   [0m | [0m 0.8994  [0m | [0m 0.1     [0m | [0m 0.1705  [0m | [0m 1.085   [0m | [0m 0.06787 [0m | [0m 4.67    [0m | [0m 6.131   [0m | [0m 0.02581 [0m | [0m 38.51   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[68]	training's l2: 117.293	valid_1's l2: 165.451
| [0m 40      [0m | [0m-165.5   [0m | [0m 0.659   [0m | [0m 0.1     [0m | [0m 0.5594  [0m | [0m 2.174   [0m | [0m 0.04923 [0m | [0m 7.278   [0m | [0m 47.96   [0m | [0m 0.08008 [0m | [0m 25.05   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[26]	training's l2: 125.426	valid_1's l2: 166.565
| [0m 41      [0m | [0m-166.6   [0m | [0m 0.7983  [0m | [0m 0.2131  [0m | [0m 1.664   [0m | [0m 2.239   [0m | [0m 0.05752 [0m | [0m 6.916   [0m | [0m 37.73   [0m | [0m 0.06848 [0m | [0m 39.7    [0m |




Training until validation scores don't improve for 100 rounds.
[200]	training's l2: 88.0727	valid_1's l2: 167.049
Early stopping, best iteration is:
[147]	training's l2: 99.2993	valid_1's l2: 166.316
| [0m 42      [0m | [0m-166.3   [0m | [0m 0.9217  [0m | [0m 0.1112  [0m | [0m 2.659   [0m | [0m 2.834   [0m | [0m 0.02927 [0m | [0m 6.877   [0m | [0m 45.73   [0m | [0m 0.01606 [0m | [0m 34.84   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[85]	training's l2: 114.966	valid_1's l2: 165.84
| [0m 43      [0m | [0m-165.8   [0m | [0m 0.8354  [0m | [0m 0.1471  [0m | [0m 2.816   [0m | [0m 2.656   [0m | [0m 0.02947 [0m | [0m 6.844   [0m | [0m 45.58   [0m | [0m 0.02928 [0m | [0m 35.06   [0m |




Training until validation scores don't improve for 100 rounds.
[200]	training's l2: 121.579	valid_1's l2: 165.976
Early stopping, best iteration is:
[299]	training's l2: 109.14	valid_1's l2: 165.348
| [0m 44      [0m | [0m-165.3   [0m | [0m 0.9668  [0m | [0m 0.1     [0m | [0m 2.807   [0m | [0m 2.861   [0m | [0m 0.01066 [0m | [0m 6.719   [0m | [0m 45.47   [0m | [0m 0.01269 [0m | [0m 35.02   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[79]	training's l2: 110.427	valid_1's l2: 165.645
| [0m 45      [0m | [0m-165.6   [0m | [0m 0.6516  [0m | [0m 0.1199  [0m | [0m 0.4863  [0m | [0m 2.016   [0m | [0m 0.05305 [0m | [0m 7.305   [0m | [0m 48.12   [0m | [0m 0.07588 [0m | [0m 24.93   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[41]	training's l2: 111.385	valid_1's l2: 170.019
| [0m 46      [0m | [0m-170.0   [0m | [0m 0.607   [0m | [0m 0.291   [0m | [0m 0.4526  [0m | [0m 2.035   [0m | [0m 0.09165 [0m | [0m 6.778   [0m | [0m 47.86   [0m | [0m 0.004769[0m | [0m 25.1    [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[48]	training's l2: 118.194	valid_1's l2: 167.54
| [0m 47      [0m | [0m-167.5   [0m | [0m 0.9269  [0m | [0m 0.2547  [0m | [0m 1.866   [0m | [0m 0.8684  [0m | [0m 0.05936 [0m | [0m 5.413   [0m | [0m 27.78   [0m | [0m 0.04498 [0m | [0m 32.6    [0m |




Training until validation scores don't improve for 100 rounds.
[200]	training's l2: 107.808	valid_1's l2: 168.242
Early stopping, best iteration is:
[161]	training's l2: 114.366	valid_1's l2: 167.847
| [0m 48      [0m | [0m-167.8   [0m | [0m 0.9188  [0m | [0m 0.3204  [0m | [0m 0.6144  [0m | [0m 2.214   [0m | [0m 0.01879 [0m | [0m 7.225   [0m | [0m 8.201   [0m | [0m 0.00743 [0m | [0m 25.32   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[49]	training's l2: 104.713	valid_1's l2: 168.82
| [0m 49      [0m | [0m-168.8   [0m | [0m 0.7752  [0m | [0m 0.2626  [0m | [0m 1.892   [0m | [0m 2.333   [0m | [0m 0.05693 [0m | [0m 6.971   [0m | [0m 38.06   [0m | [0m 0.07032 [0m | [0m 39.76   [0m |




Training until validation scores don't improve for 100 rounds.
[200]	training's l2: 109.37	valid_1's l2: 166.447
Early stopping, best iteration is:
[250]	training's l2: 101.985	valid_1's l2: 166.024
| [0m 50      [0m | [0m-166.0   [0m | [0m 0.8246  [0m | [0m 0.3549  [0m | [0m 0.574   [0m | [0m 2.219   [0m | [0m 0.01825 [0m | [0m 7.596   [0m | [0m 48.1    [0m | [0m 0.08176 [0m | [0m 25.2    [0m |




Training until validation scores don't improve for 100 rounds.
[200]	training's l2: 112.022	valid_1's l2: 166.429
Early stopping, best iteration is:
[197]	training's l2: 112.35	valid_1's l2: 166.326
| [0m 51      [0m | [0m-166.3   [0m | [0m 0.8462  [0m | [0m 0.1335  [0m | [0m 2.125   [0m | [0m 0.6167  [0m | [0m 0.02126 [0m | [0m 5.392   [0m | [0m 27.73   [0m | [0m 0.04934 [0m | [0m 31.9    [0m |




Training until validation scores don't improve for 100 rounds.
[200]	training's l2: 112.078	valid_1's l2: 165.971
Early stopping, best iteration is:
[150]	training's l2: 119.806	valid_1's l2: 165.625
| [0m 52      [0m | [0m-165.6   [0m | [0m 0.7214  [0m | [0m 0.1337  [0m | [0m 0.6196  [0m | [0m 2.229   [0m | [0m 0.0194  [0m | [0m 7.401   [0m | [0m 48.16   [0m | [0m 0.1     [0m | [0m 25.0    [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[68]	training's l2: 118.114	valid_1's l2: 165.342
| [0m 53      [0m | [0m-165.3   [0m | [0m 1.0     [0m | [0m 0.1     [0m | [0m 2.007   [0m | [0m 0.6128  [0m | [0m 0.04892 [0m | [0m 5.392   [0m | [0m 27.42   [0m | [0m 0.1     [0m | [0m 32.03   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[53]	training's l2: 114.043	valid_1's l2: 166.414
| [0m 54      [0m | [0m-166.4   [0m | [0m 0.87    [0m | [0m 0.1     [0m | [0m 1.653   [0m | [0m 2.421   [0m | [0m 0.04627 [0m | [0m 6.791   [0m | [0m 37.5    [0m | [0m 0.08504 [0m | [0m 39.89   [0m |




Training until validation scores don't improve for 100 rounds.
[200]	training's l2: 113.654	valid_1's l2: 166.49
[400]	training's l2: 92.4116	valid_1's l2: 166.555
Early stopping, best iteration is:
[317]	training's l2: 100.074	valid_1's l2: 166.143
| [0m 55      [0m | [0m-166.1   [0m | [0m 0.8538  [0m | [0m 0.1499  [0m | [0m 2.691   [0m | [0m 2.918   [0m | [0m 0.01298 [0m | [0m 6.749   [0m | [0m 45.56   [0m | [0m 0.00182 [0m | [0m 35.18   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[33]	training's l2: 106.85	valid_1's l2: 166.97
| [0m 56      [0m | [0m-167.0   [0m | [0m 0.5211  [0m | [0m 0.1447  [0m | [0m 1.72    [0m | [0m 2.434   [0m | [0m 0.09982 [0m | [0m 6.482   [0m | [0m 37.58   [0m | [0m 0.02869 [0m | [0m 40.33   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[41]	training's l2: 122.683	valid_1's l2: 166.892
| [0m 57      [0m | [0m-166.9   [0m | [0m 0.9166  [0m | [0m 0.1199  [0m | [0m 0.08583 [0m | [0m 0.9228  [0m | [0m 0.06303 [0m | [0m 4.76    [0m | [0m 6.66    [0m | [0m 0.02716 [0m | [0m 38.13   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[25]	training's l2: 121.449	valid_1's l2: 168.06
| [0m 58      [0m | [0m-168.1   [0m | [0m 0.7861  [0m | [0m 0.1002  [0m | [0m 2.863   [0m | [0m 2.834   [0m | [0m 0.08877 [0m | [0m 6.699   [0m | [0m 45.45   [0m | [0m 0.09951 [0m | [0m 34.89   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[27]	training's l2: 125.041	valid_1's l2: 168.2
| [0m 59      [0m | [0m-168.2   [0m | [0m 0.5551  [0m | [0m 0.316   [0m | [0m 0.7594  [0m | [0m 2.529   [0m | [0m 0.07989 [0m | [0m 7.612   [0m | [0m 48.21   [0m | [0m 0.01111 [0m | [0m 25.08   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[39]	training's l2: 113.278	valid_1's l2: 167.13
| [0m 60      [0m | [0m-167.1   [0m | [0m 0.5214  [0m | [0m 0.1368  [0m | [0m 0.6815  [0m | [0m 2.1     [0m | [0m 0.09931 [0m | [0m 7.448   [0m | [0m 47.63   [0m | [0m 0.07321 [0m | [0m 24.95   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[42]	training's l2: 112.833	valid_1's l2: 166.124
| [0m 61      [0m | [0m-166.1   [0m | [0m 0.8301  [0m | [0m 0.1202  [0m | [0m 1.541   [0m | [0m 2.109   [0m | [0m 0.05815 [0m | [0m 6.769   [0m | [0m 37.69   [0m | [0m 0.1     [0m | [0m 39.93   [0m |




Training until validation scores don't improve for 100 rounds.
[200]	training's l2: 168.507	valid_1's l2: 180.787
[400]	training's l2: 159.979	valid_1's l2: 176.586
[600]	training's l2: 152.97	valid_1's l2: 173.557
[800]	training's l2: 146.999	valid_1's l2: 171.314
[1000]	training's l2: 141.87	valid_1's l2: 169.692
Did not meet early stopping. Best iteration is:
[1000]	training's l2: 141.87	valid_1's l2: 169.692
| [0m 62      [0m | [0m-169.7   [0m | [0m 1.0     [0m | [0m 0.1     [0m | [0m 2.719   [0m | [0m 2.709   [0m | [0m 0.001   [0m | [0m 6.715   [0m | [0m 45.57   [0m | [0m 0.001   [0m | [0m 35.1    [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[99]	training's l2: 110.62	valid_1's l2: 165.415
| [0m 63      [0m | [0m-165.4   [0m | [0m 0.7077  [0m | [0m 0.1952  [0m | [0m 0.5444  [0m | [0m 2.113   [0m | [0m 0.03905 [0m | [0m 7.369   [0m | [0m 48.06   [0m | [0m 0.08373 [0m | [0m 25.04   [0m |




Training until validation scores don't improve for 100 rounds.
[200]	training's l2: 116.069	valid_1's l2: 167.388
Early stopping, best iteration is:
[263]	training's l2: 110.344	valid_1's l2: 167.13
| [0m 64      [0m | [0m-167.1   [0m | [0m 0.5117  [0m | [0m 0.6354  [0m | [0m 0.776   [0m | [0m 0.2149  [0m | [0m 0.0148  [0m | [0m 5.142   [0m | [0m 6.734   [0m | [0m 0.08337 [0m | [0m 24.58   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[43]	training's l2: 118.115	valid_1's l2: 166.649
| [0m 65      [0m | [0m-166.6   [0m | [0m 0.8753  [0m | [0m 0.1839  [0m | [0m 0.6314  [0m | [0m 1.624   [0m | [0m 0.07015 [0m | [0m 7.449   [0m | [0m 48.28   [0m | [0m 0.02525 [0m | [0m 25.34   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[90]	training's l2: 88.8652	valid_1's l2: 167.729
| [0m 66      [0m | [0m-167.7   [0m | [0m 0.5887  [0m | [0m 0.3273  [0m | [0m 2.877   [0m | [0m 2.835   [0m | [0m 0.05813 [0m | [0m 7.269   [0m | [0m 45.52   [0m | [0m 0.06183 [0m | [0m 35.28   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[26]	training's l2: 123.583	valid_1's l2: 169.057
| [0m 67      [0m | [0m-169.1   [0m | [0m 0.9773  [0m | [0m 0.2987  [0m | [0m 0.1871  [0m | [0m 0.8773  [0m | [0m 0.08298 [0m | [0m 4.718   [0m | [0m 6.576   [0m | [0m 0.03714 [0m | [0m 38.5    [0m |




Training until validation scores don't improve for 100 rounds.
[200]	training's l2: 103.461	valid_1's l2: 166.371
Early stopping, best iteration is:
[179]	training's l2: 106.877	valid_1's l2: 166.256
| [0m 68      [0m | [0m-166.3   [0m | [0m 0.5723  [0m | [0m 0.1308  [0m | [0m 2.904   [0m | [0m 2.997   [0m | [0m 0.01854 [0m | [0m 6.802   [0m | [0m 45.48   [0m | [0m 0.03125 [0m | [0m 35.26   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[57]	training's l2: 114.852	valid_1's l2: 168.458
| [0m 69      [0m | [0m-168.5   [0m | [0m 0.6567  [0m | [0m 0.4106  [0m | [0m 0.9924  [0m | [0m 1.737   [0m | [0m 0.05246 [0m | [0m 7.557   [0m | [0m 48.08   [0m | [0m 0.05618 [0m | [0m 24.93   [0m |




Training until validation scores don't improve for 100 rounds.
[200]	training's l2: 106.794	valid_1's l2: 168.012
Early stopping, best iteration is:
[112]	training's l2: 122.601	valid_1's l2: 167.336
| [0m 70      [0m | [0m-167.3   [0m | [0m 0.8945  [0m | [0m 0.2963  [0m | [0m 0.3347  [0m | [0m 2.276   [0m | [0m 0.02118 [0m | [0m 7.116   [0m | [0m 48.52   [0m | [0m 0.01049 [0m | [0m 25.22   [0m |




Training until validation scores don't improve for 100 rounds.
[200]	training's l2: 106.653	valid_1's l2: 167.853
Early stopping, best iteration is:
[232]	training's l2: 101.691	valid_1's l2: 167.404
| [0m 71      [0m | [0m-167.4   [0m | [0m 0.6289  [0m | [0m 0.8001  [0m | [0m 3.758   [0m | [0m 0.9148  [0m | [0m 0.01372 [0m | [0m 6.844   [0m | [0m 12.47   [0m | [0m 0.02255 [0m | [0m 32.19   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[50]	training's l2: 99.0464	valid_1's l2: 168.596
| [0m 72      [0m | [0m-168.6   [0m | [0m 0.9207  [0m | [0m 0.2435  [0m | [0m 3.213   [0m | [0m 2.443   [0m | [0m 0.08178 [0m | [0m 7.106   [0m | [0m 45.76   [0m | [0m 0.01049 [0m | [0m 35.07   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[36]	training's l2: 129.968	valid_1's l2: 168.539
| [0m 73      [0m | [0m-168.5   [0m | [0m 0.8225  [0m | [0m 0.2742  [0m | [0m 2.384   [0m | [0m 0.5569  [0m | [0m 0.04686 [0m | [0m 5.466   [0m | [0m 27.64   [0m | [0m 0.02338 [0m | [0m 31.64   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[41]	training's l2: 118.396	valid_1's l2: 168.943
| [0m 74      [0m | [0m-168.9   [0m | [0m 0.9381  [0m | [0m 0.3363  [0m | [0m 2.996   [0m | [0m 2.648   [0m | [0m 0.04959 [0m | [0m 6.686   [0m | [0m 45.04   [0m | [0m 0.007183[0m | [0m 35.14   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[56]	training's l2: 112.613	valid_1's l2: 166.614
| [0m 75      [0m | [0m-166.6   [0m | [0m 0.9056  [0m | [0m 0.2167  [0m | [0m 2.603   [0m | [0m 2.994   [0m | [0m 0.04691 [0m | [0m 7.236   [0m | [0m 46.0    [0m | [0m 0.06817 [0m | [0m 34.95   [0m |




Training until validation scores don't improve for 100 rounds.
[200]	training's l2: 159.796	valid_1's l2: 176.94
[400]	training's l2: 147.505	valid_1's l2: 172.294
[600]	training's l2: 138.593	valid_1's l2: 170.27
[800]	training's l2: 131.672	valid_1's l2: 169.134
[1000]	training's l2: 125.857	valid_1's l2: 168.38
Did not meet early stopping. Best iteration is:
[1000]	training's l2: 125.857	valid_1's l2: 168.38
| [0m 76      [0m | [0m-168.4   [0m | [0m 0.7913  [0m | [0m 0.4684  [0m | [0m 0.8115  [0m | [0m 1.678   [0m | [0m 0.0019  [0m | [0m 7.674   [0m | [0m 48.49   [0m | [0m 0.0773  [0m | [0m 25.59   [0m |




Training until validation scores don't improve for 100 rounds.
[200]	training's l2: 168.063	valid_1's l2: 180.68
[400]	training's l2: 159.252	valid_1's l2: 176.499
[600]	training's l2: 152.05	valid_1's l2: 173.553
[800]	training's l2: 145.933	valid_1's l2: 171.338
[1000]	training's l2: 140.666	valid_1's l2: 169.821
Did not meet early stopping. Best iteration is:
[1000]	training's l2: 140.666	valid_1's l2: 169.821
| [0m 77      [0m | [0m-169.8   [0m | [0m 0.9163  [0m | [0m 0.138   [0m | [0m 2.847   [0m | [0m 2.953   [0m | [0m 0.001   [0m | [0m 6.793   [0m | [0m 45.52   [0m | [0m 0.001   [0m | [0m 35.04   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[94]	training's l2: 105.458	valid_1's l2: 165.697
| [0m 78      [0m | [0m-165.7   [0m | [0m 0.8705  [0m | [0m 0.1048  [0m | [0m 2.564   [0m | [0m 2.383   [0m | [0m 0.03818 [0m | [0m 7.014   [0m | [0m 46.07   [0m | [0m 0.06456 [0m | [0m 34.51   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[55]	training's l2: 105.483	valid_1's l2: 169.588
| [0m 79      [0m | [0m-169.6   [0m | [0m 0.8073  [0m | [0m 0.4513  [0m | [0m 2.049   [0m | [0m 2.847   [0m | [0m 0.04362 [0m | [0m 6.887   [0m | [0m 11.14   [0m | [0m 0.09662 [0m | [0m 40.74   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[96]	training's l2: 112.021	valid_1's l2: 167.33
| [0m 80      [0m | [0m-167.3   [0m | [0m 0.5755  [0m | [0m 0.6237  [0m | [0m 3.515   [0m | [0m 2.433   [0m | [0m 0.03779 [0m | [0m 5.237   [0m | [0m 12.91   [0m | [0m 0.07933 [0m | [0m 27.3    [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[45]	training's l2: 115.806	valid_1's l2: 168.501
| [0m 81      [0m | [0m-168.5   [0m | [0m 0.8382  [0m | [0m 0.59    [0m | [0m 0.2338  [0m | [0m 1.455   [0m | [0m 0.04895 [0m | [0m 8.12    [0m | [0m 28.47   [0m | [0m 0.03689 [0m | [0m 31.36   [0m |




Training until validation scores don't improve for 100 rounds.
[200]	training's l2: 86.664	valid_1's l2: 167.808
Early stopping, best iteration is:
[155]	training's l2: 94.1805	valid_1's l2: 167.628
| [0m 82      [0m | [0m-167.6   [0m | [0m 0.687   [0m | [0m 0.2474  [0m | [0m 1.994   [0m | [0m 2.547   [0m | [0m 0.0254  [0m | [0m 6.655   [0m | [0m 37.54   [0m | [0m 0.04768 [0m | [0m 40.03   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[38]	training's l2: 100.474	valid_1's l2: 169.968
| [0m 83      [0m | [0m-170.0   [0m | [0m 0.5927  [0m | [0m 0.4305  [0m | [0m 3.846   [0m | [0m 0.1689  [0m | [0m 0.0816  [0m | [0m 5.873   [0m | [0m 22.12   [0m | [0m 0.05996 [0m | [0m 43.22   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[44]	training's l2: 104.339	valid_1's l2: 167.456
| [0m 84      [0m | [0m-167.5   [0m | [0m 0.8223  [0m | [0m 0.2346  [0m | [0m 2.662   [0m | [0m 2.731   [0m | [0m 0.09017 [0m | [0m 6.481   [0m | [0m 46.05   [0m | [0m 0.05867 [0m | [0m 34.84   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[28]	training's l2: 134.859	valid_1's l2: 166.162
| [0m 85      [0m | [0m-166.2   [0m | [0m 0.8123  [0m | [0m 0.2002  [0m | [0m 0.1094  [0m | [0m 0.996   [0m | [0m 0.08762 [0m | [0m 4.342   [0m | [0m 5.575   [0m | [0m 0.09959 [0m | [0m 38.45   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[34]	training's l2: 123.316	valid_1's l2: 170.177
| [0m 86      [0m | [0m-170.2   [0m | [0m 0.8994  [0m | [0m 0.3317  [0m | [0m 0.3305  [0m | [0m 1.114   [0m | [0m 0.06343 [0m | [0m 4.77    [0m | [0m 5.629   [0m | [0m 0.04024 [0m | [0m 38.56   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[45]	training's l2: 133.103	valid_1's l2: 167.85
| [0m 87      [0m | [0m-167.8   [0m | [0m 0.9915  [0m | [0m 0.4521  [0m | [0m 0.004775[0m | [0m 1.019   [0m | [0m 0.05409 [0m | [0m 4.426   [0m | [0m 6.205   [0m | [0m 0.04737 [0m | [0m 38.76   [0m |




Training until validation scores don't improve for 100 rounds.
[200]	training's l2: 132.78	valid_1's l2: 168.464
[400]	training's l2: 114.311	valid_1's l2: 167.117
Early stopping, best iteration is:
[432]	training's l2: 112.173	valid_1's l2: 167.041
| [0m 88      [0m | [0m-167.0   [0m | [0m 0.9153  [0m | [0m 0.3062  [0m | [0m 0.6176  [0m | [0m 2.187   [0m | [0m 0.007796[0m | [0m 7.579   [0m | [0m 48.08   [0m | [0m 0.0801  [0m | [0m 24.61   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[37]	training's l2: 119.306	valid_1's l2: 168.291
| [0m 89      [0m | [0m-168.3   [0m | [0m 0.6884  [0m | [0m 0.3371  [0m | [0m 0.4452  [0m | [0m 1.812   [0m | [0m 0.07127 [0m | [0m 7.973   [0m | [0m 48.24   [0m | [0m 0.07212 [0m | [0m 24.52   [0m |




Training until validation scores don't improve for 100 rounds.
[200]	training's l2: 105.381	valid_1's l2: 167.516
Early stopping, best iteration is:
[184]	training's l2: 108	valid_1's l2: 167.09
| [0m 90      [0m | [0m-167.1   [0m | [0m 0.743   [0m | [0m 0.4704  [0m | [0m 0.4358  [0m | [0m 2.273   [0m | [0m 0.02125 [0m | [0m 7.217   [0m | [0m 48.19   [0m | [0m 0.08612 [0m | [0m 24.52   [0m |




Training until validation scores don't improve for 100 rounds.
[200]	training's l2: 106.211	valid_1's l2: 166.858
Early stopping, best iteration is:
[156]	training's l2: 112.281	valid_1's l2: 166.066
| [0m 91      [0m | [0m-166.1   [0m | [0m 0.8444  [0m | [0m 0.123   [0m | [0m 0.3173  [0m | [0m 1.299   [0m | [0m 0.02582 [0m | [0m 5.29    [0m | [0m 6.397   [0m | [0m 0.08405 [0m | [0m 31.2    [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[29]	training's l2: 123.546	valid_1's l2: 170.795
| [0m 92      [0m | [0m-170.8   [0m | [0m 0.8335  [0m | [0m 0.8066  [0m | [0m 3.262   [0m | [0m 0.08361 [0m | [0m 0.05627 [0m | [0m 7.991   [0m | [0m 23.73   [0m | [0m 0.03389 [0m | [0m 31.07   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[36]	training's l2: 126.094	valid_1's l2: 167.672
| [0m 93      [0m | [0m-167.7   [0m | [0m 0.6134  [0m | [0m 0.1164  [0m | [0m 0.5244  [0m | [0m 1.257   [0m | [0m 0.06202 [0m | [0m 5.119   [0m | [0m 6.409   [0m | [0m 0.04686 [0m | [0m 31.23   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[92]	training's l2: 102.475	valid_1's l2: 168.48
| [0m 94      [0m | [0m-168.5   [0m | [0m 0.9115  [0m | [0m 0.6287  [0m | [0m 3.677   [0m | [0m 0.2316  [0m | [0m 0.03923 [0m | [0m 5.886   [0m | [0m 43.73   [0m | [0m 0.0265  [0m | [0m 37.09   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[30]	training's l2: 117.792	valid_1's l2: 168.256
| [0m 95      [0m | [0m-168.3   [0m | [0m 0.7094  [0m | [0m 0.443   [0m | [0m 0.555   [0m | [0m 1.955   [0m | [0m 0.09037 [0m | [0m 7.454   [0m | [0m 48.44   [0m | [0m 0.003101[0m | [0m 25.3    [0m |




Training until validation scores don't improve for 100 rounds.
[200]	training's l2: 92.5277	valid_1's l2: 167.308
Early stopping, best iteration is:
[126]	training's l2: 105.31	valid_1's l2: 166.66
| [0m 96      [0m | [0m-166.7   [0m | [0m 0.8201  [0m | [0m 0.2526  [0m | [0m 1.702   [0m | [0m 2.572   [0m | [0m 0.02158 [0m | [0m 6.788   [0m | [0m 36.95   [0m | [0m 0.08404 [0m | [0m 39.85   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[68]	training's l2: 118.997	valid_1's l2: 164.666
| [0m 97      [0m | [0m-164.7   [0m | [0m 0.6655  [0m | [0m 0.1     [0m | [0m 0.5262  [0m | [0m 2.155   [0m | [0m 0.04652 [0m | [0m 7.362   [0m | [0m 48.07   [0m | [0m 0.07954 [0m | [0m 25.0    [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[90]	training's l2: 117.272	valid_1's l2: 165.484
| [0m 98      [0m | [0m-165.5   [0m | [0m 0.7     [0m | [0m 0.1     [0m | [0m 0.573   [0m | [0m 2.186   [0m | [0m 0.03737 [0m | [0m 7.4     [0m | [0m 48.03   [0m | [0m 0.08172 [0m | [0m 25.06   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[36]	training's l2: 120.258	valid_1's l2: 167.054
| [0m 99      [0m | [0m-167.1   [0m | [0m 0.5767  [0m | [0m 0.1233  [0m | [0m 0.1782  [0m | [0m 0.9062  [0m | [0m 0.08046 [0m | [0m 4.581   [0m | [0m 5.786   [0m | [0m 0.003666[0m | [0m 38.21   [0m |




Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[32]	training's l2: 118.145	valid_1's l2: 168.801
| [0m 100     [0m | [0m-168.8   [0m | [0m 0.8526  [0m | [0m 0.3161  [0m | [0m 0.2489  [0m | [0m 1.24    [0m | [0m 0.08615 [0m | [0m 4.551   [0m | [0m 5.949   [0m | [0m 0.01737 [0m | [0m 38.78   [0m |
CPU times: user 25min 47s, sys: 4min 38s, total: 30min 26s
Wall time: 9min 50s


In [23]:
 lgbBO.res[np.argmax([i["target"] for i in lgbBO.res])]

{'params': {'bagging_fraction': 0.8706187283316725,
  'feature_fraction': 0.1,
  'lambda_l1': 0.00019934555725044367,
  'lambda_l2': 1.0830676860368973,
  'learning_rate': 0.010947383797439775,
  'max_depth': 4.673263585901005,
  'min_child_weight': 6.177336281660579,
  'min_split_gain': 0.035035585082948205,
  'num_leaves': 38.47712277120209},
 'target': -164.61567653589103}

In [21]:
best_params = lgbBO.res[np.argmax([i["target"] for i in lgbBO.res])]["params"]

In [22]:
best_params

{'bagging_fraction': 0.8706187283316725,
 'feature_fraction': 0.1,
 'lambda_l1': 0.00019934555725044367,
 'lambda_l2': 1.0830676860368973,
 'learning_rate': 0.010947383797439775,
 'max_depth': 4.673263585901005,
 'min_child_weight': 6.177336281660579,
 'min_split_gain': 0.035035585082948205,
 'num_leaves': 38.47712277120209}

In [None]:
best_params["num_leaves"] = int(round(best_params["num_leaves"]))
best_params["max_depth"] = int(round(best_params["max_depth"]))

In [None]:
best_params.update({'num_iterations': 1000,
                    'early_stopping_round':100,
                    "metric": "mean_squared_error"})

In [None]:
best_params

In [None]:
import pickle

In [None]:
pickle.dump(best_params, open("/Users/chienan/Pycon/github/SportLottery/model/model_params.pkl","wb"),protocol=-1)

In [None]:
%%time 
print("start training...")
model = lgb.train(train_set=train_dataset,
                  valid_sets=[train_dataset, test_dataset] ,
                  verbose_eval=20,
                  params=best_params,
                  evals_result=evals_result)

In [None]:
# model.save_model('/Users/chienan/Pycon/github/SportLottery/model/model_agg_v1.txt')

In [None]:
lgb.plot_importance(booster=model,max_num_features=20,importance_type="split")
plt.show()

In [None]:
sum(model.predict(x_val)>0.5), sum(y_val)

In [None]:
# accuracy 
# 0.6365638766519823
sum((model.predict(x_val)>0.5).astype(int)==y_val)/len(y_val)

In [None]:
from sklearn.metrics import f1_score,precision_score,recall_score,precision_recall_curve

In [None]:
f1_score(y_pred=(model.predict(x_val)>0.5).astype(int),y_true=y_val)

In [None]:
precision_score(y_pred=(model.predict(x_val)>0.5).astype(int),y_true=y_val)

In [None]:
recall_score(y_pred=(model.predict(x_val)>0.5).astype(int),y_true=y_val)

In [None]:
precision_recall_curve(probas_pred=model.predict(x_val),y_true=y_val)

In [None]:
data_lst = [col for col in x_train.columns[model.feature_importance().argsort()][::-1] if "TEAM_ID" not in col]

In [None]:
data_lst = [col for col in x_train.columns[model.feature_importance().argsort()][::-1] if "TEAM_ID" not in col]
lst = []

while data_lst:
    add_item = list(set([data_lst[0] for i in column if (i in data_lst[0])&("_H" in data_lst[0])] + 
                        [data_lst[0] for i in column if (i in data_lst[0])&("_A" in data_lst[0])]))
    lst += add_item
    data_lst = [col for col in data_lst if "_".join(lst[-1].split("_")[3:]) != "_".join(col.split("_")[3:])]

In [None]:
x_train = df_train.loc[:,lst]
x_test = df_test.loc[:,lst]

In [None]:
%%time
print("start training...")
model_v2 = lgb.train(train_set=train_dataset, params=param)

In [None]:
lgb.plot_importance(booster=model_v2,max_num_features=20,importance_type="split")
plt.show()

In [None]:
sum((model_v2.predict(x_test)>0.4).astype(int)==y_test)/len(y_test)

In [None]:
model_v2.predict(x_test)

In [None]:
sum(y_test[model_v2.predict(x_test)>0.4])/sum((model_v2.predict(x_test)>0.4))

In [None]:
sum(model_v2.predict(x_test)[y_test.astype(bool)]>0.4)/sum(y_test)

In [None]:
sum(model_v2.predict(x_test)>0.4)

In [None]:
sum(y_test),len(y_test)