# 사용 패키지

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import lightgbm as lgb

# 데이터 로드

In [2]:
# Load the hourly gas-supply CSV into `total`; the file is decoded as cp949.
source_csv = '한국가스공사_시간별 공급량_20181231.csv'
total = pd.read_csv(source_csv, encoding='cp949')

In [3]:
total.head()

Unnamed: 0,연월일,시간,구분,공급량
0,2013-01-01,1,A,2497.129
1,2013-01-01,2,A,2363.265
2,2013-01-01,3,A,2258.505
3,2013-01-01,4,A,2243.969
4,2013-01-01,5,A,2344.105


# 전처리

In [4]:
total['구분']

0         A
1         A
2         A
3         A
4         A
         ..
368083    H
368084    H
368085    H
368086    H
368087    H
Name: 구분, Length: 368088, dtype: object

In [5]:
total['구분'].unique()

array(['A', 'B', 'C', 'D', 'E', 'G', 'H'], dtype=object)

In [6]:
# Replace the string labels in '구분' with integer codes, numbered in the order
# the labels are returned by unique(). `d_map` keeps the label -> code lookup.
d_map = {label: code for code, label in enumerate(total['구분'].unique())}
total['구분'] = total['구분'].map(d_map)

In [7]:
total['구분']

0         0
1         0
2         0
3         0
4         0
         ..
368083    6
368084    6
368085    6
368086    6
368087    6
Name: 구분, Length: 368088, dtype: int64

In [8]:
# Convert the '연월일' column from object (string) dtype to datetime so the
# .dt accessor can be used on it.
parsed_dates = pd.to_datetime(total['연월일'])
total['연월일'] = parsed_dates

In [9]:
total

Unnamed: 0,연월일,시간,구분,공급량
0,2013-01-01,1,0,2497.129
1,2013-01-01,2,0,2363.265
2,2013-01-01,3,0,2258.505
3,2013-01-01,4,0,2243.969
4,2013-01-01,5,0,2344.105
...,...,...,...,...
368083,2018-12-31,20,6,681.033
368084,2018-12-31,21,6,669.961
368085,2018-12-31,22,6,657.941
368086,2018-12-31,23,6,610.953


In [10]:
# Split the date into separate year / month / day / weekday columns
# (weekday encodes Monday..Sunday as a number).
for date_part in ('year', 'month', 'day', 'weekday'):
    total[date_part] = getattr(total['연월일'].dt, date_part)

In [119]:
# NOTE(review): exploratory leftover — this groups by an empty key list and the
# resulting GroupBy object is not assigned to anything. Presumably safe to
# delete; confirm no key columns were meant to be passed here.
total.groupby([])

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001CC9760A460>

In [12]:
# Calendar years used for fitting (2013 through 2017) and the single
# held-out year used for validation.
train_years = list(range(2013, 2018))
val_years = [2018]

In [13]:
# Split by calendar year: rows whose 'year' is listed in train_years go to
# `train`, rows whose 'year' is listed in val_years go to `val`.
is_train_row = total['year'].isin(train_years)
is_val_row = total['year'].isin(val_years)
train = total[is_train_row]
val = total[is_val_row]

In [14]:
# Model inputs are the encoded '구분' code plus calendar/time-of-day columns;
# the regression target is the '공급량' column.
features = ['구분', 'month', 'day', 'weekday', '시간']
target_col = '공급량'

train_x, train_y = train[features], train[target_col]
val_x, val_y = val[features], val[target_col]

In [101]:
train_x

Unnamed: 0,구분,month,day,weekday,시간
0,0,1,1,1,1
1,0,1,1,1,2
2,0,1,1,1,3
3,0,1,1,1,4
4,0,1,1,1,5
...,...,...,...,...,...
306763,6,12,31,6,20
306764,6,12,31,6,21
306765,6,12,31,6,22
306766,6,12,31,6,23


In [16]:
train_y

0         2497.129
1         2363.265
2         2258.505
3         2243.969
4         2344.105
            ...   
306763     517.264
306764     530.896
306765     506.287
306766     470.638
306767     444.618
Name: 공급량, Length: 306768, dtype: float64

# 학습

### LGBM 주요 파라미터 
boosting : 어떤 알고리즘으로 학습할지 선택

num_boost_round : 부스팅 반복 횟수(= 순차적으로 만들 트리의 수). 신경망의 epoch 수와 비슷한 역할이라고 생각하면 됨

learning_rate : 0.001 / 0.005 / 0.01 등으로 설정. 각 트리의 예측값에 곱해지는 축소(shrinkage) 비율로, ANN의 Adam 같은 optimizer가 없기 때문에 기본적으로 학습 내내 고정값이 사용됨

metric : 검증 데이터에 대해 출력하고 early stopping에 사용할 평가 지표(mae, rmse, mse 등)를 정해주는 것. 실제로 최소화하는 손실 함수는 objective로 정함

max_depth : 트리가 어디까지 뻗어나갈건지 정해주는 변수

num_leaves : 복잡성을 담당하는 parameter, 2^(max_depth)보다 작아야함.

정확도 향상 <-------------> 과적합 방지 (아래에서 왼쪽은 정확도를 높이는 설정, 오른쪽은 과적합을 줄이는 설정)

큰 max_bin<----> 작은 max_bin

큰 num_leaves <------>작은 num_leaves

작은 learning rate(정확도)

큰 num_boost_round(정확도)

boosting알고리즘으로 dart 사용(정확도)

## grid search

In [37]:
# Candidate values for each hyper-parameter of the grid search.
# NOTE(review): the training log recorded further down prints learning rates
# 0.001 / 0.005 / 0.01 / 0.05 / 0.1, so it was apparently produced with a
# different learning_rate list than the one below — re-run before comparing.
boosting = [
    'gbdt',
    'dart',
    'goss',
]
learning_rate = [
    0.01,
    0.05,
    0.1,
    0.5,
    1,
]
max_depth = [5, 10, 15]
num_leaves = [5, 10, 15]


In [41]:
# Total number of grid-search combinations: the product of the number of
# candidate values on each hyper-parameter axis.
num_of_case = 1
for grid_axis in (boosting, learning_rate, max_depth, num_leaves):
    num_of_case *= len(grid_axis)
num_of_case

135

In [38]:
len(boosting)

3

In [49]:
# Pre-allocate the results table: one zero-filled row per grid-search case,
# with columns for the case number, the four hyper-parameters and the MAE.
result_columns = ['case number','boosting','Lr','max_depth','num_leaves','MAE']
zero_grid = np.zeros((num_of_case, len(result_columns)))
MAE_grid_search = pd.DataFrame(data=zero_grid, columns=result_columns)

In [54]:
# Exhaustive grid search: train one LightGBM regressor for every combination of
# (boosting, learning_rate, max_depth, num_leaves) and store the validation MAE
# that LightGBM reports as the model's best score in MAE_grid_search.
import itertools

d_train = lgb.Dataset(train_x, train_y)
d_val = lgb.Dataset(val_x, val_y)

# The results frame was created as all-float zeros, but the 'boosting' column
# has to hold strings. Make it an object column up front instead of relying on
# pandas' deprecated silent upcast when a str is written into a float column.
MAE_grid_search['boosting'] = MAE_grid_search['boosting'].astype(object)

# The `verbose_eval` / `early_stopping_rounds` keyword arguments were removed
# from lgb.train in LightGBM 4.0; callbacks are the supported replacement.
# `log_evaluation` was called `print_evaluation` in older releases, so fall
# back to that name when the new one is missing.
make_log_callback = getattr(lgb, 'log_evaluation', None) or lgb.print_evaluation

cnt = 0
# itertools.product iterates with the right-most list varying fastest, i.e. in
# the same order as four nested loops with `boosting` outermost.
for boosting_par, Lr_par, max_depth_par, num_leaves_par in itertools.product(
        boosting, learning_rate, max_depth, num_leaves):

    print('-'*60)
    print('Case : ',cnt+1)
    print('boosting parameter : ',boosting_par)
    print('Learning_rate : ',Lr_par)
    print('max_depth : ',max_depth_par)
    print('num_leaves : ',num_leaves_par)

    params = {
        'objective': 'regression',
        'metric':'mae',
        'seed':42 ,
        'boosting' : boosting_par,
        'learning_rate' : Lr_par,
        'max_depth' : max_depth_par,
        'num_leaves' : num_leaves_par
    }

    # Fresh callbacks per run: stop after 10 rounds without improvement on the
    # validation set and log the validation metric every 20 rounds.
    # NOTE(review): the recorded dart runs print no "best iteration" line, so
    # for dart the stored score appears to be the last iteration's — confirm.
    model = lgb.train(
        params,
        d_train,
        num_boost_round=1000,
        valid_sets=[d_val],
        callbacks=[lgb.early_stopping(10), make_log_callback(20)],
    )

    # 'mae' is reported by LightGBM under the key 'l1' for the first
    # (unnamed) validation set 'valid_0'.
    MAE_grid_search.iloc[cnt,0] = int(cnt+1)
    MAE_grid_search.iloc[cnt,1] = boosting_par
    MAE_grid_search.iloc[cnt,2] = Lr_par
    MAE_grid_search.iloc[cnt,3] = max_depth_par
    MAE_grid_search.iloc[cnt,4] = num_leaves_par
    MAE_grid_search.iloc[cnt,5] = model.best_score['valid_0']['l1']

    cnt = cnt+1


------------------------------------------------------------
Case :  1
boosting parameter :  gbdt
Learning_rate :  0.001
max_depth :  5
num_leaves :  5
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84
[LightGBM] [Info] Number of data points in the train set: 306768, number of used features: 5
[LightGBM] [Info] Start training from score 934.864036
Training until validation scores don't improve for 10 rounds
[20]	valid_0's l1: 751.326
[40]	valid_0's l1: 745.362
[60]	valid_0's l1: 739.581
[80]	valid_0's l1: 734.444
[100]	valid_0's l1: 729.391
[120]	valid_0's l1: 724.519
[140]	valid_0's l1: 719.732
[160]	valid_0's l1: 714.687
[180]	valid_0's l1: 709.458
[200]	valid_0's l1: 704.244
[220]	valid_0's l1: 699.27
[240]	valid_0's l1: 694.356
[260]	valid_0's l1: 689.449
[280]	valid_0's l1: 684.728
[300]	valid_0's l1: 680.1
[320]	valid_0's l1: 675.481
[340]	valid_0's l1: 671.038
[360]	valid_0's 

[980]	valid_0's l1: 540.34
[1000]	valid_0's l1: 537.057
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 537.057
------------------------------------------------------------
Case :  5
boosting parameter :  gbdt
Learning_rate :  0.001
max_depth :  10
num_leaves :  10
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84
[LightGBM] [Info] Number of data points in the train set: 306768, number of used features: 5
[LightGBM] [Info] Start training from score 934.864036
Training until validation scores don't improve for 10 rounds
[20]	valid_0's l1: 748.114
[40]	valid_0's l1: 738.909
[60]	valid_0's l1: 729.908
[80]	valid_0's l1: 721.932
[100]	valid_0's l1: 714.161
[120]	valid_0's l1: 706.591
[140]	valid_0's l1: 699.022
[160]	valid_0's l1: 690.881
[180]	valid_0's l1: 682.911
[200]	valid_0's l1: 675.093
[220]	valid_0's l1: 667.432
[240]	valid_0's l1: 659.923
[260]	valid_0's l1

[920]	valid_0's l1: 474.073
[940]	valid_0's l1: 470.182
[960]	valid_0's l1: 466.388
[980]	valid_0's l1: 462.619
[1000]	valid_0's l1: 458.785
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 458.785
------------------------------------------------------------
Case :  9
boosting parameter :  gbdt
Learning_rate :  0.001
max_depth :  15
num_leaves :  15
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84
[LightGBM] [Info] Number of data points in the train set: 306768, number of used features: 5
[LightGBM] [Info] Start training from score 934.864036
Training until validation scores don't improve for 10 rounds
[20]	valid_0's l1: 745.512
[40]	valid_0's l1: 733.872
[60]	valid_0's l1: 722.543
[80]	valid_0's l1: 711.519
[100]	valid_0's l1: 700.63
[120]	valid_0's l1: 690.205
[140]	valid_0's l1: 680.236
[160]	valid_0's l1: 670.509
[180]	valid_0's l1: 661.177
[200]	valid_0's l1

[880]	valid_0's l1: 199.121
[900]	valid_0's l1: 197.254
[920]	valid_0's l1: 195.537
[940]	valid_0's l1: 193.874
[960]	valid_0's l1: 192.234
[980]	valid_0's l1: 190.747
[1000]	valid_0's l1: 189.321
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 189.321
------------------------------------------------------------
Case :  13
boosting parameter :  gbdt
Learning_rate :  0.005
max_depth :  10
num_leaves :  5
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84
[LightGBM] [Info] Number of data points in the train set: 306768, number of used features: 5
[LightGBM] [Info] Start training from score 934.864036
Training until validation scores don't improve for 10 rounds
[20]	valid_0's l1: 729.494
[40]	valid_0's l1: 704.273
[60]	valid_0's l1: 680.133
[80]	valid_0's l1: 658.37
[100]	valid_0's l1: 635.189
[120]	valid_0's l1: 613.041
[140]	valid_0's l1: 592.175
[160]	valid_0's l1

[780]	valid_0's l1: 291.34
[800]	valid_0's l1: 288.098
[820]	valid_0's l1: 285.145
[840]	valid_0's l1: 282.127
[860]	valid_0's l1: 279.299
[880]	valid_0's l1: 276.533
[900]	valid_0's l1: 273.804
[920]	valid_0's l1: 271.202
[940]	valid_0's l1: 268.952
[960]	valid_0's l1: 266.856
[980]	valid_0's l1: 264.814
[1000]	valid_0's l1: 262.899
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 262.899
------------------------------------------------------------
Case :  17
boosting parameter :  gbdt
Learning_rate :  0.005
max_depth :  15
num_leaves :  10
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84
[LightGBM] [Info] Number of data points in the train set: 306768, number of used features: 5
[LightGBM] [Info] Start training from score 934.864036
Training until validation scores don't improve for 10 rounds
[20]	valid_0's l1: 713.902
[40]	valid_0's l1: 675.104
[60]	valid_0's 

[720]	valid_0's l1: 184.615
[740]	valid_0's l1: 183.659
[760]	valid_0's l1: 182.727
[780]	valid_0's l1: 181.833
[800]	valid_0's l1: 181.097
[820]	valid_0's l1: 180.419
[840]	valid_0's l1: 179.749
[860]	valid_0's l1: 179.174
[880]	valid_0's l1: 178.598
[900]	valid_0's l1: 178.079
[920]	valid_0's l1: 177.655
[940]	valid_0's l1: 177.251
[960]	valid_0's l1: 176.889
[980]	valid_0's l1: 176.529
[1000]	valid_0's l1: 176.141
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 176.141
------------------------------------------------------------
Case :  21
boosting parameter :  gbdt
Learning_rate :  0.01
max_depth :  5
num_leaves :  15
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84
[LightGBM] [Info] Number of data points in the train set: 306768, number of used features: 5
[LightGBM] [Info] Start training from score 934.864036
Training until validation scores don't improve 

[600]	valid_0's l1: 173.944
[620]	valid_0's l1: 172.717
[640]	valid_0's l1: 171.481
[660]	valid_0's l1: 170.18
[680]	valid_0's l1: 169.235
[700]	valid_0's l1: 168.286
[720]	valid_0's l1: 167.478
[740]	valid_0's l1: 166.721
[760]	valid_0's l1: 166.076
[780]	valid_0's l1: 165.438
[800]	valid_0's l1: 164.868
[820]	valid_0's l1: 164.288
[840]	valid_0's l1: 163.679
[860]	valid_0's l1: 163.13
[880]	valid_0's l1: 162.708
[900]	valid_0's l1: 162.294
[920]	valid_0's l1: 161.909
[940]	valid_0's l1: 161.513
[960]	valid_0's l1: 161.192
[980]	valid_0's l1: 160.908
[1000]	valid_0's l1: 160.556
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 160.556
------------------------------------------------------------
Case :  25
boosting parameter :  gbdt
Learning_rate :  0.01
max_depth :  15
num_leaves :  5
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84
[LightGBM] [Info] Number of d

[560]	valid_0's l1: 189.601
[580]	valid_0's l1: 188.803
[600]	valid_0's l1: 188.227
[620]	valid_0's l1: 187.711
[640]	valid_0's l1: 187.207
[660]	valid_0's l1: 185.478
[680]	valid_0's l1: 182.707
[700]	valid_0's l1: 181.757
[720]	valid_0's l1: 180.426
[740]	valid_0's l1: 178.482
[760]	valid_0's l1: 176.657
[780]	valid_0's l1: 175.39
[800]	valid_0's l1: 174.487
[820]	valid_0's l1: 173.94
[840]	valid_0's l1: 173.612
[860]	valid_0's l1: 172.549
[880]	valid_0's l1: 172.096
[900]	valid_0's l1: 171.703
[920]	valid_0's l1: 171.567
[940]	valid_0's l1: 171.309
[960]	valid_0's l1: 170.826
[980]	valid_0's l1: 170.52
[1000]	valid_0's l1: 169.924
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 169.924
------------------------------------------------------------
Case :  29
boosting parameter :  gbdt
Learning_rate :  0.05
max_depth :  5
num_leaves :  10
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[Light

[40]	valid_0's l1: 288.135
[60]	valid_0's l1: 230.448
[80]	valid_0's l1: 198.226
[100]	valid_0's l1: 182.289
[120]	valid_0's l1: 172.962
[140]	valid_0's l1: 167.779
[160]	valid_0's l1: 164.415
[180]	valid_0's l1: 162.03
[200]	valid_0's l1: 160.262
[220]	valid_0's l1: 159.05
[240]	valid_0's l1: 157.901
[260]	valid_0's l1: 156.843
[280]	valid_0's l1: 156.156
[300]	valid_0's l1: 155.568
[320]	valid_0's l1: 155.032
[340]	valid_0's l1: 154.254
[360]	valid_0's l1: 153.793
[380]	valid_0's l1: 153.358
[400]	valid_0's l1: 152.858
[420]	valid_0's l1: 152.398
[440]	valid_0's l1: 151.94
Early stopping, best iteration is:
[447]	valid_0's l1: 151.818
------------------------------------------------------------
Case :  34
boosting parameter :  gbdt
Learning_rate :  0.05
max_depth :  15
num_leaves :  5
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84
[LightGBM] [Info] Number of data points in the t

[40]	valid_0's l1: 219.548
[60]	valid_0's l1: 190.071
[80]	valid_0's l1: 180.542
[100]	valid_0's l1: 175.6
[120]	valid_0's l1: 173.086
[140]	valid_0's l1: 170.541
[160]	valid_0's l1: 168.567
[180]	valid_0's l1: 166.321
[200]	valid_0's l1: 165.011
[220]	valid_0's l1: 163.66
[240]	valid_0's l1: 162.461
[260]	valid_0's l1: 161.274
[280]	valid_0's l1: 160.564
[300]	valid_0's l1: 159.685
[320]	valid_0's l1: 158.556
[340]	valid_0's l1: 157.82
[360]	valid_0's l1: 156.599
[380]	valid_0's l1: 156.106
[400]	valid_0's l1: 155.282
[420]	valid_0's l1: 154.661
[440]	valid_0's l1: 154.342
[460]	valid_0's l1: 153.943
Early stopping, best iteration is:
[469]	valid_0's l1: 153.69
------------------------------------------------------------
Case :  39
boosting parameter :  gbdt
Learning_rate :  0.1
max_depth :  5
num_leaves :  15
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84
[LightGBM] [Info] Numbe

[300]	valid_0's l1: 154.42
[320]	valid_0's l1: 153.33
[340]	valid_0's l1: 152.827
[360]	valid_0's l1: 152.047
[380]	valid_0's l1: 151.294
[400]	valid_0's l1: 150.985
[420]	valid_0's l1: 150.538
[440]	valid_0's l1: 150.064
Early stopping, best iteration is:
[438]	valid_0's l1: 150.044
------------------------------------------------------------
Case :  45
boosting parameter :  gbdt
Learning_rate :  0.1
max_depth :  15
num_leaves :  15
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84
[LightGBM] [Info] Number of data points in the train set: 306768, number of used features: 5
[LightGBM] [Info] Start training from score 934.864036
Training until validation scores don't improve for 10 rounds
[20]	valid_0's l1: 285.418
[40]	valid_0's l1: 196.786
[60]	valid_0's l1: 172.421
[80]	valid_0's l1: 163.956
[100]	valid_0's l1: 159.696
[120]	valid_0's l1: 157.97
[140]	valid_0's l1: 156.496
[160]	va

[320]	valid_0's l1: 805.006
[340]	valid_0's l1: 807.802
[360]	valid_0's l1: 812.336
[380]	valid_0's l1: 811.681
[400]	valid_0's l1: 813.461
[420]	valid_0's l1: 811.306
[440]	valid_0's l1: 815.226
[460]	valid_0's l1: 813.992
[480]	valid_0's l1: 811.682
[500]	valid_0's l1: 813.416
[520]	valid_0's l1: 818.109
[540]	valid_0's l1: 815.726
[560]	valid_0's l1: 812.88
[580]	valid_0's l1: 814.716
[600]	valid_0's l1: 813.314
[620]	valid_0's l1: 808.478
[640]	valid_0's l1: 810.034
[660]	valid_0's l1: 808.334
[680]	valid_0's l1: 805.867
[700]	valid_0's l1: 804.419
[720]	valid_0's l1: 800.696
[740]	valid_0's l1: 798.442
[760]	valid_0's l1: 795.494
[780]	valid_0's l1: 793.95
[800]	valid_0's l1: 792.319
[820]	valid_0's l1: 787.472
[840]	valid_0's l1: 784.755
[860]	valid_0's l1: 783.8
[880]	valid_0's l1: 780.462
[900]	valid_0's l1: 778.76
[920]	valid_0's l1: 780.188
[940]	valid_0's l1: 778.3
[960]	valid_0's l1: 779.545
[980]	valid_0's l1: 776.11
[1000]	valid_0's l1: 773.893
---------------------------

[600]	valid_0's l1: 800.961
[620]	valid_0's l1: 794.642
[640]	valid_0's l1: 796.001
[660]	valid_0's l1: 793.773
[680]	valid_0's l1: 790.759
[700]	valid_0's l1: 788.754
[720]	valid_0's l1: 784.168
[740]	valid_0's l1: 781.372
[760]	valid_0's l1: 777.74
[780]	valid_0's l1: 775.688
[800]	valid_0's l1: 773.476
[820]	valid_0's l1: 767.504
[840]	valid_0's l1: 763.975
[860]	valid_0's l1: 762.751
[880]	valid_0's l1: 758.446
[900]	valid_0's l1: 756.473
[920]	valid_0's l1: 758.064
[940]	valid_0's l1: 755.747
[960]	valid_0's l1: 756.982
[980]	valid_0's l1: 752.604
[1000]	valid_0's l1: 749.855
------------------------------------------------------------
Case :  54
boosting parameter :  dart
Learning_rate :  0.001
max_depth :  15
num_leaves :  15
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84
[LightGBM] [Info] Number of data points in the train set: 306768, number of used features: 5
[LightGBM]

[880]	valid_0's l1: 427.6
[900]	valid_0's l1: 425.276
[920]	valid_0's l1: 430.062
[940]	valid_0's l1: 426.746
[960]	valid_0's l1: 428.083
[980]	valid_0's l1: 416.998
[1000]	valid_0's l1: 412.588
------------------------------------------------------------
Case :  58
boosting parameter :  dart
Learning_rate :  0.005
max_depth :  10
num_leaves :  5
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84
[LightGBM] [Info] Number of data points in the train set: 306768, number of used features: 5
[LightGBM] [Info] Start training from score 934.864036
[20]	valid_0's l1: 737.838
[40]	valid_0's l1: 722.256
[60]	valid_0's l1: 713.871
[80]	valid_0's l1: 736.821
[100]	valid_0's l1: 726.887
[120]	valid_0's l1: 737.93
[140]	valid_0's l1: 748.821
[160]	valid_0's l1: 742.808
[180]	valid_0's l1: 741.999
[200]	valid_0's l1: 747.905
[220]	valid_0's l1: 744.576
[240]	valid_0's l1: 732.045
[260]	valid_0's l1

[40]	valid_0's l1: 707.365
[60]	valid_0's l1: 695.364
[80]	valid_0's l1: 717.332
[100]	valid_0's l1: 704.815
[120]	valid_0's l1: 716.741
[140]	valid_0's l1: 726.717
[160]	valid_0's l1: 719.568
[180]	valid_0's l1: 715.92
[200]	valid_0's l1: 722.261
[220]	valid_0's l1: 716.464
[240]	valid_0's l1: 701.217
[260]	valid_0's l1: 690.421
[280]	valid_0's l1: 683.028
[300]	valid_0's l1: 675.176
[320]	valid_0's l1: 660.071
[340]	valid_0's l1: 662.064
[360]	valid_0's l1: 665.029
[380]	valid_0's l1: 653.513
[400]	valid_0's l1: 654.079
[420]	valid_0's l1: 644.69
[440]	valid_0's l1: 647.215
[460]	valid_0's l1: 642.238
[480]	valid_0's l1: 624.284
[500]	valid_0's l1: 619.241
[520]	valid_0's l1: 622.99
[540]	valid_0's l1: 613.696
[560]	valid_0's l1: 602.654
[580]	valid_0's l1: 606.185
[600]	valid_0's l1: 596.127
[620]	valid_0's l1: 577.548
[640]	valid_0's l1: 570.907
[660]	valid_0's l1: 561.526
[680]	valid_0's l1: 553.799
[700]	valid_0's l1: 545.649
[720]	valid_0's l1: 530.678
[740]	valid_0's l1: 524.27

[320]	valid_0's l1: 494.775
[340]	valid_0's l1: 498.274
[360]	valid_0's l1: 502.672
[380]	valid_0's l1: 484.057
[400]	valid_0's l1: 485.663
[420]	valid_0's l1: 473.084
[440]	valid_0's l1: 476.939
[460]	valid_0's l1: 470.4
[480]	valid_0's l1: 444.392
[500]	valid_0's l1: 438.119
[520]	valid_0's l1: 443.711
[540]	valid_0's l1: 432.615
[560]	valid_0's l1: 418.677
[580]	valid_0's l1: 424.402
[600]	valid_0's l1: 412.146
[620]	valid_0's l1: 390.569
[640]	valid_0's l1: 382.518
[660]	valid_0's l1: 373.341
[680]	valid_0's l1: 366.534
[700]	valid_0's l1: 359.271
[720]	valid_0's l1: 345.392
[740]	valid_0's l1: 341.506
[760]	valid_0's l1: 335.101
[780]	valid_0's l1: 330.18
[800]	valid_0's l1: 325.266
[820]	valid_0's l1: 315.417
[840]	valid_0's l1: 308.028
[860]	valid_0's l1: 306.497
[880]	valid_0's l1: 301.305
[900]	valid_0's l1: 300.621
[920]	valid_0's l1: 305.846
[940]	valid_0's l1: 303.403
[960]	valid_0's l1: 305.474
[980]	valid_0's l1: 296.261
[1000]	valid_0's l1: 294.75
-----------------------

[600]	valid_0's l1: 507.74
[620]	valid_0's l1: 487.916
[640]	valid_0's l1: 479.411
[660]	valid_0's l1: 470.287
[680]	valid_0's l1: 464.418
[700]	valid_0's l1: 455.406
[720]	valid_0's l1: 440.488
[740]	valid_0's l1: 436.602
[760]	valid_0's l1: 429.782
[780]	valid_0's l1: 423.798
[800]	valid_0's l1: 417.781
[820]	valid_0's l1: 406.223
[840]	valid_0's l1: 398.381
[860]	valid_0's l1: 395.081
[880]	valid_0's l1: 388.869
[900]	valid_0's l1: 387.387
[920]	valid_0's l1: 392.145
[940]	valid_0's l1: 390.672
[960]	valid_0's l1: 390.53
[980]	valid_0's l1: 381.206
[1000]	valid_0's l1: 377.445
------------------------------------------------------------
Case :  71
boosting parameter :  dart
Learning_rate :  0.01
max_depth :  15
num_leaves :  10
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84
[LightGBM] [Info] Number of data points in the train set: 306768, number of used features: 5
[LightGBM] [

[880]	valid_0's l1: 179.099
[900]	valid_0's l1: 180.754
[920]	valid_0's l1: 184.983
[940]	valid_0's l1: 183.597
[960]	valid_0's l1: 184.978
[980]	valid_0's l1: 179.235
[1000]	valid_0's l1: 179.021
------------------------------------------------------------
Case :  75
boosting parameter :  dart
Learning_rate :  0.05
max_depth :  5
num_leaves :  15
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84
[LightGBM] [Info] Number of data points in the train set: 306768, number of used features: 5
[LightGBM] [Info] Start training from score 934.864036
[20]	valid_0's l1: 486.407
[40]	valid_0's l1: 429.522
[60]	valid_0's l1: 373.913
[80]	valid_0's l1: 373.595
[100]	valid_0's l1: 337.011
[120]	valid_0's l1: 350.3
[140]	valid_0's l1: 349.016
[160]	valid_0's l1: 334.498
[180]	valid_0's l1: 305.656
[200]	valid_0's l1: 310.103
[220]	valid_0's l1: 293.423
[240]	valid_0's l1: 270.622
[260]	valid_0's l1

[40]	valid_0's l1: 531.815
[60]	valid_0's l1: 482.205
[80]	valid_0's l1: 475.156
[100]	valid_0's l1: 435.856
[120]	valid_0's l1: 448.867
[140]	valid_0's l1: 443.63
[160]	valid_0's l1: 427.928
[180]	valid_0's l1: 391.793
[200]	valid_0's l1: 390.306
[220]	valid_0's l1: 374.327
[240]	valid_0's l1: 351.998
[260]	valid_0's l1: 340.58
[280]	valid_0's l1: 324.566
[300]	valid_0's l1: 320.128
[320]	valid_0's l1: 305.648
[340]	valid_0's l1: 310.015
[360]	valid_0's l1: 313.329
[380]	valid_0's l1: 297.759
[400]	valid_0's l1: 300.904
[420]	valid_0's l1: 291.683
[440]	valid_0's l1: 294.518
[460]	valid_0's l1: 292.519
[480]	valid_0's l1: 274.746
[500]	valid_0's l1: 270.322
[520]	valid_0's l1: 275.611
[540]	valid_0's l1: 268.578
[560]	valid_0's l1: 261.116
[580]	valid_0's l1: 265.63
[600]	valid_0's l1: 258.234
[620]	valid_0's l1: 248.327
[640]	valid_0's l1: 244.163
[660]	valid_0's l1: 240.954
[680]	valid_0's l1: 238.174
[700]	valid_0's l1: 234.655
[720]	valid_0's l1: 227.774
[740]	valid_0's l1: 228.12

[320]	valid_0's l1: 192.018
[340]	valid_0's l1: 197.243
[360]	valid_0's l1: 200.038
[380]	valid_0's l1: 190.712
[400]	valid_0's l1: 193.552
[420]	valid_0's l1: 188.21
[440]	valid_0's l1: 191.266
[460]	valid_0's l1: 189.551
[480]	valid_0's l1: 179.019
[500]	valid_0's l1: 179.802
[520]	valid_0's l1: 186.426
[540]	valid_0's l1: 179.993
[560]	valid_0's l1: 176.571
[580]	valid_0's l1: 181.213
[600]	valid_0's l1: 176.174
[620]	valid_0's l1: 171.128
[640]	valid_0's l1: 170.645
[660]	valid_0's l1: 169.859
[680]	valid_0's l1: 169.949
[700]	valid_0's l1: 169.059
[720]	valid_0's l1: 165.372
[740]	valid_0's l1: 166.386
[760]	valid_0's l1: 165.866
[780]	valid_0's l1: 165.69
[800]	valid_0's l1: 164.628
[820]	valid_0's l1: 162.694
[840]	valid_0's l1: 162.553
[860]	valid_0's l1: 163.703
[880]	valid_0's l1: 161.718
[900]	valid_0's l1: 162.44
[920]	valid_0's l1: 165.975
[940]	valid_0's l1: 164.203
[960]	valid_0's l1: 165.085
[980]	valid_0's l1: 161.493
[1000]	valid_0's l1: 162.143
----------------------

[600]	valid_0's l1: 166.308
[620]	valid_0's l1: 161.069
[640]	valid_0's l1: 160.769
[660]	valid_0's l1: 160.099
[680]	valid_0's l1: 160.188
[700]	valid_0's l1: 159.04
[720]	valid_0's l1: 155.969
[740]	valid_0's l1: 157.521
[760]	valid_0's l1: 156.698
[780]	valid_0's l1: 157.099
[800]	valid_0's l1: 156.193
[820]	valid_0's l1: 154.037
[840]	valid_0's l1: 153.732
[860]	valid_0's l1: 154.944
[880]	valid_0's l1: 153.382
[900]	valid_0's l1: 154.326
[920]	valid_0's l1: 157.703
[940]	valid_0's l1: 155.95
[960]	valid_0's l1: 157.068
[980]	valid_0's l1: 153.282
[1000]	valid_0's l1: 153.633
------------------------------------------------------------
Case :  88
boosting parameter :  dart
Learning_rate :  0.1
max_depth :  15
num_leaves :  5
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84
[LightGBM] [Info] Number of data points in the train set: 306768, number of used features: 5
[LightGBM] [In

[820]	valid_0's l1: 568.381
[840]	valid_0's l1: 564.687
[860]	valid_0's l1: 561.045
[880]	valid_0's l1: 557.368
[900]	valid_0's l1: 553.735
[920]	valid_0's l1: 550.333
[940]	valid_0's l1: 546.936
[960]	valid_0's l1: 543.626
[980]	valid_0's l1: 540.34
[1000]	valid_0's l1: 537.057
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 537.057
------------------------------------------------------------
Case :  92
boosting parameter :  goss
Learning_rate :  0.001
max_depth :  5
num_leaves :  10
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84
[LightGBM] [Info] Number of data points in the train set: 306768, number of used features: 5
[LightGBM] [Info] Using GOSS
[LightGBM] [Info] Start training from score 934.864036
Training until validation scores don't improve for 10 rounds
[20]	valid_0's l1: 748.114
[40]	valid_0's l1: 738.909
[60]	valid_0's l1: 729.908
[80]	valid_0's l

[600]	valid_0's l1: 546.98
[620]	valid_0's l1: 541.688
[640]	valid_0's l1: 536.588
[660]	valid_0's l1: 531.538
[680]	valid_0's l1: 526.526
[700]	valid_0's l1: 521.712
[720]	valid_0's l1: 516.984
[740]	valid_0's l1: 512.344
[760]	valid_0's l1: 507.839
[780]	valid_0's l1: 503.365
[800]	valid_0's l1: 499.007
[820]	valid_0's l1: 494.625
[840]	valid_0's l1: 490.347
[860]	valid_0's l1: 486.188
[880]	valid_0's l1: 482.061
[900]	valid_0's l1: 478.017
[920]	valid_0's l1: 474.073
[940]	valid_0's l1: 470.182
[960]	valid_0's l1: 466.388
[980]	valid_0's l1: 462.619
[1000]	valid_0's l1: 458.785
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 458.785
------------------------------------------------------------
Case :  96
boosting parameter :  goss
Learning_rate :  0.001
max_depth :  10
num_leaves :  15
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84
[LightGBM] [Info] Number o

[400]	valid_0's l1: 568.666
[420]	valid_0's l1: 561.198
[440]	valid_0's l1: 553.776
[460]	valid_0's l1: 546.623
[480]	valid_0's l1: 539.674
[500]	valid_0's l1: 532.865
[520]	valid_0's l1: 526.267
[540]	valid_0's l1: 519.828
[560]	valid_0's l1: 513.545
[580]	valid_0's l1: 507.377
[600]	valid_0's l1: 501.583
[620]	valid_0's l1: 495.967
[640]	valid_0's l1: 490.519
[660]	valid_0's l1: 485.224
[680]	valid_0's l1: 480.036
[700]	valid_0's l1: 475.045
[720]	valid_0's l1: 470.085
[740]	valid_0's l1: 465.251
[760]	valid_0's l1: 460.495
[780]	valid_0's l1: 455.901
[800]	valid_0's l1: 451.318
[820]	valid_0's l1: 446.846
[840]	valid_0's l1: 442.448
[860]	valid_0's l1: 438.174
[880]	valid_0's l1: 433.965
[900]	valid_0's l1: 429.913
[920]	valid_0's l1: 426.032
[940]	valid_0's l1: 422.211
[960]	valid_0's l1: 418.439
[980]	valid_0's l1: 414.773
[1000]	valid_0's l1: 411.127
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 411.127
------------------------------------------------------

[240]	valid_0's l1: 506.196
[260]	valid_0's l1: 491.51
[280]	valid_0's l1: 477.78
[300]	valid_0's l1: 463.959
[320]	valid_0's l1: 451.614
[340]	valid_0's l1: 438.643
[360]	valid_0's l1: 427.429
[380]	valid_0's l1: 416.98
[400]	valid_0's l1: 406.822
[420]	valid_0's l1: 397.741
[440]	valid_0's l1: 388.793
[460]	valid_0's l1: 380.305
[480]	valid_0's l1: 372.776
[500]	valid_0's l1: 365.461
[520]	valid_0's l1: 358.053
[540]	valid_0's l1: 352.206
[560]	valid_0's l1: 345.506
[580]	valid_0's l1: 339.97
[600]	valid_0's l1: 333.94
[620]	valid_0's l1: 328.501
[640]	valid_0's l1: 322.43
[660]	valid_0's l1: 316.383
[680]	valid_0's l1: 311.836
[700]	valid_0's l1: 307.428
[720]	valid_0's l1: 302.531
[740]	valid_0's l1: 298.604
[760]	valid_0's l1: 294.142
[780]	valid_0's l1: 290.114
[800]	valid_0's l1: 286.648
[820]	valid_0's l1: 283.281
[840]	valid_0's l1: 280.429
[860]	valid_0's l1: 277.8
[880]	valid_0's l1: 274.986
[900]	valid_0's l1: 272.342
[920]	valid_0's l1: 269.751
[940]	valid_0's l1: 267.42
[

[40]	valid_0's l1: 675.104
[60]	valid_0's l1: 638.245
[80]	valid_0's l1: 604.996
[100]	valid_0's l1: 574.53
[120]	valid_0's l1: 546.548
[140]	valid_0's l1: 521.343
[160]	valid_0's l1: 498.572
[180]	valid_0's l1: 477.585
[200]	valid_0's l1: 458.57
[220]	valid_0's l1: 440.712
[240]	valid_0's l1: 424.502
[260]	valid_0's l1: 409.933
[280]	valid_0's l1: 395.873
[300]	valid_0's l1: 382.689
[320]	valid_0's l1: 370.087
[340]	valid_0's l1: 358.264
[360]	valid_0's l1: 347.847
[380]	valid_0's l1: 337.437
[400]	valid_0's l1: 328.364
[420]	valid_0's l1: 319.598
[440]	valid_0's l1: 311.216
[460]	valid_0's l1: 303.717
[480]	valid_0's l1: 296.687
[500]	valid_0's l1: 290.054
[520]	valid_0's l1: 283.763
[540]	valid_0's l1: 277.898
[560]	valid_0's l1: 272.309
[580]	valid_0's l1: 267.165
[600]	valid_0's l1: 262.325
[620]	valid_0's l1: 257.428
[640]	valid_0's l1: 253.105
[660]	valid_0's l1: 248.994
[680]	valid_0's l1: 244.969
[700]	valid_0's l1: 241.016
[720]	valid_0's l1: 237.495
[740]	valid_0's l1: 234.0

[60]	valid_0's l1: 503.912
[80]	valid_0's l1: 453.907
[100]	valid_0's l1: 413.897
[120]	valid_0's l1: 380.258
[140]	valid_0's l1: 353.203
[160]	valid_0's l1: 330.977
[180]	valid_0's l1: 313.19
[200]	valid_0's l1: 295.714
[220]	valid_0's l1: 280.651
[240]	valid_0's l1: 267.231
[260]	valid_0's l1: 255.934
[280]	valid_0's l1: 245.703
[300]	valid_0's l1: 236.836
[320]	valid_0's l1: 228.412
[340]	valid_0's l1: 221.467
[360]	valid_0's l1: 215.147
[380]	valid_0's l1: 208.943
[400]	valid_0's l1: 204.258
[420]	valid_0's l1: 199.448
[440]	valid_0's l1: 195.67
[460]	valid_0's l1: 192.134
[480]	valid_0's l1: 189.046
[500]	valid_0's l1: 186.265
[520]	valid_0's l1: 183.763
[540]	valid_0's l1: 181.628
[560]	valid_0's l1: 179.893
[580]	valid_0's l1: 178.204
[600]	valid_0's l1: 176.658
[620]	valid_0's l1: 175.508
[640]	valid_0's l1: 174.406
[660]	valid_0's l1: 173.447
[680]	valid_0's l1: 172.75
[700]	valid_0's l1: 171.768
[720]	valid_0's l1: 171.028
[740]	valid_0's l1: 170.212
[760]	valid_0's l1: 169.5

[20]	valid_0's l1: 704.189
[40]	valid_0's l1: 658.198
[60]	valid_0's l1: 612.808
[80]	valid_0's l1: 572.365
[100]	valid_0's l1: 536.343
[120]	valid_0's l1: 505.551
[140]	valid_0's l1: 475.763
[160]	valid_0's l1: 450.521
[180]	valid_0's l1: 426.605
[200]	valid_0's l1: 405.926
[220]	valid_0's l1: 387.525
[240]	valid_0's l1: 371.84
[260]	valid_0's l1: 357.482
[280]	valid_0's l1: 344.901
[300]	valid_0's l1: 333.513
[320]	valid_0's l1: 322.068
[340]	valid_0's l1: 311.104
[360]	valid_0's l1: 302.283
[380]	valid_0's l1: 293.804
[400]	valid_0's l1: 286.232
[420]	valid_0's l1: 280.056
[440]	valid_0's l1: 274.058
[460]	valid_0's l1: 269.197
[480]	valid_0's l1: 264.912
[500]	valid_0's l1: 260.71
[520]	valid_0's l1: 257.335
[540]	valid_0's l1: 253.986
[560]	valid_0's l1: 251.349
[580]	valid_0's l1: 248.266
[600]	valid_0's l1: 245.628
[620]	valid_0's l1: 243.047
[640]	valid_0's l1: 240.791
[660]	valid_0's l1: 238.773
[680]	valid_0's l1: 236.719
[700]	valid_0's l1: 235.22
[720]	valid_0's l1: 233.687

[40]	valid_0's l1: 323.898
[60]	valid_0's l1: 261.414
[80]	valid_0's l1: 224.637
[100]	valid_0's l1: 201.539
[120]	valid_0's l1: 189.398
[140]	valid_0's l1: 184.085
[160]	valid_0's l1: 180.18
[180]	valid_0's l1: 176.858
[200]	valid_0's l1: 175.129
[220]	valid_0's l1: 173.332
[240]	valid_0's l1: 172.379
[260]	valid_0's l1: 171.44
[280]	valid_0's l1: 170.436
[300]	valid_0's l1: 169.576
[320]	valid_0's l1: 168.535
[340]	valid_0's l1: 167.449
[360]	valid_0's l1: 166.475
[380]	valid_0's l1: 165.575
[400]	valid_0's l1: 164.853
[420]	valid_0's l1: 164.324
[440]	valid_0's l1: 163.372
[460]	valid_0's l1: 162.785
[480]	valid_0's l1: 162.12
[500]	valid_0's l1: 161.558
[520]	valid_0's l1: 160.934
[540]	valid_0's l1: 160.211
[560]	valid_0's l1: 159.814
[580]	valid_0's l1: 159.375
[600]	valid_0's l1: 159.14
[620]	valid_0's l1: 158.585
[640]	valid_0's l1: 158.147
[660]	valid_0's l1: 157.736
[680]	valid_0's l1: 157.306
[700]	valid_0's l1: 156.854
[720]	valid_0's l1: 156.58
[740]	valid_0's l1: 156.19
[

[460]	valid_0's l1: 150.199
[480]	valid_0's l1: 149.81
Early stopping, best iteration is:
[483]	valid_0's l1: 149.697
------------------------------------------------------------
Case :  124
boosting parameter :  goss
Learning_rate :  0.05
max_depth :  15
num_leaves :  5
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84
[LightGBM] [Info] Number of data points in the train set: 306768, number of used features: 5
[LightGBM] [Info] Using GOSS
[LightGBM] [Info] Start training from score 934.864036
Training until validation scores don't improve for 10 rounds
[20]	valid_0's l1: 529.954
[40]	valid_0's l1: 403.681
[60]	valid_0's l1: 329.266
[80]	valid_0's l1: 283.738
[100]	valid_0's l1: 258.515
[120]	valid_0's l1: 243.786
[140]	valid_0's l1: 234.575
[160]	valid_0's l1: 227.689
[180]	valid_0's l1: 223.175
[200]	valid_0's l1: 219.57
[220]	valid_0's l1: 217.308
[240]	valid_0's l1: 214.815
[260]

[380]	valid_0's l1: 156.029
[400]	valid_0's l1: 155.285
[420]	valid_0's l1: 154.848
[440]	valid_0's l1: 154.187
[460]	valid_0's l1: 153.796
[480]	valid_0's l1: 153.182
[500]	valid_0's l1: 152.912
[520]	valid_0's l1: 152.532
[540]	valid_0's l1: 151.99
[560]	valid_0's l1: 151.661
Early stopping, best iteration is:
[564]	valid_0's l1: 151.54
------------------------------------------------------------
Case :  129
boosting parameter :  goss
Learning_rate :  0.1
max_depth :  5
num_leaves :  15
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84
[LightGBM] [Info] Number of data points in the train set: 306768, number of used features: 5
[LightGBM] [Info] Using GOSS
[LightGBM] [Info] Start training from score 934.864036
Training until validation scores don't improve for 10 rounds
[20]	valid_0's l1: 289.767
[40]	valid_0's l1: 199.734
[60]	valid_0's l1: 175.061
[80]	valid_0's l1: 167.605
[100]	

[360]	valid_0's l1: 153.93
[380]	valid_0's l1: 153.092
[400]	valid_0's l1: 152.635
[420]	valid_0's l1: 152.167
Early stopping, best iteration is:
[417]	valid_0's l1: 152.071
------------------------------------------------------------
Case :  135
boosting parameter :  goss
Learning_rate :  0.1
max_depth :  15
num_leaves :  15
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84
[LightGBM] [Info] Number of data points in the train set: 306768, number of used features: 5
[LightGBM] [Info] Using GOSS
[LightGBM] [Info] Start training from score 934.864036
Training until validation scores don't improve for 10 rounds
[20]	valid_0's l1: 287.055
[40]	valid_0's l1: 197.334
[60]	valid_0's l1: 172.815
[80]	valid_0's l1: 165.205
[100]	valid_0's l1: 161.52
[120]	valid_0's l1: 159.04
[140]	valid_0's l1: 157.482
[160]	valid_0's l1: 155.946
[180]	valid_0's l1: 154.448
[200]	valid_0's l1: 153.185
[220]	

In [55]:
# Display the grid-search result table: one row per hyper-parameter case with its MAE.
MAE_grid_search

Unnamed: 0,case number,boosting,Lr,max_depth,num_leaves,MAE
0,1.0,gbdt,0.001,5.0,5.0,537.056790
1,2.0,gbdt,0.001,5.0,10.0,458.751083
2,3.0,gbdt,0.001,5.0,15.0,414.755716
3,4.0,gbdt,0.001,10.0,5.0,537.056790
4,5.0,gbdt,0.001,10.0,10.0,458.785211
...,...,...,...,...,...,...
130,131.0,goss,0.100,10.0,10.0,152.071347
131,132.0,goss,0.100,10.0,15.0,150.491967
132,133.0,goss,0.100,15.0,5.0,164.729394
133,134.0,goss,0.100,15.0,10.0,152.071347


In [65]:
# Allow up to 135 rows to be rendered so the whole ranking is visible.
pd.options.display.max_rows = 135

# Rank the grid-search cases by the last column of the table (the MAE),
# smallest error first (ascending is the sort_values default).
score_column = MAE_grid_search.columns[-1]
MAE_grid_search_rank = MAE_grid_search.sort_values(by=score_column)
MAE_grid_search_rank

Unnamed: 0,case number,boosting,Lr,max_depth,num_leaves,MAE
35,36.0,gbdt,0.05,15.0,15.0,147.704784
41,42.0,gbdt,0.1,10.0,15.0,148.149539
44,45.0,gbdt,0.1,15.0,15.0,149.402471
134,135.0,goss,0.1,15.0,15.0,149.605157
122,123.0,goss,0.05,10.0,15.0,149.696827
40,41.0,gbdt,0.1,10.0,10.0,150.044331
43,44.0,gbdt,0.1,15.0,10.0,150.044331
125,126.0,goss,0.05,15.0,15.0,150.175329
131,132.0,goss,0.1,10.0,15.0,150.491967
121,122.0,goss,0.05,10.0,10.0,151.468401


## 최고성능 재학습

In [67]:
# Re-train a single model with the hyper-parameters of the top-ranked
# grid-search case (row 0 of MAE_grid_search_rank, i.e. the lowest MAE).
d_train = lgb.Dataset(train_x, label=train_y)
d_val = lgb.Dataset(val_x, label=val_y)

# Positional columns of the ranking table:
# 1 = boosting, 2 = Lr, 3 = max_depth, 4 = num_leaves.
best_boosting = MAE_grid_search_rank.iat[0, 1]
best_learning_rate = MAE_grid_search_rank.iat[0, 2]
# The table stores these two as floats; LightGBM expects integers.
best_max_depth = int(MAE_grid_search_rank.iat[0, 3])
best_num_leaves = int(MAE_grid_search_rank.iat[0, 4])

params = dict(
    objective='regression',
    metric='mae',
    seed=42,
    boosting=best_boosting,
    learning_rate=best_learning_rate,
    max_depth=best_max_depth,
    num_leaves=best_num_leaves,
)

# Train for at most 3000 rounds, logging the validation metric every 20 rounds
# and stopping once it has not improved for 10 rounds.
# NOTE(review): the verbose_eval / early_stopping_rounds keyword arguments were
# removed from lgb.train in LightGBM 4.0 (replaced by callbacks) - confirm the
# installed LightGBM version before upgrading.
model = lgb.train(
    params,
    d_train,
    num_boost_round=3000,
    valid_sets=d_val,
    verbose_eval=20,
    early_stopping_rounds=10,
)


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84
[LightGBM] [Info] Number of data points in the train set: 306768, number of used features: 5
[LightGBM] [Info] Start training from score 934.864036
Training until validation scores don't improve for 10 rounds
[20]	valid_0's l1: 403.537
[40]	valid_0's l1: 288.135
[60]	valid_0's l1: 230.448
[80]	valid_0's l1: 198.226
[100]	valid_0's l1: 182.289
[120]	valid_0's l1: 172.962
[140]	valid_0's l1: 167.782
[160]	valid_0's l1: 164.347
[180]	valid_0's l1: 161.96
[200]	valid_0's l1: 160.2
[220]	valid_0's l1: 159.153
[240]	valid_0's l1: 157.994
[260]	valid_0's l1: 157.21
[280]	valid_0's l1: 156.357
[300]	valid_0's l1: 155.635
[320]	valid_0's l1: 155.23
[340]	valid_0's l1: 154.689
[360]	valid_0's l1: 154.284
[380]	valid_0's l1: 153.733
[400]	valid_0's l1: 153.434
[420]	valid_0's l1: 153.021
[440]	valid_0's l1: 152.798
[460]	valid_0's l1: 152.477
[4

# 추론 및 결과 제출

In [68]:
# Load the rows to predict and the sample submission file that will be filled in.
test_csv = 'data/test.csv'
submission_csv = 'data/sample_submission.csv'
test = pd.read_csv(test_csv)
submission = pd.read_csv(submission_csv)

In [69]:
# Preview the first rows of the test set; each row is a single 'date hour supplier' key string.
test.head()

Unnamed: 0,일자|시간|구분
0,2019-01-01 01 A
1,2019-01-01 02 A
2,2019-01-01 03 A
3,2019-01-01 04 A
4,2019-01-01 05 A


In [70]:
# Preview the submission template: the same key column plus a '공급량' column to fill in.
submission.head()

Unnamed: 0,일자|시간|구분,공급량
0,2019-01-01 01 A,0
1,2019-01-01 02 A,0
2,2019-01-01 03 A,0
3,2019-01-01 04 A,0
4,2019-01-01 05 A,0


In [71]:
# Break the combined 'date hour supplier' key into its three space-separated
# fields, splitting the column once and reusing the pieces.
key_parts = test['일자|시간|구분'].str.split(' ')
test['일자'] = key_parts.str[0]
# The hour is used as a numeric feature, so convert it from text to int.
test['시간'] = key_parts.str[1].astype(int)
test['구분'] = key_parts.str[2]

In [72]:
# Parse the date strings, then derive the same calendar columns that were
# built for the training data (year, month, day, weekday).
test['일자'] = pd.to_datetime(test['일자'])
date_accessor = test['일자'].dt
for calendar_field in ('year', 'month', 'day', 'weekday'):
    test[calendar_field] = getattr(date_accessor, calendar_field)

In [73]:
# Encode the supplier letter with d_map, the same letter -> integer table applied to the training data.
# NOTE(review): Series.map yields NaN for values missing from d_map, so re-running this cell
# on the already-encoded column would blank it - confirm it is executed only once.
test['구분'] = test['구분'].map(d_map)

In [74]:
# Select the model inputs using the same `features` list (and column order) as train_x.
test_x = test[features]

In [75]:
# Display the feature matrix that will be passed to the model.
test_x

Unnamed: 0,구분,month,day,weekday,시간
0,0,1,1,1,1
1,0,1,1,1,2
2,0,1,1,1,3
3,0,1,1,1,4
4,0,1,1,1,5
...,...,...,...,...,...
15115,6,3,31,6,20
15116,6,3,31,6,21
15117,6,3,31,6,22
15118,6,3,31,6,23


In [76]:
# Predict the supply for every test row with the retrained model.
preds = model.predict(test_x)

In [77]:
# Overwrite the '공급량' column of the submission template with the predictions.
# NOTE(review): assumes submission rows are in the same order as the test rows - confirm.
submission['공급량'] = preds

In [78]:
# Display the filled-in submission table as a final sanity check.
submission

Unnamed: 0,일자|시간|구분,공급량
0,2019-01-01 01 A,2162.578630
1,2019-01-01 02 A,1972.627825
2,2019-01-01 03 A,1851.811462
3,2019-01-01 04 A,1897.705573
4,2019-01-01 05 A,2061.499260
...,...,...
15115,2019-03-31 20 H,393.660227
15116,2019-03-31 21 H,388.241693
15117,2019-03-31 22 H,373.396772
15118,2019-03-31 23 H,343.524613


In [80]:
from pathlib import Path

# Save the submission (key column and '공급량') without the DataFrame index.
# to_csv does not create missing folders, so make sure the output directory
# exists first; otherwise a fresh checkout fails here after all the training.
output_path = Path('./prediction results/grid_search_1.0.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
submission.to_csv(output_path, index=False)