In [16]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import lightgbm as lgb
import pandas as pd
import numpy as np
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import optuna, joblib

In [17]:
sc = StandardScaler()

In [18]:
### create an objective function 
def objective(
    trial #this is the ONLY arguement is default and a must
    ):
    
    #get data
    df_train = pd.read_csv('train_data.csv')
    df_test = pd.read_csv('test_data.csv')
    df_train.columns = ['Aerosol', 
              'Blue', 
              'Green', 
              'Red', 
              'NIR', 
              'SWIR-1', 
              'SWIR-2',
              'Chl-a']
              
    df_test.columns = ['Aerosol', 
              'Blue', 
              'Green', 
              'Red', 
              'NIR', 
              'SWIR-1', 
              'SWIR-2',
              'Chl-a']
    
    #separate X and y
    # and compute NDVI
    X_train, y_train = df_train.iloc[:,:-1], df_train.iloc[:,-1]
    X_test, y_test = df_test.iloc[:,:-1], df_test.iloc[:,-1]
    
    #add NDVI as features
    X_train['NDVI'] = (X_train['NIR'] - X_train['Red']) / (X_train['NIR'] + X_train['Red']) 
    X_test['NDVI'] = (X_test['NIR'] - X_test['Red']) / (X_test['NIR'] + X_test['Red']) 
    
    X_train_sc = sc.fit_transform(X_train)
    X_test_sc = sc.transform(X_test)
    
    dtrain = lgb.Dataset(X_train_sc, label = y_train)
    dvalid = lgb.Dataset(X_test_sc, label = y_test)
    
    # in optuna, setting of parameters is as follows:
    '''
    dictionary = {
    hyperparameter_alias: trial.suggest_distribution(
                           'hyperparameter_alias',
                           ['distribution values]'
                           )
    }    
    
    distribution to choose from:
    
    uniform — float values
    loguniform — float values
    discrete_uniform — float values with intervals
    int — integer values
    categorical — categorical values from a list
    '''
    _EPS = 1e-12
    #example:
    params = {
        'task': 'train',
        'boosting_type': trial.suggest_categorical('boosting_type', ['gbdt', 'rf']),
        'metric': 'rmse',
        'objective': trial.suggest_categorical('objective', ['regression_l1', 'regression_l2']),
        'verbosity': 0,
        "seed": 42,
        "learning_rate": trial.suggest_loguniform('learning_rate', 0.05, 1),
        'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
        'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
        'num_leaves': trial.suggest_int('num_leaves', 2, 256),
        'feature_fraction': min(trial.suggest_float("feature_fraction", 0.4, 1.0 + 1e-12), 1.0),
        'bagging_fraction': min(trial.suggest_float("bagging_fraction", 0.4, 1.0 + 1e-12), 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 20),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100)
        }          
    
    pruning_callback = optuna.integration.LightGBMPruningCallback(trial, "rmse", 'eval')

    model = lgb.train(
            params, 
            dtrain, 
            valid_names = ['eval', 'train'], 
            valid_sets = [dvalid, dtrain],
            early_stopping_rounds = 500,
            num_boost_round  = 10000,
            )

    joblib.dump(model, 'OPTIMIZED_MODEL.sav') #save model for future use

    prediction = model.predict(X_test_sc) #apply to test
    
    rmse = np.sqrt(mean_squared_error(y_test, prediction))
    return rmse

In [28]:
#optimize hyper param
study = optuna.create_study(direction = 'minimize') # minimize for regression, maximize for classification
study.optimize(objective, n_trials = 30)

[32m[I 2021-03-03 17:19:07,368][0m A new study created in memory with name: no-name-8724e0bb-7629-4527-abef-0ca4ddafd628[0m


[1]	train's rmse: 2.16969	eval's rmse: 2.60823
Training until validation scores don't improve for 500 rounds
[2]	train's rmse: 2.16969	eval's rmse: 2.60823
[3]	train's rmse: 2.16969	eval's rmse: 2.60823
[4]	train's rmse: 2.16969	eval's rmse: 2.60823
[5]	train's rmse: 2.07459	eval's rmse: 2.48474
[6]	train's rmse: 2.04167	eval's rmse: 2.42885
[7]	train's rmse: 2.03397	eval's rmse: 2.40292
[8]	train's rmse: 2.03698	eval's rmse: 2.39139
[9]	train's rmse: 1.98904	eval's rmse: 2.33033
[10]	train's rmse: 1.96283	eval's rmse: 2.29392
[11]	train's rmse: 1.94976	eval's rmse: 2.27276
[12]	train's rmse: 1.94474	eval's rmse: 2.26124
[13]	train's rmse: 1.92544	eval's rmse: 2.24435
[14]	train's rmse: 1.91393	eval's rmse: 2.23538
[15]	train's rmse: 1.90779	eval's rmse: 2.23175
[16]	train's rmse: 1.90536	eval's rmse: 2.23176
[17]	train's rmse: 1.89952	eval's rmse: 2.22582
[18]	train's rmse: 1.89696	eval's rmse: 2.22302
[19]	train's rmse: 1.89678	eval's rmse: 2.2225
[20]	train's rmse: 1.89832	eval's rm

[32m[I 2021-03-03 17:19:09,921][0m Trial 0 finished with value: 2.156299000455626 and parameters: {'boosting_type': 'rf', 'objective': 'regression_l2', 'learning_rate': 0.7276693042893765, 'lambda_l1': 3.3529176656274756e-05, 'lambda_l2': 0.0011762597260586286, 'num_leaves': 108, 'feature_fraction': 0.9657941456050193, 'bagging_fraction': 0.743053336476548, 'bagging_freq': 4, 'min_child_samples': 13}. Best is trial 0 with value: 2.156299000455626.[0m



[521]	train's rmse: 1.87379	eval's rmse: 2.18972
[522]	train's rmse: 1.87395	eval's rmse: 2.19023
[523]	train's rmse: 1.87412	eval's rmse: 2.19074
[524]	train's rmse: 1.87429	eval's rmse: 2.19125
[525]	train's rmse: 1.87447	eval's rmse: 2.19128
[526]	train's rmse: 1.87466	eval's rmse: 2.19131
[527]	train's rmse: 1.87484	eval's rmse: 2.19135
[528]	train's rmse: 1.87503	eval's rmse: 2.19139
[529]	train's rmse: 1.87497	eval's rmse: 2.19139
Early stopping, best iteration is:
[29]	train's rmse: 1.87072	eval's rmse: 2.1563
[1]	train's rmse: 4.05475	eval's rmse: 3.97313
Training until validation scores don't improve for 500 rounds
[2]	train's rmse: 3.56808	eval's rmse: 3.52964
[3]	train's rmse: 3.18426	eval's rmse: 3.19049
[4]	train's rmse: 2.83096	eval's rmse: 2.8829
[5]	train's rmse: 2.58138	eval's rmse: 2.668
[6]	train's rmse: 2.39737	eval's rmse: 2.5227
[7]	train's rmse: 2.21509	eval's rmse: 2.38765
[8]	train's rmse: 2.07694	eval's rmse: 2.29277
[9]	train's rmse: 1.95879	eval's rmse: 2.2

[32m[I 2021-03-03 17:19:12,155][0m Trial 1 finished with value: 1.9947815643578437 and parameters: {'boosting_type': 'gbdt', 'objective': 'regression_l2', 'learning_rate': 0.17568147026536265, 'lambda_l1': 5.3812219108034475e-06, 'lambda_l2': 4.663884306269143e-08, 'num_leaves': 50, 'feature_fraction': 0.5088299735497699, 'bagging_fraction': 0.7456699147282794, 'bagging_freq': 2, 'min_child_samples': 15}. Best is trial 1 with value: 1.9947815643578437.[0m



[549]	train's rmse: 0.104965	eval's rmse: 2.04778
[550]	train's rmse: 0.104848	eval's rmse: 2.0475
[551]	train's rmse: 0.104647	eval's rmse: 2.04776
[552]	train's rmse: 0.104588	eval's rmse: 2.0479
[553]	train's rmse: 0.104439	eval's rmse: 2.04816
[554]	train's rmse: 0.104354	eval's rmse: 2.04814
[555]	train's rmse: 0.104324	eval's rmse: 2.04755
[556]	train's rmse: 0.104428	eval's rmse: 2.04732
[557]	train's rmse: 0.104111	eval's rmse: 2.04743
[558]	train's rmse: 0.103887	eval's rmse: 2.04755
[559]	train's rmse: 0.103636	eval's rmse: 2.04763
[560]	train's rmse: 0.103564	eval's rmse: 2.04777
[561]	train's rmse: 0.103447	eval's rmse: 2.04787
[562]	train's rmse: 0.103387	eval's rmse: 2.04789
[563]	train's rmse: 0.103191	eval's rmse: 2.0479
[564]	train's rmse: 0.103033	eval's rmse: 2.04772
[565]	train's rmse: 0.102918	eval's rmse: 2.04776
[566]	train's rmse: 0.102849	eval's rmse: 2.04775
[567]	train's rmse: 0.102571	eval's rmse: 2.04783
[568]	train's rmse: 0.102336	eval's rmse: 2.04797
[5

[32m[I 2021-03-03 17:19:12,815][0m Trial 2 finished with value: 3.034844807238789 and parameters: {'boosting_type': 'rf', 'objective': 'regression_l2', 'learning_rate': 0.16518129757776293, 'lambda_l1': 9.692327750492452e-08, 'lambda_l2': 1.7908355442387087e-07, 'num_leaves': 111, 'feature_fraction': 0.9930634979545682, 'bagging_fraction': 0.7485595629355742, 'bagging_freq': 5, 'min_child_samples': 99}. Best is trial 1 with value: 1.9947815643578437.[0m



[583]	train's rmse: 3.00261	eval's rmse: 3.06788
[584]	train's rmse: 3.00261	eval's rmse: 3.06785
[585]	train's rmse: 3.00261	eval's rmse: 3.06783
Early stopping, best iteration is:
[85]	train's rmse: 2.9902	eval's rmse: 3.03484
[1]	train's rmse: 4.30937	eval's rmse: 4.15798
Training until validation scores don't improve for 500 rounds
[2]	train's rmse: 4.06495	eval's rmse: 3.92012
[3]	train's rmse: 3.81264	eval's rmse: 3.68015
[4]	train's rmse: 3.59758	eval's rmse: 3.46555
[5]	train's rmse: 3.40098	eval's rmse: 3.28529
[6]	train's rmse: 3.25175	eval's rmse: 3.13324
[7]	train's rmse: 3.12213	eval's rmse: 3.02954
[8]	train's rmse: 3.0237	eval's rmse: 2.92022
[9]	train's rmse: 2.954	eval's rmse: 2.86178
[10]	train's rmse: 2.89806	eval's rmse: 2.79078
[11]	train's rmse: 2.84048	eval's rmse: 2.74442
[12]	train's rmse: 2.78144	eval's rmse: 2.69771
[13]	train's rmse: 2.74312	eval's rmse: 2.66558
[14]	train's rmse: 2.70515	eval's rmse: 2.61901
[15]	train's rmse: 2.67674	eval's rmse: 2.59813


[32m[I 2021-03-03 17:19:16,716][0m Trial 3 finished with value: 2.0709616824595507 and parameters: {'boosting_type': 'gbdt', 'objective': 'regression_l1', 'learning_rate': 0.13621183929565922, 'lambda_l1': 2.5287268212656078e-05, 'lambda_l2': 0.6365404499347811, 'num_leaves': 64, 'feature_fraction': 0.6466846833505775, 'bagging_fraction': 0.24226833416495908, 'bagging_freq': 1, 'min_child_samples': 22}. Best is trial 1 with value: 1.9947815643578437.[0m


[1]	train's rmse: 2.43419	eval's rmse: 2.73003
Training until validation scores don't improve for 500 rounds
[2]	train's rmse: 2.21409	eval's rmse: 2.57908
[3]	train's rmse: 1.97813	eval's rmse: 2.37802
[4]	train's rmse: 1.98812	eval's rmse: 2.37878
[5]	train's rmse: 2.01736	eval's rmse: 2.39956
[6]	train's rmse: 1.98705	eval's rmse: 2.35894
[7]	train's rmse: 2.00785	eval's rmse: 2.36496
[8]	train's rmse: 1.94511	eval's rmse: 2.31081
[9]	train's rmse: 1.90711	eval's rmse: 2.26388
[10]	train's rmse: 1.91569	eval's rmse: 2.26251
[11]	train's rmse: 1.91806	eval's rmse: 2.25609
[12]	train's rmse: 1.9192	eval's rmse: 2.25427
[13]	train's rmse: 1.91744	eval's rmse: 2.25417
[14]	train's rmse: 1.92266	eval's rmse: 2.26174
[15]	train's rmse: 1.92033	eval's rmse: 2.25449
[16]	train's rmse: 1.90166	eval's rmse: 2.25064
[17]	train's rmse: 1.90901	eval's rmse: 2.26258
[18]	train's rmse: 1.91148	eval's rmse: 2.26484
[19]	train's rmse: 1.9137	eval's rmse: 2.26254
[20]	train's rmse: 1.92103	eval's rms

[32m[I 2021-03-03 17:19:17,945][0m Trial 4 finished with value: 2.25063667733383 and parameters: {'boosting_type': 'rf', 'objective': 'regression_l1', 'learning_rate': 0.2385918605698641, 'lambda_l1': 5.536683930397035e-07, 'lambda_l2': 2.090869114671507, 'num_leaves': 155, 'feature_fraction': 0.5325609432044951, 'bagging_fraction': 0.982744910502968, 'bagging_freq': 5, 'min_child_samples': 11}. Best is trial 1 with value: 1.9947815643578437.[0m



[499]	train's rmse: 1.95092	eval's rmse: 2.29057
[500]	train's rmse: 1.9499	eval's rmse: 2.28981
[501]	train's rmse: 1.95	eval's rmse: 2.28984
[502]	train's rmse: 1.9501	eval's rmse: 2.28986
[503]	train's rmse: 1.95021	eval's rmse: 2.28989
[504]	train's rmse: 1.95034	eval's rmse: 2.29018
[505]	train's rmse: 1.9505	eval's rmse: 2.29049
[506]	train's rmse: 1.95074	eval's rmse: 2.29057
[507]	train's rmse: 1.95065	eval's rmse: 2.29059
[508]	train's rmse: 1.95062	eval's rmse: 2.29061
[509]	train's rmse: 1.95057	eval's rmse: 2.29037
[510]	train's rmse: 1.95052	eval's rmse: 2.29013
[511]	train's rmse: 1.95073	eval's rmse: 2.29012
[512]	train's rmse: 1.95091	eval's rmse: 2.29023
[513]	train's rmse: 1.95	eval's rmse: 2.28908
[514]	train's rmse: 1.95002	eval's rmse: 2.28892
[515]	train's rmse: 1.95014	eval's rmse: 2.28877
[516]	train's rmse: 1.9504	eval's rmse: 2.28881
Early stopping, best iteration is:
[16]	train's rmse: 1.90166	eval's rmse: 2.25064
[1]	train's rmse: 3.23073	eval's rmse: 3.242

[32m[I 2021-03-03 17:19:18,390][0m Trial 5 finished with value: 3.0233064517613144 and parameters: {'boosting_type': 'rf', 'objective': 'regression_l2', 'learning_rate': 0.1708185883418954, 'lambda_l1': 6.019023282236183e-08, 'lambda_l2': 3.325814854134496e-08, 'num_leaves': 103, 'feature_fraction': 0.8202132787840883, 'bagging_fraction': 0.27803721017644956, 'bagging_freq': 3, 'min_child_samples': 58}. Best is trial 1 with value: 1.9947815643578437.[0m



[341]	train's rmse: 3.10596	eval's rmse: 3.03372
[342]	train's rmse: 3.10588	eval's rmse: 3.0333
[343]	train's rmse: 3.10615	eval's rmse: 3.03346
[344]	train's rmse: 3.10641	eval's rmse: 3.03361
[345]	train's rmse: 3.10667	eval's rmse: 3.03376
[346]	train's rmse: 3.10685	eval's rmse: 3.03396
[347]	train's rmse: 3.10703	eval's rmse: 3.03415
[348]	train's rmse: 3.10722	eval's rmse: 3.03435
[349]	train's rmse: 3.10734	eval's rmse: 3.0344
[350]	train's rmse: 3.10746	eval's rmse: 3.03445
[351]	train's rmse: 3.10717	eval's rmse: 3.03495
[352]	train's rmse: 3.10722	eval's rmse: 3.03488
[353]	train's rmse: 3.10726	eval's rmse: 3.03481
[354]	train's rmse: 3.1073	eval's rmse: 3.03474
[355]	train's rmse: 3.10748	eval's rmse: 3.0349
[356]	train's rmse: 3.10766	eval's rmse: 3.03505
[357]	train's rmse: 3.10784	eval's rmse: 3.03521
[358]	train's rmse: 3.1074	eval's rmse: 3.03542
[359]	train's rmse: 3.10725	eval's rmse: 3.03498
[360]	train's rmse: 3.10682	eval's rmse: 3.0352
[361]	train's rmse: 3.107

[32m[I 2021-03-03 17:19:19,372][0m Trial 6 finished with value: 3.0309048485581855 and parameters: {'boosting_type': 'rf', 'objective': 'regression_l2', 'learning_rate': 0.4830704665120263, 'lambda_l1': 1.3979057327742972, 'lambda_l2': 0.038985795187525304, 'num_leaves': 247, 'feature_fraction': 0.4754217938114377, 'bagging_fraction': 0.4287873573507357, 'bagging_freq': 1, 'min_child_samples': 91}. Best is trial 1 with value: 1.9947815643578437.[0m



[402]	train's rmse: 3.0834	eval's rmse: 3.05418
[403]	train's rmse: 3.08363	eval's rmse: 3.05442
[404]	train's rmse: 3.08361	eval's rmse: 3.05423
[405]	train's rmse: 3.08366	eval's rmse: 3.05414
[406]	train's rmse: 3.08389	eval's rmse: 3.05442
[407]	train's rmse: 3.08399	eval's rmse: 3.05436
[408]	train's rmse: 3.08397	eval's rmse: 3.05405
[409]	train's rmse: 3.08448	eval's rmse: 3.0548
[410]	train's rmse: 3.08456	eval's rmse: 3.0549
[411]	train's rmse: 3.08455	eval's rmse: 3.05496
[412]	train's rmse: 3.08451	eval's rmse: 3.05481
[413]	train's rmse: 3.08427	eval's rmse: 3.05492
[414]	train's rmse: 3.08435	eval's rmse: 3.05463
[415]	train's rmse: 3.08419	eval's rmse: 3.05484
[416]	train's rmse: 3.08421	eval's rmse: 3.05466
[417]	train's rmse: 3.08417	eval's rmse: 3.05457
[418]	train's rmse: 3.08421	eval's rmse: 3.0544
[419]	train's rmse: 3.0843	eval's rmse: 3.05474
[420]	train's rmse: 3.08441	eval's rmse: 3.05469
[421]	train's rmse: 3.08502	eval's rmse: 3.05514
[422]	train's rmse: 3.08

[32m[I 2021-03-03 17:19:21,853][0m Trial 7 finished with value: 2.88068616528071 and parameters: {'boosting_type': 'rf', 'objective': 'regression_l1', 'learning_rate': 0.20758696334344026, 'lambda_l1': 0.001418312474605732, 'lambda_l2': 0.0940050819707316, 'num_leaves': 217, 'feature_fraction': 0.832650453031106, 'bagging_fraction': 0.42138853084840167, 'bagging_freq': 5, 'min_child_samples': 38}. Best is trial 1 with value: 1.9947815643578437.[0m


[1]	train's rmse: 2.59392	eval's rmse: 2.6947
Training until validation scores don't improve for 500 rounds
[2]	train's rmse: 2.54303	eval's rmse: 2.66166
[3]	train's rmse: 2.37084	eval's rmse: 2.58374
[4]	train's rmse: 2.36487	eval's rmse: 2.55904
[5]	train's rmse: 2.38253	eval's rmse: 2.56507
[6]	train's rmse: 2.35145	eval's rmse: 2.50468
[7]	train's rmse: 2.31475	eval's rmse: 2.46143
[8]	train's rmse: 2.27226	eval's rmse: 2.43546
[9]	train's rmse: 2.25189	eval's rmse: 2.44692
[10]	train's rmse: 2.24009	eval's rmse: 2.43938
[11]	train's rmse: 2.23541	eval's rmse: 2.42143
[12]	train's rmse: 2.23733	eval's rmse: 2.43241
[13]	train's rmse: 2.23516	eval's rmse: 2.41956
[14]	train's rmse: 2.24109	eval's rmse: 2.41555
[15]	train's rmse: 2.23891	eval's rmse: 2.4201
[16]	train's rmse: 2.2239	eval's rmse: 2.4145
[17]	train's rmse: 2.21946	eval's rmse: 2.40869
[18]	train's rmse: 2.22065	eval's rmse: 2.40537
[19]	train's rmse: 2.21661	eval's rmse: 2.39167
[20]	train's rmse: 2.20857	eval's rmse:

[32m[I 2021-03-03 17:19:23,309][0m Trial 8 finished with value: 2.2740004949109305 and parameters: {'boosting_type': 'rf', 'objective': 'regression_l2', 'learning_rate': 0.5972367892590309, 'lambda_l1': 2.973847047434891e-05, 'lambda_l2': 0.02298662747651347, 'num_leaves': 219, 'feature_fraction': 0.5066765236569417, 'bagging_fraction': 0.4166975994658211, 'bagging_freq': 6, 'min_child_samples': 11}. Best is trial 1 with value: 1.9947815643578437.[0m



[548]	train's rmse: 2.18395	eval's rmse: 2.31682
[549]	train's rmse: 2.18405	eval's rmse: 2.31681
[550]	train's rmse: 2.18405	eval's rmse: 2.31681
[551]	train's rmse: 2.18412	eval's rmse: 2.31678
[552]	train's rmse: 2.18412	eval's rmse: 2.31684
[553]	train's rmse: 2.18425	eval's rmse: 2.31696
[554]	train's rmse: 2.18438	eval's rmse: 2.31707
[555]	train's rmse: 2.18449	eval's rmse: 2.31708
[556]	train's rmse: 2.18445	eval's rmse: 2.31715
[557]	train's rmse: 2.1844	eval's rmse: 2.31702
[558]	train's rmse: 2.18435	eval's rmse: 2.31678
[559]	train's rmse: 2.18438	eval's rmse: 2.31677
[560]	train's rmse: 2.18428	eval's rmse: 2.3165
[561]	train's rmse: 2.18431	eval's rmse: 2.3167
[562]	train's rmse: 2.18421	eval's rmse: 2.31651
[563]	train's rmse: 2.18412	eval's rmse: 2.31625
[564]	train's rmse: 2.18405	eval's rmse: 2.31618
[565]	train's rmse: 2.18402	eval's rmse: 2.31647
[566]	train's rmse: 2.1837	eval's rmse: 2.31671
[567]	train's rmse: 2.18357	eval's rmse: 2.31635
[568]	train's rmse: 2.1

[32m[I 2021-03-03 17:19:24,555][0m Trial 9 finished with value: 2.7347770001992417 and parameters: {'boosting_type': 'gbdt', 'objective': 'regression_l2', 'learning_rate': 0.297727769626863, 'lambda_l1': 9.176092400770591e-08, 'lambda_l2': 0.06905815522661087, 'num_leaves': 233, 'feature_fraction': 0.502560647585235, 'bagging_fraction': 0.1726614596645667, 'bagging_freq': 5, 'min_child_samples': 76}. Best is trial 1 with value: 1.9947815643578437.[0m


[1]	train's rmse: 4.52194	eval's rmse: 4.39742
Training until validation scores don't improve for 500 rounds
[2]	train's rmse: 4.39853	eval's rmse: 4.27572
[3]	train's rmse: 4.20502	eval's rmse: 4.08582
[4]	train's rmse: 4.09866	eval's rmse: 3.99713
[5]	train's rmse: 3.94474	eval's rmse: 3.84676
[6]	train's rmse: 3.79847	eval's rmse: 3.70721
[7]	train's rmse: 3.66668	eval's rmse: 3.57963
[8]	train's rmse: 3.5756	eval's rmse: 3.50057
[9]	train's rmse: 3.46588	eval's rmse: 3.39392
[10]	train's rmse: 3.41159	eval's rmse: 3.34144
[11]	train's rmse: 3.36098	eval's rmse: 3.30881
[12]	train's rmse: 3.28556	eval's rmse: 3.24143
[13]	train's rmse: 3.20174	eval's rmse: 3.15862
[14]	train's rmse: 3.13853	eval's rmse: 3.10251
[15]	train's rmse: 3.06508	eval's rmse: 3.04139
[16]	train's rmse: 3.02596	eval's rmse: 3.01011
[17]	train's rmse: 2.97782	eval's rmse: 2.97376
[18]	train's rmse: 2.94082	eval's rmse: 2.94968
[19]	train's rmse: 2.89401	eval's rmse: 2.91065
[20]	train's rmse: 2.8537	eval's rms

[32m[I 2021-03-03 17:19:26,953][0m Trial 10 finished with value: 2.060143982404156 and parameters: {'boosting_type': 'gbdt', 'objective': 'regression_l2', 'learning_rate': 0.07563805404387626, 'lambda_l1': 0.017368599902021695, 'lambda_l2': 2.4587565262923294e-06, 'num_leaves': 9, 'feature_fraction': 0.40181289806416975, 'bagging_fraction': 0.9670721026777411, 'bagging_freq': 2, 'min_child_samples': 36}. Best is trial 1 with value: 1.9947815643578437.[0m


[1]	train's rmse: 4.61124	eval's rmse: 4.48556
Training until validation scores don't improve for 500 rounds
[2]	train's rmse: 4.5489	eval's rmse: 4.42806
[3]	train's rmse: 4.43982	eval's rmse: 4.32242
[4]	train's rmse: 4.40523	eval's rmse: 4.29425
[5]	train's rmse: 4.3134	eval's rmse: 4.20989
[6]	train's rmse: 4.22222	eval's rmse: 4.12366
[7]	train's rmse: 4.1396	eval's rmse: 4.04266
[8]	train's rmse: 4.10257	eval's rmse: 4.0059
[9]	train's rmse: 4.02989	eval's rmse: 3.93565
[10]	train's rmse: 3.99745	eval's rmse: 3.90857
[11]	train's rmse: 3.97099	eval's rmse: 3.88894
[12]	train's rmse: 3.94062	eval's rmse: 3.86051
[13]	train's rmse: 3.87875	eval's rmse: 3.80353
[14]	train's rmse: 3.8297	eval's rmse: 3.75673
[15]	train's rmse: 3.77614	eval's rmse: 3.70682
[16]	train's rmse: 3.75198	eval's rmse: 3.68493
[17]	train's rmse: 3.72679	eval's rmse: 3.66388
[18]	train's rmse: 3.70614	eval's rmse: 3.64865
[19]	train's rmse: 3.66849	eval's rmse: 3.61004
[20]	train's rmse: 3.6345	eval's rmse: 3

[32m[I 2021-03-03 17:19:35,038][0m Trial 11 finished with value: 2.232113428293098 and parameters: {'boosting_type': 'gbdt', 'objective': 'regression_l2', 'learning_rate': 0.06009440696248616, 'lambda_l1': 0.051578654779332024, 'lambda_l2': 3.4115281200737103e-06, 'num_leaves': 3, 'feature_fraction': 0.4340786601021296, 'bagging_fraction': 0.8893891902223117, 'bagging_freq': 2, 'min_child_samples': 40}. Best is trial 1 with value: 1.9947815643578437.[0m


[1]	train's rmse: 4.48476	eval's rmse: 4.35577
Training until validation scores don't improve for 500 rounds
[2]	train's rmse: 4.35283	eval's rmse: 4.22885
[3]	train's rmse: 4.19232	eval's rmse: 4.06938
[4]	train's rmse: 4.03106	eval's rmse: 3.91465
[5]	train's rmse: 3.88308	eval's rmse: 3.76883
[6]	train's rmse: 3.74839	eval's rmse: 3.63713
[7]	train's rmse: 3.6522	eval's rmse: 3.54619
[8]	train's rmse: 3.54219	eval's rmse: 3.44271
[9]	train's rmse: 3.43311	eval's rmse: 3.34547
[10]	train's rmse: 3.33809	eval's rmse: 3.25892
[11]	train's rmse: 3.26235	eval's rmse: 3.1925
[12]	train's rmse: 3.1974	eval's rmse: 3.13497
[13]	train's rmse: 3.12412	eval's rmse: 3.06754
[14]	train's rmse: 3.05981	eval's rmse: 3.00915
[15]	train's rmse: 3.0057	eval's rmse: 2.95652
[16]	train's rmse: 2.9515	eval's rmse: 2.90602
[17]	train's rmse: 2.90584	eval's rmse: 2.86715
[18]	train's rmse: 2.8708	eval's rmse: 2.84027
[19]	train's rmse: 2.83095	eval's rmse: 2.80842
[20]	train's rmse: 2.79825	eval's rmse: 2

[32m[I 2021-03-03 17:19:38,022][0m Trial 12 finished with value: 2.034704351213985 and parameters: {'boosting_type': 'gbdt', 'objective': 'regression_l2', 'learning_rate': 0.07242905830447663, 'lambda_l1': 0.005837337611469361, 'lambda_l2': 6.382867774779913e-06, 'num_leaves': 8, 'feature_fraction': 0.6184253872094609, 'bagging_fraction': 0.763171726750777, 'bagging_freq': 2, 'min_child_samples': 30}. Best is trial 1 with value: 1.9947815643578437.[0m


[1]	train's rmse: 4.43245	eval's rmse: 4.313
Training until validation scores don't improve for 500 rounds
[2]	train's rmse: 4.24537	eval's rmse: 4.12733
[3]	train's rmse: 4.04791	eval's rmse: 3.94958
[4]	train's rmse: 3.85468	eval's rmse: 3.76732
[5]	train's rmse: 3.66588	eval's rmse: 3.58478
[6]	train's rmse: 3.515	eval's rmse: 3.43999
[7]	train's rmse: 3.36739	eval's rmse: 3.3142
[8]	train's rmse: 3.24732	eval's rmse: 3.20221
[9]	train's rmse: 3.13324	eval's rmse: 3.09896
[10]	train's rmse: 3.01722	eval's rmse: 2.99667
[11]	train's rmse: 2.91975	eval's rmse: 2.91465
[12]	train's rmse: 2.83534	eval's rmse: 2.84595
[13]	train's rmse: 2.75438	eval's rmse: 2.77736
[14]	train's rmse: 2.68595	eval's rmse: 2.72249
[15]	train's rmse: 2.62188	eval's rmse: 2.67382
[16]	train's rmse: 2.5673	eval's rmse: 2.62557
[17]	train's rmse: 2.51865	eval's rmse: 2.59484
[18]	train's rmse: 2.46848	eval's rmse: 2.55453
[19]	train's rmse: 2.42365	eval's rmse: 2.52043
[20]	train's rmse: 2.38708	eval's rmse: 2

[32m[I 2021-03-03 17:19:39,584][0m Trial 13 finished with value: 2.0160930964710486 and parameters: {'boosting_type': 'gbdt', 'objective': 'regression_l2', 'learning_rate': 0.08144965167215537, 'lambda_l1': 0.0010833687950110409, 'lambda_l2': 2.7274228650490113e-05, 'num_leaves': 41, 'feature_fraction': 0.6289872141850066, 'bagging_fraction': 0.6801531096222778, 'bagging_freq': 3, 'min_child_samples': 30}. Best is trial 1 with value: 1.9947815643578437.[0m



[776]	train's rmse: 0.623113	eval's rmse: 2.04391
[777]	train's rmse: 0.622753	eval's rmse: 2.04341
[778]	train's rmse: 0.622022	eval's rmse: 2.0434
[779]	train's rmse: 0.621647	eval's rmse: 2.04318
[780]	train's rmse: 0.621078	eval's rmse: 2.04328
[781]	train's rmse: 0.620255	eval's rmse: 2.04308
[782]	train's rmse: 0.619824	eval's rmse: 2.04231
[783]	train's rmse: 0.619096	eval's rmse: 2.042
[784]	train's rmse: 0.618532	eval's rmse: 2.04227
[785]	train's rmse: 0.617855	eval's rmse: 2.04275
[786]	train's rmse: 0.617341	eval's rmse: 2.04308
[787]	train's rmse: 0.616707	eval's rmse: 2.04291
[788]	train's rmse: 0.616308	eval's rmse: 2.04336
[789]	train's rmse: 0.615877	eval's rmse: 2.04334
[790]	train's rmse: 0.615295	eval's rmse: 2.0441
[791]	train's rmse: 0.615087	eval's rmse: 2.04551
[792]	train's rmse: 0.614837	eval's rmse: 2.0463
[793]	train's rmse: 0.614231	eval's rmse: 2.04571
[794]	train's rmse: 0.613534	eval's rmse: 2.04536
[795]	train's rmse: 0.61303	eval's rmse: 2.04538
[796]

[32m[I 2021-03-03 17:19:40,880][0m Trial 14 finished with value: 2.030092769121891 and parameters: {'boosting_type': 'gbdt', 'objective': 'regression_l2', 'learning_rate': 0.11852450371157618, 'lambda_l1': 1.5596881975351924e-06, 'lambda_l2': 0.00019825056325597186, 'num_leaves': 51, 'feature_fraction': 0.7342275957824272, 'bagging_fraction': 0.6002335382526289, 'bagging_freq': 3, 'min_child_samples': 53}. Best is trial 1 with value: 1.9947815643578437.[0m


[1]	train's rmse: 4.30374	eval's rmse: 4.21029
Training until validation scores don't improve for 500 rounds
[2]	train's rmse: 3.97873	eval's rmse: 3.93302
[3]	train's rmse: 3.67757	eval's rmse: 3.66449
[4]	train's rmse: 3.40145	eval's rmse: 3.42736
[5]	train's rmse: 3.15765	eval's rmse: 3.21854
[6]	train's rmse: 2.94369	eval's rmse: 3.04324
[7]	train's rmse: 2.75646	eval's rmse: 2.91411
[8]	train's rmse: 2.58236	eval's rmse: 2.76993
[9]	train's rmse: 2.4282	eval's rmse: 2.65357
[10]	train's rmse: 2.27593	eval's rmse: 2.5439
[11]	train's rmse: 2.15245	eval's rmse: 2.4656
[12]	train's rmse: 2.045	eval's rmse: 2.40944
[13]	train's rmse: 1.94372	eval's rmse: 2.3537
[14]	train's rmse: 1.85326	eval's rmse: 2.30789
[15]	train's rmse: 1.77619	eval's rmse: 2.26402
[16]	train's rmse: 1.71063	eval's rmse: 2.22966
[17]	train's rmse: 1.6521	eval's rmse: 2.1966
[18]	train's rmse: 1.60177	eval's rmse: 2.16779
[19]	train's rmse: 1.5501	eval's rmse: 2.13774
[20]	train's rmse: 1.50487	eval's rmse: 2.12

[32m[I 2021-03-03 17:19:42,635][0m Trial 15 finished with value: 2.0165943841883767 and parameters: {'boosting_type': 'gbdt', 'objective': 'regression_l2', 'learning_rate': 0.10052631892238205, 'lambda_l1': 0.00031267386471048, 'lambda_l2': 1.859653191748108e-08, 'num_leaves': 56, 'feature_fraction': 0.6064010015663394, 'bagging_fraction': 0.6078478099942884, 'bagging_freq': 3, 'min_child_samples': 5}. Best is trial 1 with value: 1.9947815643578437.[0m



[501]	train's rmse: 0.0702393	eval's rmse: 2.0686
[502]	train's rmse: 0.0699577	eval's rmse: 2.06859
[503]	train's rmse: 0.0696505	eval's rmse: 2.06874
[504]	train's rmse: 0.0694261	eval's rmse: 2.06881
[505]	train's rmse: 0.0691434	eval's rmse: 2.06874
[506]	train's rmse: 0.0689165	eval's rmse: 2.06874
[507]	train's rmse: 0.0687673	eval's rmse: 2.06874
[508]	train's rmse: 0.0685773	eval's rmse: 2.06875
[509]	train's rmse: 0.0684267	eval's rmse: 2.06868
[510]	train's rmse: 0.068254	eval's rmse: 2.0688
[511]	train's rmse: 0.0680219	eval's rmse: 2.0689
[512]	train's rmse: 0.0678161	eval's rmse: 2.06883
[513]	train's rmse: 0.0677699	eval's rmse: 2.06874
[514]	train's rmse: 0.0674988	eval's rmse: 2.0688
[515]	train's rmse: 0.0672695	eval's rmse: 2.06883
[516]	train's rmse: 0.0670695	eval's rmse: 2.06874
[517]	train's rmse: 0.0668533	eval's rmse: 2.06869
[518]	train's rmse: 0.0667125	eval's rmse: 2.06866
[519]	train's rmse: 0.0666193	eval's rmse: 2.06849
[520]	train's rmse: 0.0665249	eval'

[32m[I 2021-03-03 17:19:45,245][0m Trial 16 finished with value: 1.9895474080017175 and parameters: {'boosting_type': 'gbdt', 'objective': 'regression_l2', 'learning_rate': 0.05080622756854391, 'lambda_l1': 0.5926327294349881, 'lambda_l2': 0.00015776333086992518, 'num_leaves': 160, 'feature_fraction': 0.716282006108701, 'bagging_fraction': 0.841919376111883, 'bagging_freq': 1, 'min_child_samples': 24}. Best is trial 16 with value: 1.9895474080017175.[0m


[1]	train's rmse: 3.45814	eval's rmse: 3.43369
Training until validation scores don't improve for 500 rounds
[2]	train's rmse: 2.84055	eval's rmse: 2.92368
[3]	train's rmse: 2.42802	eval's rmse: 2.61124
[4]	train's rmse: 2.22488	eval's rmse: 2.50382
[5]	train's rmse: 2.10243	eval's rmse: 2.4497
[6]	train's rmse: 2.03062	eval's rmse: 2.41194
[7]	train's rmse: 1.98854	eval's rmse: 2.39249
[8]	train's rmse: 1.94913	eval's rmse: 2.36204
[9]	train's rmse: 1.92004	eval's rmse: 2.33959
[10]	train's rmse: 1.90083	eval's rmse: 2.32896
[11]	train's rmse: 1.87559	eval's rmse: 2.3231
[12]	train's rmse: 1.84455	eval's rmse: 2.31077
[13]	train's rmse: 1.82303	eval's rmse: 2.31018
[14]	train's rmse: 1.80298	eval's rmse: 2.29753
[15]	train's rmse: 1.78151	eval's rmse: 2.29562
[16]	train's rmse: 1.76531	eval's rmse: 2.29444
[17]	train's rmse: 1.74196	eval's rmse: 2.28739
[18]	train's rmse: 1.72833	eval's rmse: 2.28568
[19]	train's rmse: 1.7152	eval's rmse: 2.29023
[20]	train's rmse: 1.70068	eval's rmse

[32m[I 2021-03-03 17:20:00,945][0m Trial 17 finished with value: 2.1497788127258994 and parameters: {'boosting_type': 'gbdt', 'objective': 'regression_l1', 'learning_rate': 0.3612085848761273, 'lambda_l1': 0.1317493439865974, 'lambda_l2': 0.0019127874167081424, 'num_leaves': 174, 'feature_fraction': 0.7486476609311644, 'bagging_fraction': 0.859756032266961, 'bagging_freq': 1, 'min_child_samples': 22}. Best is trial 16 with value: 1.9895474080017175.[0m


[1]	train's rmse: 2.84145	eval's rmse: 2.97026
Training until validation scores don't improve for 500 rounds
[2]	train's rmse: 2.49229	eval's rmse: 2.67239
[3]	train's rmse: 2.30919	eval's rmse: 2.58565
[4]	train's rmse: 2.19731	eval's rmse: 2.59811
[5]	train's rmse: 2.08193	eval's rmse: 2.52119
[6]	train's rmse: 2.01232	eval's rmse: 2.49631
[7]	train's rmse: 1.93861	eval's rmse: 2.4691
[8]	train's rmse: 1.87289	eval's rmse: 2.44184
[9]	train's rmse: 1.81288	eval's rmse: 2.41546
[10]	train's rmse: 1.75932	eval's rmse: 2.37938
[11]	train's rmse: 1.72256	eval's rmse: 2.34583
[12]	train's rmse: 1.69782	eval's rmse: 2.34455
[13]	train's rmse: 1.66109	eval's rmse: 2.32535
[14]	train's rmse: 1.62715	eval's rmse: 2.31808
[15]	train's rmse: 1.59292	eval's rmse: 2.34029
[16]	train's rmse: 1.56182	eval's rmse: 2.3368
[17]	train's rmse: 1.5395	eval's rmse: 2.3112
[18]	train's rmse: 1.5121	eval's rmse: 2.30215
[19]	train's rmse: 1.48917	eval's rmse: 2.32574
[20]	train's rmse: 1.47024	eval's rmse: 

[32m[I 2021-03-03 17:20:01,821][0m Trial 18 finished with value: 2.281549771061013 and parameters: {'boosting_type': 'gbdt', 'objective': 'regression_l2', 'learning_rate': 0.9517089040947135, 'lambda_l1': 3.877082548590693, 'lambda_l2': 2.858594620989545e-07, 'num_leaves': 180, 'feature_fraction': 0.8270158955261117, 'bagging_fraction': 0.8493946326492454, 'bagging_freq': 1, 'min_child_samples': 47}. Best is trial 16 with value: 1.9895474080017175.[0m



[512]	train's rmse: 0.451953	eval's rmse: 2.30831
[513]	train's rmse: 0.451687	eval's rmse: 2.30859
[514]	train's rmse: 0.451198	eval's rmse: 2.30875
[515]	train's rmse: 0.450596	eval's rmse: 2.30904
[516]	train's rmse: 0.45035	eval's rmse: 2.31021
[517]	train's rmse: 0.45026	eval's rmse: 2.3099
[518]	train's rmse: 0.450443	eval's rmse: 2.30908
[519]	train's rmse: 0.450302	eval's rmse: 2.3084
[520]	train's rmse: 0.449988	eval's rmse: 2.30826
[521]	train's rmse: 0.449795	eval's rmse: 2.30992
[522]	train's rmse: 0.449315	eval's rmse: 2.31079
[523]	train's rmse: 0.449044	eval's rmse: 2.31069
[524]	train's rmse: 0.448961	eval's rmse: 2.31191
[525]	train's rmse: 0.448633	eval's rmse: 2.31331
[526]	train's rmse: 0.448318	eval's rmse: 2.31295
[527]	train's rmse: 0.448298	eval's rmse: 2.31232
[528]	train's rmse: 0.448134	eval's rmse: 2.31199
[529]	train's rmse: 0.448106	eval's rmse: 2.31209
[530]	train's rmse: 0.44805	eval's rmse: 2.31208
[531]	train's rmse: 0.447773	eval's rmse: 2.3133
[532]

[32m[I 2021-03-03 17:20:03,817][0m Trial 19 finished with value: 2.087225166837079 and parameters: {'boosting_type': 'gbdt', 'objective': 'regression_l2', 'learning_rate': 0.05863536466571313, 'lambda_l1': 2.186508950904536e-06, 'lambda_l2': 8.114822644233425e-05, 'num_leaves': 148, 'feature_fraction': 0.9154901000125841, 'bagging_fraction': 0.5293833711458342, 'bagging_freq': 2, 'min_child_samples': 70}. Best is trial 16 with value: 1.9895474080017175.[0m


[1]	train's rmse: 4.50295	eval's rmse: 4.39138
Training until validation scores don't improve for 500 rounds
[2]	train's rmse: 4.33899	eval's rmse: 4.24237
[3]	train's rmse: 4.17569	eval's rmse: 4.08702
[4]	train's rmse: 4.01972	eval's rmse: 3.94392
[5]	train's rmse: 3.87353	eval's rmse: 3.80934
[6]	train's rmse: 3.73666	eval's rmse: 3.68196
[7]	train's rmse: 3.60676	eval's rmse: 3.5674
[8]	train's rmse: 3.48747	eval's rmse: 3.45832
[9]	train's rmse: 3.37093	eval's rmse: 3.35388
[10]	train's rmse: 3.26288	eval's rmse: 3.26214
[11]	train's rmse: 3.16336	eval's rmse: 3.17815
[12]	train's rmse: 3.07092	eval's rmse: 3.10219
[13]	train's rmse: 2.98121	eval's rmse: 3.0262
[14]	train's rmse: 2.89983	eval's rmse: 2.95786
[15]	train's rmse: 2.81827	eval's rmse: 2.89176
[16]	train's rmse: 2.74068	eval's rmse: 2.83023
[17]	train's rmse: 2.67397	eval's rmse: 2.78243
[18]	train's rmse: 2.60271	eval's rmse: 2.72703
[19]	train's rmse: 2.53874	eval's rmse: 2.67392
[20]	train's rmse: 2.47793	eval's rms

[32m[I 2021-03-03 17:20:06,069][0m Trial 20 finished with value: 1.9838247881245883 and parameters: {'boosting_type': 'gbdt', 'objective': 'regression_l2', 'learning_rate': 0.05125931930526879, 'lambda_l1': 0.7660709828348844, 'lambda_l2': 2.806909736507475e-07, 'num_leaves': 79, 'feature_fraction': 0.6925704170701107, 'bagging_fraction': 0.9908106214091581, 'bagging_freq': 7, 'min_child_samples': 20}. Best is trial 20 with value: 1.9838247881245883.[0m



[680]	train's rmse: 0.378493	eval's rmse: 1.99415
[681]	train's rmse: 0.378129	eval's rmse: 1.99407
[682]	train's rmse: 0.377794	eval's rmse: 1.99405
[683]	train's rmse: 0.377376	eval's rmse: 1.99432
[684]	train's rmse: 0.37703	eval's rmse: 1.99436
[685]	train's rmse: 0.376729	eval's rmse: 1.99452
[686]	train's rmse: 0.376249	eval's rmse: 1.99458
[687]	train's rmse: 0.375703	eval's rmse: 1.99473
[688]	train's rmse: 0.37524	eval's rmse: 1.99495
[689]	train's rmse: 0.374904	eval's rmse: 1.9948
[690]	train's rmse: 0.374496	eval's rmse: 1.99475
[691]	train's rmse: 0.374005	eval's rmse: 1.99482
[692]	train's rmse: 0.373599	eval's rmse: 1.99461
Early stopping, best iteration is:
[192]	train's rmse: 0.887834	eval's rmse: 1.98382
[1]	train's rmse: 4.49425	eval's rmse: 4.37852
Training until validation scores don't improve for 500 rounds
[2]	train's rmse: 4.32104	eval's rmse: 4.22198
[3]	train's rmse: 4.15818	eval's rmse: 4.06581
[4]	train's rmse: 4.00208	eval's rmse: 3.92469
[5]	train's rmse:

[32m[I 2021-03-03 17:20:09,787][0m Trial 21 finished with value: 1.967063832577413 and parameters: {'boosting_type': 'gbdt', 'objective': 'regression_l2', 'learning_rate': 0.05116211140065773, 'lambda_l1': 1.141658762824769, 'lambda_l2': 1.443605508447683e-07, 'num_leaves': 80, 'feature_fraction': 0.690680760127189, 'bagging_fraction': 0.9927845305109251, 'bagging_freq': 7, 'min_child_samples': 17}. Best is trial 21 with value: 1.967063832577413.[0m



[727]	train's rmse: 0.314112	eval's rmse: 1.97917
[728]	train's rmse: 0.313661	eval's rmse: 1.9789
[729]	train's rmse: 0.313405	eval's rmse: 1.97879
[730]	train's rmse: 0.313107	eval's rmse: 1.97865
[731]	train's rmse: 0.312807	eval's rmse: 1.97874
[732]	train's rmse: 0.312504	eval's rmse: 1.97862
Early stopping, best iteration is:
[232]	train's rmse: 0.705522	eval's rmse: 1.96706
[1]	train's rmse: 4.49979	eval's rmse: 4.38356
Training until validation scores don't improve for 500 rounds
[2]	train's rmse: 4.33917	eval's rmse: 4.23787
[3]	train's rmse: 4.18103	eval's rmse: 4.08726
[4]	train's rmse: 4.02919	eval's rmse: 3.94938
[5]	train's rmse: 3.88797	eval's rmse: 3.81942
[6]	train's rmse: 3.75272	eval's rmse: 3.69561
[7]	train's rmse: 3.62318	eval's rmse: 3.58295
[8]	train's rmse: 3.50722	eval's rmse: 3.47764
[9]	train's rmse: 3.38955	eval's rmse: 3.37054
[10]	train's rmse: 3.28239	eval's rmse: 3.2746
[11]	train's rmse: 3.18352	eval's rmse: 3.19033
[12]	train's rmse: 3.09029	eval's r

[32m[I 2021-03-03 17:20:12,088][0m Trial 22 finished with value: 2.0314757093671734 and parameters: {'boosting_type': 'gbdt', 'objective': 'regression_l2', 'learning_rate': 0.0513703296926648, 'lambda_l1': 0.4920045261539695, 'lambda_l2': 3.148944411986182e-07, 'num_leaves': 81, 'feature_fraction': 0.6928725908760576, 'bagging_fraction': 0.9999378777484, 'bagging_freq': 7, 'min_child_samples': 23}. Best is trial 21 with value: 1.967063832577413.[0m



[725]	train's rmse: 0.430954	eval's rmse: 2.05706
[726]	train's rmse: 0.430333	eval's rmse: 2.05713
[727]	train's rmse: 0.429767	eval's rmse: 2.05734
[728]	train's rmse: 0.429232	eval's rmse: 2.05768
[729]	train's rmse: 0.428637	eval's rmse: 2.05787
[730]	train's rmse: 0.428105	eval's rmse: 2.05774
[731]	train's rmse: 0.427594	eval's rmse: 2.05823
[732]	train's rmse: 0.427176	eval's rmse: 2.05839
Early stopping, best iteration is:
[232]	train's rmse: 0.891302	eval's rmse: 2.03148
[1]	train's rmse: 4.48766	eval's rmse: 4.37215
Training until validation scores don't improve for 500 rounds
[2]	train's rmse: 4.30839	eval's rmse: 4.21309
[3]	train's rmse: 4.13761	eval's rmse: 4.05392
[4]	train's rmse: 3.97721	eval's rmse: 3.90867
[5]	train's rmse: 3.82554	eval's rmse: 3.77371
[6]	train's rmse: 3.68149	eval's rmse: 3.64811
[7]	train's rmse: 3.54173	eval's rmse: 3.52365
[8]	train's rmse: 3.41486	eval's rmse: 3.40883
[9]	train's rmse: 3.2908	eval's rmse: 3.29847
[10]	train's rmse: 3.17866	eva

[32m[I 2021-03-03 17:20:15,808][0m Trial 23 finished with value: 1.914181570898599 and parameters: {'boosting_type': 'gbdt', 'objective': 'regression_l2', 'learning_rate': 0.051013418735316535, 'lambda_l1': 4.794388279256722, 'lambda_l2': 7.14288739917065e-07, 'num_leaves': 84, 'feature_fraction': 0.7801327027938199, 'bagging_fraction': 0.9077382376670677, 'bagging_freq': 7, 'min_child_samples': 6}. Best is trial 23 with value: 1.914181570898599.[0m


[1]	train's rmse: 4.31116	eval's rmse: 4.2172
Training until validation scores don't improve for 500 rounds
[2]	train's rmse: 3.98905	eval's rmse: 3.9323
[3]	train's rmse: 3.69518	eval's rmse: 3.6628
[4]	train's rmse: 3.4337	eval's rmse: 3.43789
[5]	train's rmse: 3.2037	eval's rmse: 3.24336
[6]	train's rmse: 2.99176	eval's rmse: 3.06592
[7]	train's rmse: 2.80327	eval's rmse: 2.9146
[8]	train's rmse: 2.63905	eval's rmse: 2.77831
[9]	train's rmse: 2.49053	eval's rmse: 2.66464
[10]	train's rmse: 2.35407	eval's rmse: 2.56359
[11]	train's rmse: 2.23472	eval's rmse: 2.48157
[12]	train's rmse: 2.12946	eval's rmse: 2.41258
[13]	train's rmse: 2.03241	eval's rmse: 2.34653
[14]	train's rmse: 1.95437	eval's rmse: 2.28802
[15]	train's rmse: 1.87601	eval's rmse: 2.24422
[16]	train's rmse: 1.8032	eval's rmse: 2.2099
[17]	train's rmse: 1.73382	eval's rmse: 2.18034
[18]	train's rmse: 1.673	eval's rmse: 2.14771
[19]	train's rmse: 1.62433	eval's rmse: 2.12789
[20]	train's rmse: 1.57507	eval's rmse: 2.112

[32m[I 2021-03-03 17:20:17,269][0m Trial 24 finished with value: 1.9439124660161309 and parameters: {'boosting_type': 'gbdt', 'objective': 'regression_l2', 'learning_rate': 0.09713737899359695, 'lambda_l1': 4.508290114414209, 'lambda_l2': 1.0513333707727614e-06, 'num_leaves': 85, 'feature_fraction': 0.7956309037500651, 'bagging_fraction': 0.9344583285831574, 'bagging_freq': 7, 'min_child_samples': 6}. Best is trial 23 with value: 1.914181570898599.[0m


[1]	train's rmse: 4.34975	eval's rmse: 4.23863
Training until validation scores don't improve for 500 rounds
[2]	train's rmse: 4.07357	eval's rmse: 3.98576
[3]	train's rmse: 3.80767	eval's rmse: 3.73846
[4]	train's rmse: 3.56991	eval's rmse: 3.51996
[5]	train's rmse: 3.35937	eval's rmse: 3.32835
[6]	train's rmse: 3.16395	eval's rmse: 3.15511
[7]	train's rmse: 2.99591	eval's rmse: 3.014
[8]	train's rmse: 2.8376	eval's rmse: 2.87501
[9]	train's rmse: 2.70001	eval's rmse: 2.7553
[10]	train's rmse: 2.57407	eval's rmse: 2.65763
[11]	train's rmse: 2.46312	eval's rmse: 2.58788
[12]	train's rmse: 2.37114	eval's rmse: 2.52695
[13]	train's rmse: 2.27651	eval's rmse: 2.46283
[14]	train's rmse: 2.19119	eval's rmse: 2.3951
[15]	train's rmse: 2.1209	eval's rmse: 2.34811
[16]	train's rmse: 2.05922	eval's rmse: 2.30626
[17]	train's rmse: 2.00438	eval's rmse: 2.27705
[18]	train's rmse: 1.94935	eval's rmse: 2.24302
[19]	train's rmse: 1.9043	eval's rmse: 2.21851
[20]	train's rmse: 1.86529	eval's rmse: 2.

[32m[I 2021-03-03 17:20:19,473][0m Trial 25 finished with value: 1.9495095872971457 and parameters: {'boosting_type': 'gbdt', 'objective': 'regression_l2', 'learning_rate': 0.09265126601578909, 'lambda_l1': 4.769233427361934, 'lambda_l2': 9.11304182958068e-07, 'num_leaves': 29, 'feature_fraction': 0.7735644977482523, 'bagging_fraction': 0.9265158488344648, 'bagging_freq': 6, 'min_child_samples': 5}. Best is trial 23 with value: 1.914181570898599.[0m


[1]	train's rmse: 4.27547	eval's rmse: 4.17209
Training until validation scores don't improve for 500 rounds
[2]	train's rmse: 3.93089	eval's rmse: 3.86708
[3]	train's rmse: 3.61999	eval's rmse: 3.58002
[4]	train's rmse: 3.35546	eval's rmse: 3.34784
[5]	train's rmse: 3.12029	eval's rmse: 3.14795
[6]	train's rmse: 2.90271	eval's rmse: 2.96787
[7]	train's rmse: 2.71601	eval's rmse: 2.82818
[8]	train's rmse: 2.55374	eval's rmse: 2.69748
[9]	train's rmse: 2.41222	eval's rmse: 2.5852
[10]	train's rmse: 2.28685	eval's rmse: 2.49285
[11]	train's rmse: 2.17933	eval's rmse: 2.42551
[12]	train's rmse: 2.08556	eval's rmse: 2.377
[13]	train's rmse: 2.0084	eval's rmse: 2.32554
[14]	train's rmse: 1.9323	eval's rmse: 2.27957
[15]	train's rmse: 1.86628	eval's rmse: 2.24919
[16]	train's rmse: 1.81545	eval's rmse: 2.22436
[17]	train's rmse: 1.75468	eval's rmse: 2.20191
[18]	train's rmse: 1.70121	eval's rmse: 2.1734
[19]	train's rmse: 1.66211	eval's rmse: 2.15275
[20]	train's rmse: 1.61849	eval's rmse: 2

[32m[I 2021-03-03 17:20:21,012][0m Trial 26 finished with value: 1.9806886645231647 and parameters: {'boosting_type': 'gbdt', 'objective': 'regression_l2', 'learning_rate': 0.10978680888082062, 'lambda_l1': 7.031671552275464, 'lambda_l2': 1.6884196605289259e-06, 'num_leaves': 131, 'feature_fraction': 0.7907867773379031, 'bagging_fraction': 0.9153002851073271, 'bagging_freq': 6, 'min_child_samples': 6}. Best is trial 23 with value: 1.914181570898599.[0m


[1]	train's rmse: 4.36493	eval's rmse: 4.23428
Training until validation scores don't improve for 500 rounds
[2]	train's rmse: 4.05744	eval's rmse: 3.94397
[3]	train's rmse: 3.79038	eval's rmse: 3.70043
[4]	train's rmse: 3.5537	eval's rmse: 3.48925
[5]	train's rmse: 3.3341	eval's rmse: 3.30166
[6]	train's rmse: 3.13953	eval's rmse: 3.13211
[7]	train's rmse: 2.97905	eval's rmse: 2.9951
[8]	train's rmse: 2.83996	eval's rmse: 2.87775
[9]	train's rmse: 2.7119	eval's rmse: 2.76798
[10]	train's rmse: 2.60059	eval's rmse: 2.68248
[11]	train's rmse: 2.5067	eval's rmse: 2.61403
[12]	train's rmse: 2.42387	eval's rmse: 2.54993
[13]	train's rmse: 2.34945	eval's rmse: 2.48891
[14]	train's rmse: 2.28272	eval's rmse: 2.43728
[15]	train's rmse: 2.22686	eval's rmse: 2.39533
[16]	train's rmse: 2.17198	eval's rmse: 2.35063
[17]	train's rmse: 2.12449	eval's rmse: 2.32106
[18]	train's rmse: 2.08641	eval's rmse: 2.29639
[19]	train's rmse: 2.05458	eval's rmse: 2.27721
[20]	train's rmse: 2.02434	eval's rmse: 

[32m[I 2021-03-03 17:20:29,341][0m Trial 27 finished with value: 1.9609647375068022 and parameters: {'boosting_type': 'gbdt', 'objective': 'regression_l1', 'learning_rate': 0.0874635728653908, 'lambda_l1': 4.477912242072013, 'lambda_l2': 1.3371656539749254e-05, 'num_leaves': 29, 'feature_fraction': 0.9025788051558927, 'bagging_fraction': 0.9256697617505112, 'bagging_freq': 6, 'min_child_samples': 9}. Best is trial 23 with value: 1.914181570898599.[0m


[1]	train's rmse: 4.46087	eval's rmse: 4.34473
Training until validation scores don't improve for 500 rounds
[2]	train's rmse: 4.24634	eval's rmse: 4.13754
[3]	train's rmse: 4.05318	eval's rmse: 3.9544
[4]	train's rmse: 3.88174	eval's rmse: 3.79377
[5]	train's rmse: 3.72136	eval's rmse: 3.644
[6]	train's rmse: 3.56719	eval's rmse: 3.49845
[7]	train's rmse: 3.42828	eval's rmse: 3.37516
[8]	train's rmse: 3.29636	eval's rmse: 3.25561
[9]	train's rmse: 3.17548	eval's rmse: 3.14997
[10]	train's rmse: 3.05944	eval's rmse: 3.05381
[11]	train's rmse: 2.95623	eval's rmse: 2.969
[12]	train's rmse: 2.8604	eval's rmse: 2.89155
[13]	train's rmse: 2.77421	eval's rmse: 2.81927
[14]	train's rmse: 2.69705	eval's rmse: 2.75582
[15]	train's rmse: 2.62767	eval's rmse: 2.69796
[16]	train's rmse: 2.56479	eval's rmse: 2.64852
[17]	train's rmse: 2.50661	eval's rmse: 2.60002
[18]	train's rmse: 2.45782	eval's rmse: 2.56284
[19]	train's rmse: 2.40767	eval's rmse: 2.53142
[20]	train's rmse: 2.36127	eval's rmse: 2

[32m[I 2021-03-03 17:20:31,235][0m Trial 28 finished with value: 1.9777134884898724 and parameters: {'boosting_type': 'gbdt', 'objective': 'regression_l2', 'learning_rate': 0.06650012235541161, 'lambda_l1': 9.696298021406411, 'lambda_l2': 1.020536375223314e-06, 'num_leaves': 26, 'feature_fraction': 0.886328521698448, 'bagging_fraction': 0.8145880166135706, 'bagging_freq': 7, 'min_child_samples': 7}. Best is trial 23 with value: 1.914181570898599.[0m


[1]	train's rmse: 4.1438	eval's rmse: 4.07434
Training until validation scores don't improve for 500 rounds
[2]	train's rmse: 3.69522	eval's rmse: 3.68218
[3]	train's rmse: 3.30578	eval's rmse: 3.33578
[4]	train's rmse: 2.97639	eval's rmse: 3.06671
[5]	train's rmse: 2.69957	eval's rmse: 2.84522
[6]	train's rmse: 2.4556	eval's rmse: 2.66047
[7]	train's rmse: 2.23155	eval's rmse: 2.49644
[8]	train's rmse: 2.04347	eval's rmse: 2.36215
[9]	train's rmse: 1.88726	eval's rmse: 2.26962
[10]	train's rmse: 1.74868	eval's rmse: 2.18526
[11]	train's rmse: 1.63275	eval's rmse: 2.13336
[12]	train's rmse: 1.53937	eval's rmse: 2.10248
[13]	train's rmse: 1.45459	eval's rmse: 2.06784
[14]	train's rmse: 1.37852	eval's rmse: 2.03631
[15]	train's rmse: 1.31476	eval's rmse: 2.01323
[16]	train's rmse: 1.25887	eval's rmse: 1.99237
[17]	train's rmse: 1.21257	eval's rmse: 1.97779
[18]	train's rmse: 1.17013	eval's rmse: 1.97196
[19]	train's rmse: 1.11449	eval's rmse: 1.96159
[20]	train's rmse: 1.06669	eval's rms

[32m[I 2021-03-03 17:20:33,319][0m Trial 29 finished with value: 1.9238289144636769 and parameters: {'boosting_type': 'gbdt', 'objective': 'regression_l2', 'learning_rate': 0.13604398840185541, 'lambda_l1': 0.15044842982607262, 'lambda_l2': 0.0010771980704158725, 'num_leaves': 104, 'feature_fraction': 0.7656775620185373, 'bagging_fraction': 0.7920969697453408, 'bagging_freq': 6, 'min_child_samples': 6}. Best is trial 23 with value: 1.914181570898599.[0m



[489]	train's rmse: 0.0502476	eval's rmse: 1.96005
[490]	train's rmse: 0.0502351	eval's rmse: 1.96004
[491]	train's rmse: 0.0502259	eval's rmse: 1.96002
[492]	train's rmse: 0.0502169	eval's rmse: 1.96002
[493]	train's rmse: 0.0502569	eval's rmse: 1.95997
[494]	train's rmse: 0.0503348	eval's rmse: 1.95991
[495]	train's rmse: 0.0504173	eval's rmse: 1.95985
[496]	train's rmse: 0.0505408	eval's rmse: 1.95981
[497]	train's rmse: 0.0506227	eval's rmse: 1.9598
[498]	train's rmse: 0.050713	eval's rmse: 1.9598
[499]	train's rmse: 0.0505693	eval's rmse: 1.95979
[500]	train's rmse: 0.0504621	eval's rmse: 1.95979
[501]	train's rmse: 0.0503762	eval's rmse: 1.95977
[502]	train's rmse: 0.0503046	eval's rmse: 1.95977
[503]	train's rmse: 0.0502486	eval's rmse: 1.95979
[504]	train's rmse: 0.0501987	eval's rmse: 1.95977
[505]	train's rmse: 0.05014	eval's rmse: 1.9598
[506]	train's rmse: 0.0500968	eval's rmse: 1.95982
[507]	train's rmse: 0.0500683	eval's rmse: 1.95985
[508]	train's rmse: 0.0500396	eval's

In [29]:
study.best_trial #contains the hyperparameter settings

FrozenTrial(number=23, value=1.914181570898599, datetime_start=datetime.datetime(2021, 3, 3, 17, 20, 12, 88625), datetime_complete=datetime.datetime(2021, 3, 3, 17, 20, 15, 808867), params={'boosting_type': 'gbdt', 'objective': 'regression_l2', 'learning_rate': 0.051013418735316535, 'lambda_l1': 4.794388279256722, 'lambda_l2': 7.14288739917065e-07, 'num_leaves': 84, 'feature_fraction': 0.7801327027938199, 'bagging_fraction': 0.9077382376670677, 'bagging_freq': 7, 'min_child_samples': 6}, distributions={'boosting_type': CategoricalDistribution(choices=('gbdt', 'rf')), 'objective': CategoricalDistribution(choices=('regression_l1', 'regression_l2')), 'learning_rate': LogUniformDistribution(high=1, low=0.05), 'lambda_l1': LogUniformDistribution(high=10.0, low=1e-08), 'lambda_l2': LogUniformDistribution(high=10.0, low=1e-08), 'num_leaves': IntUniformDistribution(high=256, low=2, step=1), 'feature_fraction': UniformDistribution(high=1.000000000001, low=0.4), 'bagging_fraction': UniformDistri

In [21]:
print('Best trial:')
trial = study.best_trial

print('  Value: {}'.format(trial.value))

print('  Params: ')
for key, value in trial.params.items():
    print('    "{}": {},'.format(key, value))

Best trial:
  Value: 2.269627402687017
  Params: 
    "boosting_type": gbdt,
    "objective": regression_l2,
    "learning_rate": 0.32875122929920914,
    "lambda_l1": 4.302884701416492e-05,
    "lambda_l2": 0.00446641166459144,
    "num_leaves": 7,
    "feature_fraction": 0.902653559382219,
    "bagging_fraction": 0.20289579693458237,
    "bagging_freq": 4,
    "min_child_samples": 36,


In [22]:
model_tuned = joblib.load('OPTIMIZED_MODEL.sav') #call saved model

In [23]:
df_test = pd.read_csv('test_data.csv')
df_test.columns = ['Aerosol', 
          'Blue', 
          'Green', 
          'Red', 
          'NIR', 
          'SWIR-1', 
          'SWIR-2',
          'Chl-a']

In [24]:
X_test, y_test = df_test.iloc[:,:-1], df_test.iloc[:,-1]
X_test['NDVI'] = (X_test['NIR'] - X_test['Red']) / (X_test['NIR'] + X_test['Red']) 

In [25]:
X_test_sc = sc.fit_transform(X_test) #standardize

In [26]:
pred_test = model_tuned.predict(X_test_sc)
rmse = np.sqrt(mean_squared_error(y_test, pred_test))
rmse

3.2018627584470645

In [27]:
r2_score(y_test, pred_test)

0.5003501807833579