In [1]:
# Import library
import pandas as pd
import numpy as np
import os, random, warnings, gc, psutil, datetime
from tqdm import tqdm_notebook, tqdm

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GroupKFold, StratifiedKFold, KFold
from sklearn.metrics import mean_squared_error
from math import sqrt

import lightgbm as lgbm
from catboost import CatBoostRegressor

from glob import glob
from IPython.display import display

import seaborn as sns
import matplotlib.pyplot as plt

# Set options
# Use fully qualified option keys: the short forms 'max_columns' / 'max_rows'
# also match the 'styler.render.*' options on newer pandas and then raise
# OptionError ("Pattern matched multiple keys").
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_colwidth', 300)

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Make seaborn's 'bright' palette the default color cycle.
sns.set_palette('bright')

In [2]:
def seed_everything(seed=0):
    """Seed the global random number generators used by this notebook.

    Parameters
    ----------
    seed : int, default 0
        Value handed to ``random.seed`` and ``np.random.seed`` and written,
        as a string, to the ``PYTHONHASHSEED`` environment variable.
    """
    # NOTE(review): PYTHONHASHSEED is read at interpreter start-up, so setting
    # it here presumably only affects child processes -- confirm if relevant.
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed):
        seeder(seed)


# Seed once with the default value so later random draws are repeatable.
seed_everything()

In [3]:
# Load the preprocessed train/test frames written by the feature-creation step.
# pd.read_pickle must only be used on trusted files: unpickling can run code.
train, test = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        '../Create_Features/preprocessed_train.pickle',
        '../Create_Features/preprocessed_test.pickle',
    )
)

In [4]:
# Name of the column the models below are trained to predict.
target_col = '18~20_ride'
# Hard-coded row count; not referenced by the cells visible here.
# Presumably the number of training rows -- TODO confirm against train.shape.
n_trn = 415_423

In [5]:
# Preview the first rows of the loaded training frame.
train.head()

Unnamed: 0,18~20_ride,bus_route_id,in_out,latitude,longitude,station_code,dayofweek,weekend,ride_total,takeoff_total,ride_go_to_work,takeoff_go_to_work,dis_jejusi,dis_seoquipo,bus_route_id_station_code,bus_route_id_station_code_weekend,date_fq_enc,station_code_fq_enc,bus_route_id_fq_enc,bus_route_id_station_code_fq_enc,date_bus_route_id_fq_enc,date_station_code_fq_enc,date_bus_route_id_station_code_fq_enc,7~8_ride_date_mean,7~8_ride_date_bus_route_id_mean,8~9_ride_date_mean,8~9_ride_date_bus_route_id_mean,9~10_ride_date_mean,9~10_ride_date_bus_route_id_mean,station_sequence,station_reverse_sequence,weekday,is_national_holiday,getin_total,morning_getin,morning_takeoff,noon_getin,noon_takeoff,station_morning_getin_sum,station_morning_takeoff_sum,bus_route_getin_sum,bus_route_takeoff_sum,station_morning_getin_mean,station_morning_takeoff_mean,bus_route_getin_mean,bus_route_takeoff_mean,kmeans1,kmeans2,regular_commuter_count,afternoon_takeoff,next_bus_time_diff,getin_user_count1_morning,getin_user_count2_morning,takeoff_user_count1_noon,takeoff_user_count2_noon,hourly_rain,prev_daily_rain,hourly_cloud,latlong_second,total_population,man_population,woman_population,avg_time_diff,passengers_in,passengers_out,latitude_rank,longitude_rank
0,0.0,0,1,33.4899,126.49373,322,6,1,16.0,0.0,8.0,0.0,2.95492,26.256744,31053,53745,11538,46,1189,46,25,1,1,0.391576,1.12,0.49246,0.72,0.543855,1.52,1,25,6,0,16.0,3.0,0.0,13.0,0.0,3.0,0.0,50.0,8.0,3.0,0.0,2.0,0.32,15,9,9.0,0.0,3650.490741,7.0,1.0,,,0.2,0.0,88,2411,43217.0,21189.0,22028.0,3113.015748,15.23913,1.909091,404772.5,214964.5
1,5.0,0,1,33.48944,126.48508,335,6,1,22.0,0.0,10.0,0.0,3.720275,26.403025,31054,53747,11538,2303,1189,46,25,45,1,0.391576,1.12,0.49246,0.72,0.543855,1.52,2,24,6,0,22.0,9.0,0.0,13.0,0.0,183.0,116.0,50.0,8.0,4.066667,2.577778,2.0,0.32,15,9,466.0,0.0,3650.490741,197.0,,95.0,,0.2,0.0,88,2403,43217.0,21189.0,22028.0,1837.940774,594.0,428.0,396531.5,189710.0
2,2.0,0,1,33.48181,126.47352,408,6,1,4.0,0.0,3.0,0.0,5.036124,25.893305,31057,53753,11538,1154,1189,46,25,21,1,0.391576,1.12,0.49246,0.72,0.543855,1.52,3,23,6,0,4.0,2.0,0.0,2.0,0.0,71.0,11.0,50.0,8.0,3.380952,0.52381,2.0,0.32,15,45,164.0,0.0,3650.490741,76.0,,11.0,2.0,0.2,0.0,88,2347,56223.0,27761.0,28462.0,2448.248012,166.913043,28.0,342909.5,156930.5
3,53.0,0,0,33.50577,126.49252,1448,6,1,79.0,0.0,49.0,0.0,2.864166,27.997494,31020,53682,11538,49,1189,46,25,1,1,0.391576,1.12,0.49246,0.72,0.543855,1.52,4,22,6,0,79.0,23.0,0.0,56.0,0.0,23.0,0.0,50.0,8.0,23.0,0.0,2.0,0.32,4,3,2.0,0.0,3650.490741,49.0,,,,0.2,0.0,88,2980,15673.0,7904.0,7769.0,3961.540412,89.130435,3.6,518315.0,210660.0
4,0.0,0,0,33.25579,126.4126,1510,6,1,0.0,1.0,0.0,1.0,29.040353,13.574693,31022,53686,11538,386,1189,39,25,10,1,0.391576,1.12,0.49246,0.72,0.543855,1.52,5,21,6,0,0.0,0.0,0.0,0.0,1.0,1.0,5.0,50.0,8.0,0.1,0.5,2.0,0.32,85,136,3.0,0.0,3650.490741,4.0,,5.0,,0.2,0.0,88,562,4414.0,2252.0,2162.0,1058.012605,6.847826,27.456522,98750.5,93507.5


In [6]:
# Feature columns removed from both frames before modelling.
drop_cols = ['hourly_rain',
       'prev_daily_rain', 'hourly_cloud', 'latlong_second', 'total_population',
       'man_population', 'woman_population']

# Pass the labels through `columns=`: the positional axis argument
# (`drop(cols, 1)`) is deprecated in pandas 1.3 and rejected by pandas 2.0.
train = train.drop(columns=drop_cols)
test = test.drop(columns=drop_cols)


In [7]:
# Preview the training frame again now that `drop_cols` has been removed.
train.head()

Unnamed: 0,18~20_ride,bus_route_id,in_out,latitude,longitude,station_code,dayofweek,weekend,ride_total,takeoff_total,ride_go_to_work,takeoff_go_to_work,dis_jejusi,dis_seoquipo,bus_route_id_station_code,bus_route_id_station_code_weekend,date_fq_enc,station_code_fq_enc,bus_route_id_fq_enc,bus_route_id_station_code_fq_enc,date_bus_route_id_fq_enc,date_station_code_fq_enc,date_bus_route_id_station_code_fq_enc,7~8_ride_date_mean,7~8_ride_date_bus_route_id_mean,8~9_ride_date_mean,8~9_ride_date_bus_route_id_mean,9~10_ride_date_mean,9~10_ride_date_bus_route_id_mean,station_sequence,station_reverse_sequence,weekday,is_national_holiday,getin_total,morning_getin,morning_takeoff,noon_getin,noon_takeoff,station_morning_getin_sum,station_morning_takeoff_sum,bus_route_getin_sum,bus_route_takeoff_sum,station_morning_getin_mean,station_morning_takeoff_mean,bus_route_getin_mean,bus_route_takeoff_mean,kmeans1,kmeans2,regular_commuter_count,afternoon_takeoff,next_bus_time_diff,getin_user_count1_morning,getin_user_count2_morning,takeoff_user_count1_noon,takeoff_user_count2_noon,avg_time_diff,passengers_in,passengers_out,latitude_rank,longitude_rank
0,0.0,0,1,33.4899,126.49373,322,6,1,16.0,0.0,8.0,0.0,2.95492,26.256744,31053,53745,11538,46,1189,46,25,1,1,0.391576,1.12,0.49246,0.72,0.543855,1.52,1,25,6,0,16.0,3.0,0.0,13.0,0.0,3.0,0.0,50.0,8.0,3.0,0.0,2.0,0.32,15,9,9.0,0.0,3650.490741,7.0,1.0,,,3113.015748,15.23913,1.909091,404772.5,214964.5
1,5.0,0,1,33.48944,126.48508,335,6,1,22.0,0.0,10.0,0.0,3.720275,26.403025,31054,53747,11538,2303,1189,46,25,45,1,0.391576,1.12,0.49246,0.72,0.543855,1.52,2,24,6,0,22.0,9.0,0.0,13.0,0.0,183.0,116.0,50.0,8.0,4.066667,2.577778,2.0,0.32,15,9,466.0,0.0,3650.490741,197.0,,95.0,,1837.940774,594.0,428.0,396531.5,189710.0
2,2.0,0,1,33.48181,126.47352,408,6,1,4.0,0.0,3.0,0.0,5.036124,25.893305,31057,53753,11538,1154,1189,46,25,21,1,0.391576,1.12,0.49246,0.72,0.543855,1.52,3,23,6,0,4.0,2.0,0.0,2.0,0.0,71.0,11.0,50.0,8.0,3.380952,0.52381,2.0,0.32,15,45,164.0,0.0,3650.490741,76.0,,11.0,2.0,2448.248012,166.913043,28.0,342909.5,156930.5
3,53.0,0,0,33.50577,126.49252,1448,6,1,79.0,0.0,49.0,0.0,2.864166,27.997494,31020,53682,11538,49,1189,46,25,1,1,0.391576,1.12,0.49246,0.72,0.543855,1.52,4,22,6,0,79.0,23.0,0.0,56.0,0.0,23.0,0.0,50.0,8.0,23.0,0.0,2.0,0.32,4,3,2.0,0.0,3650.490741,49.0,,,,3961.540412,89.130435,3.6,518315.0,210660.0
4,0.0,0,0,33.25579,126.4126,1510,6,1,0.0,1.0,0.0,1.0,29.040353,13.574693,31022,53686,11538,386,1189,39,25,10,1,0.391576,1.12,0.49246,0.72,0.543855,1.52,5,21,6,0,0.0,0.0,0.0,0.0,1.0,1.0,5.0,50.0,8.0,0.1,0.5,2.0,0.32,85,136,3.0,0.0,3650.490741,4.0,,5.0,,1058.012605,6.847826,27.456522,98750.5,93507.5


In [8]:
# Before modeling
# Separate the feature matrices from the target column. `columns=` is used
# because the positional axis argument (`drop(col, 1)`) is deprecated in
# pandas 1.3 and rejected by pandas 2.0.
train_set = train.drop(columns=target_col)
test_set = test.drop(columns=target_col)

train_label = train[target_col]
test_label = test[target_col]

In [9]:
# Basic LGBM Model
# Cross-validation and boosting settings shared by the training cells.
n_splits = 5
NUM_BOOST_ROUND = 100000  # upper bound on boosting rounds passed to lgbm.train
SEED = 1993

# `random_state` is a LightGBM alias of `seed`; the dict previously set both
# (one as a literal 1993, one from SEED). Only `seed` is kept so there is a
# single source of truth and no competing alias entry.
lgbm_param = {
    'objective': 'rmse',
    'boosting_type': 'gbdt',
    'learning_rate': 0.05,
    'subsample': 0.7,
    'subsample_freq': 1,
    'colsample_bytree': 0.78,
    'tree_learner': 'serial',
    'early_stopping_rounds': 50,
    'reg_lambda': 7,
    'reg_alpha': 5,
    'num_leaves': 96,
    'seed': SEED,
}

In [10]:
# StratifiedKFold on bus_route_id, averaged over `len_seeds` random seeds.
# For every seed: fit one LightGBM model per fold, collect out-of-fold (OOF)
# predictions for train and fold-averaged predictions for test, then average
# both across seeds and clip negative predictions to zero.
split_col = 'bus_route_id'
len_seeds = 1

outer_stractified_busroute_oof_train = np.zeros( train.shape[0] )
outer_stractified_busroute_oof_test = np.zeros( test.shape[0] )

for _ in tqdm_notebook(range(len_seeds)):

    # The same seed drives the fold shuffling and LightGBM's `seed` parameter.
    seed = random.randint(1, 100000)

    # Per-seed copy: the shared `lgbm_param` dict is left untouched, and the
    # parameters are built once per seed instead of once per fold.
    seed_param = dict(lgbm_param, seed=seed)

    cv_list = []

    oof_train = np.zeros( train.shape[0] )
    final_test = np.zeros( test.shape[0] )

    kfolds = StratifiedKFold(n_splits = n_splits, shuffle=True, random_state=seed )
    fold_splits = kfolds.split(train_set, train_set[split_col])

    # `total` lets the progress bar show real progress instead of an unknown length.
    for trn_ind, val_ind in tqdm_notebook( fold_splits, total=n_splits ):

        # split() yields positional indices, so the labels must be selected
        # with .iloc too; plain [] is a label lookup that only matches when
        # the index happens to be 0..n-1.
        X_train, y_train = train_set.iloc[trn_ind], train_label.iloc[trn_ind]
        X_valid, y_valid = train_set.iloc[val_ind], train_label.iloc[val_ind]

        dtrain = lgbm.Dataset( X_train, y_train )
        dvalid = lgbm.Dataset( X_valid, y_valid ,reference=dtrain)

        model = lgbm.train(seed_param , dtrain, NUM_BOOST_ROUND, valid_sets=(dtrain, dvalid), valid_names=('train','valid'),
                           categorical_feature=['bus_route_id','station_code','weekday',
                                                'kmeans1','kmeans2',
                                                ] ,
                           verbose_eval= 100)

        valid_pred = model.predict(X_valid)
        test_pred  = model.predict(test_set)

        # Each row belongs to exactly one validation fold per seed.
        oof_train[val_ind] += valid_pred
        final_test += test_pred

        cv_list.append( sqrt(mean_squared_error(y_valid, valid_pred)) )

        print('='*80)

    # Average the test predictions over the folds of this seed.
    final_test /= n_splits

    print(f"Average CV : {np.mean(cv_list)}")
    print(f"RMSE for OOF: {sqrt(mean_squared_error(train_label, oof_train))}")

    outer_stractified_busroute_oof_train += oof_train
    outer_stractified_busroute_oof_test += final_test

# Average over seeds.
outer_stractified_busroute_oof_train /= len_seeds
outer_stractified_busroute_oof_test /= len_seeds

print(f"Overall for OOF: {sqrt(mean_squared_error(train_label, outer_stractified_busroute_oof_train))}")

# PostProcessing: negative predictions are replaced by 0 (vectorised clip
# instead of a Python-level loop over every prediction).
outer_stractified_busroute_oof_train = np.clip(outer_stractified_busroute_oof_train, 0, None)
outer_stractified_busroute_oof_test = np.clip(outer_stractified_busroute_oof_test, 0, None)


print(f"RMSE for OOF: {sqrt(mean_squared_error(train_label, outer_stractified_busroute_oof_train))}")

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.87798	valid's rmse: 2.05096
[200]	train's rmse: 1.64976	valid's rmse: 2.0218
Early stopping, best iteration is:
[231]	train's rmse: 1.60414	valid's rmse: 2.01623
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.84434	valid's rmse: 2.31732
[200]	train's rmse: 1.62809	valid's rmse: 2.27343
[300]	train's rmse: 1.50146	valid's rmse: 2.26811
Early stopping, best iteration is:
[281]	train's rmse: 1.52221	valid's rmse: 2.26741
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.83417	valid's rmse: 2.38316
[200]	train's rmse: 1.61469	valid's rmse: 2.33944
[300]	train's rmse: 1.49065	valid's rmse: 2.33147
[400]	train's rmse: 1.40186	valid's rmse: 2.32681
[500]	train's rmse: 1.33241	valid's rmse: 2.31792
[600]	train's rmse: 1.27531	valid's rmse: 2.31544
Early stopping, best iteration is:
[604]	train's rmse: 1.27338	valid's rmse: 2.31529
Training unti

In [11]:
# Put the seed-averaged test predictions into the sample submission's
# '18~20_ride' column and write the result to disk without the index.
df_sub = (
    pd.read_csv('../raw_dataset/submission_sample.csv')
    .assign(**{'18~20_ride': outer_stractified_busroute_oof_test})
)

df_sub.to_csv('../submission/after.csv', index=False)

In [12]:
# StratifiedKFold on bus_route_id, averaged over `len_seeds` random seeds.
# Same procedure as the single-seed run, repeated for 40 seeds: for every seed
# fit one LightGBM model per fold, collect out-of-fold (OOF) predictions for
# train and fold-averaged predictions for test, then average both across
# seeds and clip negative predictions to zero.
split_col = 'bus_route_id'
len_seeds = 40

outer_stractified_busroute_oof_train = np.zeros( train.shape[0] )
outer_stractified_busroute_oof_test = np.zeros( test.shape[0] )

for _ in tqdm_notebook(range(len_seeds)):

    # The same seed drives the fold shuffling and LightGBM's `seed` parameter.
    seed = random.randint(1, 100000)

    # Per-seed copy: the shared `lgbm_param` dict is left untouched, and the
    # parameters are built once per seed instead of once per fold.
    seed_param = dict(lgbm_param, seed=seed)

    cv_list = []

    oof_train = np.zeros( train.shape[0] )
    final_test = np.zeros( test.shape[0] )

    kfolds = StratifiedKFold(n_splits = n_splits, shuffle=True, random_state=seed )
    fold_splits = kfolds.split(train_set, train_set[split_col])

    # `total` lets the progress bar show real progress instead of an unknown length.
    for trn_ind, val_ind in tqdm_notebook( fold_splits, total=n_splits ):

        # split() yields positional indices, so the labels must be selected
        # with .iloc too; plain [] is a label lookup that only matches when
        # the index happens to be 0..n-1.
        X_train, y_train = train_set.iloc[trn_ind], train_label.iloc[trn_ind]
        X_valid, y_valid = train_set.iloc[val_ind], train_label.iloc[val_ind]

        dtrain = lgbm.Dataset( X_train, y_train )
        dvalid = lgbm.Dataset( X_valid, y_valid ,reference=dtrain)

        model = lgbm.train(seed_param , dtrain, NUM_BOOST_ROUND, valid_sets=(dtrain, dvalid), valid_names=('train','valid'),
                           categorical_feature=['bus_route_id','station_code','weekday',
                                                'kmeans1','kmeans2',
                                                ] ,
                           verbose_eval= 100)

        valid_pred = model.predict(X_valid)
        test_pred  = model.predict(test_set)

        # Each row belongs to exactly one validation fold per seed.
        oof_train[val_ind] += valid_pred
        final_test += test_pred

        cv_list.append( sqrt(mean_squared_error(y_valid, valid_pred)) )

        print('='*80)

    # Average the test predictions over the folds of this seed.
    final_test /= n_splits

    print(f"Average CV : {np.mean(cv_list)}")
    print(f"RMSE for OOF: {sqrt(mean_squared_error(train_label, oof_train))}")

    outer_stractified_busroute_oof_train += oof_train
    outer_stractified_busroute_oof_test += final_test

# Average over seeds.
outer_stractified_busroute_oof_train /= len_seeds
outer_stractified_busroute_oof_test /= len_seeds

print(f"Overall for OOF: {sqrt(mean_squared_error(train_label, outer_stractified_busroute_oof_train))}")

# PostProcessing: negative predictions are replaced by 0 (vectorised clip
# instead of a Python-level loop over every prediction).
outer_stractified_busroute_oof_train = np.clip(outer_stractified_busroute_oof_train, 0, None)
outer_stractified_busroute_oof_test = np.clip(outer_stractified_busroute_oof_test, 0, None)


print(f"RMSE for OOF: {sqrt(mean_squared_error(train_label, outer_stractified_busroute_oof_train))}")

HBox(children=(IntProgress(value=0, max=40), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.83657	valid's rmse: 2.23956
[200]	train's rmse: 1.61828	valid's rmse: 2.22512
Early stopping, best iteration is:
[200]	train's rmse: 1.61828	valid's rmse: 2.22512
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.86582	valid's rmse: 2.14657
[200]	train's rmse: 1.64402	valid's rmse: 2.1241
[300]	train's rmse: 1.51256	valid's rmse: 2.11787
[400]	train's rmse: 1.4202	valid's rmse: 2.11268
[500]	train's rmse: 1.34762	valid's rmse: 2.111
Early stopping, best iteration is:
[503]	train's rmse: 1.34545	valid's rmse: 2.11054
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.86359	valid's rmse: 2.18658
[200]	train's rmse: 1.63914	valid's rmse: 2.14577
[300]	train's rmse: 1.50899	valid's rmse: 2.13736
[400]	train's rmse: 1.41446	valid's rmse: 2.12998
Early stopping, best iteration is:
[439]	train's rmse: 1.38426	valid's rmse: 2.12734
Training until v

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.87028	valid's rmse: 2.17168
[200]	train's rmse: 1.64828	valid's rmse: 2.14186
[300]	train's rmse: 1.51719	valid's rmse: 2.12708
[400]	train's rmse: 1.42267	valid's rmse: 2.12151
[500]	train's rmse: 1.34918	valid's rmse: 2.11837
Early stopping, best iteration is:
[516]	train's rmse: 1.33882	valid's rmse: 2.11689
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.84528	valid's rmse: 2.25942
[200]	train's rmse: 1.62479	valid's rmse: 2.21655
[300]	train's rmse: 1.49813	valid's rmse: 2.20859
[400]	train's rmse: 1.406	valid's rmse: 2.20361
[500]	train's rmse: 1.33524	valid's rmse: 2.20127
[600]	train's rmse: 1.27789	valid's rmse: 2.20055
[700]	train's rmse: 1.23081	valid's rmse: 2.19818
[800]	train's rmse: 1.19028	valid's rmse: 2.19753
Early stopping, best iteration is:
[754]	train's rmse: 1.20804	valid's rmse: 2.1972
Training until validation scores don't improve for 50 rounds


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85864	valid's rmse: 2.19229
[200]	train's rmse: 1.63846	valid's rmse: 2.17174
Early stopping, best iteration is:
[192]	train's rmse: 1.65096	valid's rmse: 2.17131
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.86931	valid's rmse: 2.14892
[200]	train's rmse: 1.64349	valid's rmse: 2.118
[300]	train's rmse: 1.51295	valid's rmse: 2.11138
[400]	train's rmse: 1.41737	valid's rmse: 2.10704
Early stopping, best iteration is:
[362]	train's rmse: 1.45023	valid's rmse: 2.10545
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.87148	valid's rmse: 2.15865
[200]	train's rmse: 1.64609	valid's rmse: 2.113
[300]	train's rmse: 1.51514	valid's rmse: 2.0984
[400]	train's rmse: 1.42161	valid's rmse: 2.08962
[500]	train's rmse: 1.34848	valid's rmse: 2.08731
[600]	train's rmse: 1.29099	valid's rmse: 2.08443
Early stopping, best iteration is:
[576]	train's rms

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.86762	valid's rmse: 2.07533
[200]	train's rmse: 1.64115	valid's rmse: 2.04745
[300]	train's rmse: 1.50959	valid's rmse: 2.04036
[400]	train's rmse: 1.41717	valid's rmse: 2.035
Early stopping, best iteration is:
[355]	train's rmse: 1.45559	valid's rmse: 2.03468
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.86263	valid's rmse: 2.15979
[200]	train's rmse: 1.63679	valid's rmse: 2.12476
[300]	train's rmse: 1.50392	valid's rmse: 2.10617
[400]	train's rmse: 1.41123	valid's rmse: 2.10058
[500]	train's rmse: 1.33952	valid's rmse: 2.09524
[600]	train's rmse: 1.28077	valid's rmse: 2.09202
[700]	train's rmse: 1.23261	valid's rmse: 2.08856
Early stopping, best iteration is:
[744]	train's rmse: 1.21363	valid's rmse: 2.08818
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.86026	valid's rmse: 2.1877
[200]	train's rmse: 1.63486	valid's rmse: 2.156
[3

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85429	valid's rmse: 2.23496
[200]	train's rmse: 1.63185	valid's rmse: 2.20196
[300]	train's rmse: 1.50203	valid's rmse: 2.18558
[400]	train's rmse: 1.41014	valid's rmse: 2.17728
[500]	train's rmse: 1.34014	valid's rmse: 2.1758
[600]	train's rmse: 1.28165	valid's rmse: 2.1744
[700]	train's rmse: 1.23347	valid's rmse: 2.17367
Early stopping, best iteration is:
[664]	train's rmse: 1.2503	valid's rmse: 2.17267
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.83485	valid's rmse: 2.3356
[200]	train's rmse: 1.61857	valid's rmse: 2.29931
[300]	train's rmse: 1.49012	valid's rmse: 2.28662
[400]	train's rmse: 1.39817	valid's rmse: 2.28014
[500]	train's rmse: 1.32868	valid's rmse: 2.276
Early stopping, best iteration is:
[506]	train's rmse: 1.32502	valid's rmse: 2.2757
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.87067	valid's rmse: 2.1596
[200]

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.88581	valid's rmse: 2.0237
[200]	train's rmse: 1.65034	valid's rmse: 2.00057
[300]	train's rmse: 1.51597	valid's rmse: 2.00067
Early stopping, best iteration is:
[267]	train's rmse: 1.55481	valid's rmse: 1.99608
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.8504	valid's rmse: 2.25917
[200]	train's rmse: 1.63082	valid's rmse: 2.22969
[300]	train's rmse: 1.50304	valid's rmse: 2.22126
[400]	train's rmse: 1.41056	valid's rmse: 2.21444
[500]	train's rmse: 1.33928	valid's rmse: 2.20813
[600]	train's rmse: 1.28221	valid's rmse: 2.20532
Early stopping, best iteration is:
[623]	train's rmse: 1.27061	valid's rmse: 2.20421
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85141	valid's rmse: 2.33319
[200]	train's rmse: 1.62812	valid's rmse: 2.27363
[300]	train's rmse: 1.49949	valid's rmse: 2.26069
[400]	train's rmse: 1.40693	valid's rmse: 2.25439

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.8428	valid's rmse: 2.29473
[200]	train's rmse: 1.62405	valid's rmse: 2.25264
[300]	train's rmse: 1.49447	valid's rmse: 2.2427
[400]	train's rmse: 1.40174	valid's rmse: 2.23872
Early stopping, best iteration is:
[383]	train's rmse: 1.41549	valid's rmse: 2.23737
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85917	valid's rmse: 2.23194
[200]	train's rmse: 1.63604	valid's rmse: 2.19543
[300]	train's rmse: 1.50631	valid's rmse: 2.18068
[400]	train's rmse: 1.41213	valid's rmse: 2.17211
[500]	train's rmse: 1.33964	valid's rmse: 2.16804
Early stopping, best iteration is:
[499]	train's rmse: 1.34028	valid's rmse: 2.16798
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.8486	valid's rmse: 2.19857
[200]	train's rmse: 1.62986	valid's rmse: 2.16324
[300]	train's rmse: 1.50389	valid's rmse: 2.15306
[400]	train's rmse: 1.41059	valid's rmse: 2.14954


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85286	valid's rmse: 2.189
[200]	train's rmse: 1.63231	valid's rmse: 2.15664
[300]	train's rmse: 1.50357	valid's rmse: 2.14824
[400]	train's rmse: 1.41199	valid's rmse: 2.14322
[500]	train's rmse: 1.34042	valid's rmse: 2.14247
Early stopping, best iteration is:
[462]	train's rmse: 1.36535	valid's rmse: 2.1417
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.84659	valid's rmse: 2.35175
[200]	train's rmse: 1.6288	valid's rmse: 2.30149
[300]	train's rmse: 1.5003	valid's rmse: 2.29004
[400]	train's rmse: 1.40918	valid's rmse: 2.28081
[500]	train's rmse: 1.33861	valid's rmse: 2.27629
[600]	train's rmse: 1.28179	valid's rmse: 2.27043
[700]	train's rmse: 1.23431	valid's rmse: 2.26752
Early stopping, best iteration is:
[721]	train's rmse: 1.22545	valid's rmse: 2.2675
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85417	valid's rmse: 2.16454
[20

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85375	valid's rmse: 2.21462
[200]	train's rmse: 1.63444	valid's rmse: 2.17525
[300]	train's rmse: 1.50693	valid's rmse: 2.16413
[400]	train's rmse: 1.41427	valid's rmse: 2.15826
[500]	train's rmse: 1.34326	valid's rmse: 2.15411
Early stopping, best iteration is:
[458]	train's rmse: 1.37145	valid's rmse: 2.15332
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.84706	valid's rmse: 2.294
[200]	train's rmse: 1.62687	valid's rmse: 2.2334
[300]	train's rmse: 1.49682	valid's rmse: 2.21783
[400]	train's rmse: 1.40565	valid's rmse: 2.20589
[500]	train's rmse: 1.33334	valid's rmse: 2.20097
[600]	train's rmse: 1.27663	valid's rmse: 2.1972
Early stopping, best iteration is:
[619]	train's rmse: 1.26729	valid's rmse: 2.19538
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.84965	valid's rmse: 2.26363
[200]	train's rmse: 1.62837	valid's rmse: 2.22119
[

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.83796	valid's rmse: 2.24545
[200]	train's rmse: 1.62553	valid's rmse: 2.21822
[300]	train's rmse: 1.50129	valid's rmse: 2.21103
[400]	train's rmse: 1.4092	valid's rmse: 2.207
[500]	train's rmse: 1.33823	valid's rmse: 2.20375
[600]	train's rmse: 1.28155	valid's rmse: 2.20114
[700]	train's rmse: 1.23295	valid's rmse: 2.2004
Early stopping, best iteration is:
[696]	train's rmse: 1.23487	valid's rmse: 2.19986
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85739	valid's rmse: 2.21108
[200]	train's rmse: 1.63724	valid's rmse: 2.18084
[300]	train's rmse: 1.50842	valid's rmse: 2.17083
[400]	train's rmse: 1.41497	valid's rmse: 2.16236
Early stopping, best iteration is:
[412]	train's rmse: 1.40547	valid's rmse: 2.16029
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.87589	valid's rmse: 2.09248
[200]	train's rmse: 1.64649	valid's rmse: 2.06199
[

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.84592	valid's rmse: 2.22867
[200]	train's rmse: 1.62747	valid's rmse: 2.20235
[300]	train's rmse: 1.49934	valid's rmse: 2.19507
Early stopping, best iteration is:
[315]	train's rmse: 1.48387	valid's rmse: 2.19388
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.86358	valid's rmse: 2.14603
[200]	train's rmse: 1.64076	valid's rmse: 2.10566
[300]	train's rmse: 1.51107	valid's rmse: 2.09707
[400]	train's rmse: 1.41862	valid's rmse: 2.09232
[500]	train's rmse: 1.34561	valid's rmse: 2.08964
Early stopping, best iteration is:
[491]	train's rmse: 1.35147	valid's rmse: 2.08825
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.82526	valid's rmse: 2.33182
[200]	train's rmse: 1.60974	valid's rmse: 2.30791
[300]	train's rmse: 1.48542	valid's rmse: 2.30456
Early stopping, best iteration is:
[302]	train's rmse: 1.4835	valid's rmse: 2.30367
Training unti

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.84969	valid's rmse: 2.20917
[200]	train's rmse: 1.62983	valid's rmse: 2.18145
[300]	train's rmse: 1.50115	valid's rmse: 2.17788
Early stopping, best iteration is:
[284]	train's rmse: 1.51899	valid's rmse: 2.17537
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.84834	valid's rmse: 2.24142
[200]	train's rmse: 1.62762	valid's rmse: 2.21996
[300]	train's rmse: 1.49856	valid's rmse: 2.21366
Early stopping, best iteration is:
[312]	train's rmse: 1.48586	valid's rmse: 2.21187
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.83546	valid's rmse: 2.38691
[200]	train's rmse: 1.61977	valid's rmse: 2.33633
[300]	train's rmse: 1.49394	valid's rmse: 2.32031
[400]	train's rmse: 1.40367	valid's rmse: 2.31771
Early stopping, best iteration is:
[392]	train's rmse: 1.40955	valid's rmse: 2.31658
Training until validation scores don't improve for 50 rounds
[

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85644	valid's rmse: 2.17625
[200]	train's rmse: 1.63714	valid's rmse: 2.14234
[300]	train's rmse: 1.50644	valid's rmse: 2.13264
[400]	train's rmse: 1.41473	valid's rmse: 2.13057
Early stopping, best iteration is:
[376]	train's rmse: 1.43434	valid's rmse: 2.12995
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85866	valid's rmse: 2.20293
[200]	train's rmse: 1.63604	valid's rmse: 2.15906
[300]	train's rmse: 1.50779	valid's rmse: 2.14347
[400]	train's rmse: 1.41363	valid's rmse: 2.1369
Early stopping, best iteration is:
[438]	train's rmse: 1.38365	valid's rmse: 2.13402
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.84201	valid's rmse: 2.30903
[200]	train's rmse: 1.62335	valid's rmse: 2.25744
[300]	train's rmse: 1.49633	valid's rmse: 2.24082
[400]	train's rmse: 1.40581	valid's rmse: 2.23304
[500]	train's rmse: 1.3352	valid's rmse: 2.22861

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.87186	valid's rmse: 2.17721
[200]	train's rmse: 1.64422	valid's rmse: 2.1572
[300]	train's rmse: 1.51072	valid's rmse: 2.15212
Early stopping, best iteration is:
[279]	train's rmse: 1.5343	valid's rmse: 2.15051
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85494	valid's rmse: 2.19693
[200]	train's rmse: 1.62874	valid's rmse: 2.16602
[300]	train's rmse: 1.49852	valid's rmse: 2.16133
Early stopping, best iteration is:
[304]	train's rmse: 1.49435	valid's rmse: 2.16126
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85716	valid's rmse: 2.20569
[200]	train's rmse: 1.63637	valid's rmse: 2.18185
[300]	train's rmse: 1.50765	valid's rmse: 2.16889
[400]	train's rmse: 1.41469	valid's rmse: 2.16589
[500]	train's rmse: 1.34285	valid's rmse: 2.16115
Early stopping, best iteration is:
[537]	train's rmse: 1.31969	valid's rmse: 2.1605
Training until 

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.87828	valid's rmse: 2.07447
[200]	train's rmse: 1.65418	valid's rmse: 2.05082
[300]	train's rmse: 1.52302	valid's rmse: 2.04398
[400]	train's rmse: 1.42913	valid's rmse: 2.04052
[500]	train's rmse: 1.35548	valid's rmse: 2.03372
[600]	train's rmse: 1.29626	valid's rmse: 2.03064
Early stopping, best iteration is:
[615]	train's rmse: 1.28844	valid's rmse: 2.03015
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.84491	valid's rmse: 2.25809
[200]	train's rmse: 1.62568	valid's rmse: 2.21189
[300]	train's rmse: 1.49902	valid's rmse: 2.19528
Early stopping, best iteration is:
[331]	train's rmse: 1.46789	valid's rmse: 2.19222
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85928	valid's rmse: 2.17441
[200]	train's rmse: 1.63359	valid's rmse: 2.14758
[300]	train's rmse: 1.50519	valid's rmse: 2.14072
[400]	train's rmse: 1.41232	valid's rmse: 2.137

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.83549	valid's rmse: 2.3353
[200]	train's rmse: 1.62019	valid's rmse: 2.29352
[300]	train's rmse: 1.49324	valid's rmse: 2.2815
[400]	train's rmse: 1.40385	valid's rmse: 2.2786
[500]	train's rmse: 1.3351	valid's rmse: 2.27947
Early stopping, best iteration is:
[457]	train's rmse: 1.36289	valid's rmse: 2.27755
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.84734	valid's rmse: 2.28911
[200]	train's rmse: 1.63122	valid's rmse: 2.26205
[300]	train's rmse: 1.50284	valid's rmse: 2.25006
[400]	train's rmse: 1.40974	valid's rmse: 2.241
[500]	train's rmse: 1.33886	valid's rmse: 2.23787
[600]	train's rmse: 1.28119	valid's rmse: 2.23299
Early stopping, best iteration is:
[636]	train's rmse: 1.26197	valid's rmse: 2.23267
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85893	valid's rmse: 2.20586
[200]	train's rmse: 1.63798	valid's rmse: 2.1704
[300

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85828	valid's rmse: 2.18062
[200]	train's rmse: 1.62735	valid's rmse: 2.16633
[300]	train's rmse: 1.5004	valid's rmse: 2.16407
Early stopping, best iteration is:
[271]	train's rmse: 1.53222	valid's rmse: 2.16199
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85558	valid's rmse: 2.24775
[200]	train's rmse: 1.63034	valid's rmse: 2.2038
[300]	train's rmse: 1.50274	valid's rmse: 2.19536
Early stopping, best iteration is:
[269]	train's rmse: 1.53684	valid's rmse: 2.1932
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.87696	valid's rmse: 2.18083
[200]	train's rmse: 1.6515	valid's rmse: 2.13348
[300]	train's rmse: 1.5181	valid's rmse: 2.1225
[400]	train's rmse: 1.42279	valid's rmse: 2.113
[500]	train's rmse: 1.34962	valid's rmse: 2.11111
Early stopping, best iteration is:
[510]	train's rmse: 1.34354	valid's rmse: 2.11057
Training until valid

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.817	valid's rmse: 2.39085
[200]	train's rmse: 1.60802	valid's rmse: 2.35584
[300]	train's rmse: 1.48367	valid's rmse: 2.34472
[400]	train's rmse: 1.39375	valid's rmse: 2.33553
[500]	train's rmse: 1.32379	valid's rmse: 2.33476
[600]	train's rmse: 1.26885	valid's rmse: 2.32892
[700]	train's rmse: 1.22259	valid's rmse: 2.32752
[800]	train's rmse: 1.18333	valid's rmse: 2.32485
Early stopping, best iteration is:
[836]	train's rmse: 1.17028	valid's rmse: 2.32337
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.87139	valid's rmse: 2.0972
[200]	train's rmse: 1.6485	valid's rmse: 2.07132
[300]	train's rmse: 1.51586	valid's rmse: 2.06856
[400]	train's rmse: 1.42091	valid's rmse: 2.06454
Early stopping, best iteration is:
[415]	train's rmse: 1.40871	valid's rmse: 2.06372
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.86332	valid's rmse: 2.28885
[

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.84032	valid's rmse: 2.24129
[200]	train's rmse: 1.61779	valid's rmse: 2.22086
[300]	train's rmse: 1.48917	valid's rmse: 2.21335
Early stopping, best iteration is:
[285]	train's rmse: 1.50511	valid's rmse: 2.21233
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.86296	valid's rmse: 2.18052
[200]	train's rmse: 1.63976	valid's rmse: 2.14904
[300]	train's rmse: 1.50945	valid's rmse: 2.14047
Early stopping, best iteration is:
[270]	train's rmse: 1.54377	valid's rmse: 2.14023
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85327	valid's rmse: 2.18482
[200]	train's rmse: 1.6324	valid's rmse: 2.14711
[300]	train's rmse: 1.50274	valid's rmse: 2.13514
[400]	train's rmse: 1.40799	valid's rmse: 2.12982
[500]	train's rmse: 1.33699	valid's rmse: 2.12328
[600]	train's rmse: 1.27931	valid's rmse: 2.1227
Early stopping, best iteration is:
[555]	train's 

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.86113	valid's rmse: 2.12171
[200]	train's rmse: 1.63209	valid's rmse: 2.09791
[300]	train's rmse: 1.50088	valid's rmse: 2.09022
Early stopping, best iteration is:
[299]	train's rmse: 1.50215	valid's rmse: 2.0901
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85486	valid's rmse: 2.27297
[200]	train's rmse: 1.63449	valid's rmse: 2.22534
[300]	train's rmse: 1.50508	valid's rmse: 2.21089
[400]	train's rmse: 1.41314	valid's rmse: 2.20394
[500]	train's rmse: 1.3418	valid's rmse: 2.2018
[600]	train's rmse: 1.28417	valid's rmse: 2.19893
[700]	train's rmse: 1.23615	valid's rmse: 2.19648
[800]	train's rmse: 1.19568	valid's rmse: 2.19457
[900]	train's rmse: 1.15987	valid's rmse: 2.19408
Early stopping, best iteration is:
[917]	train's rmse: 1.15426	valid's rmse: 2.19323
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.8588	valid's rmse: 2.24171
[

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.86203	valid's rmse: 2.15706
[200]	train's rmse: 1.64027	valid's rmse: 2.1278
[300]	train's rmse: 1.50989	valid's rmse: 2.12578
[400]	train's rmse: 1.41631	valid's rmse: 2.12407
Early stopping, best iteration is:
[371]	train's rmse: 1.44105	valid's rmse: 2.12301
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.84602	valid's rmse: 2.28706
[200]	train's rmse: 1.62935	valid's rmse: 2.24802
Early stopping, best iteration is:
[240]	train's rmse: 1.57362	valid's rmse: 2.24065
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.8485	valid's rmse: 2.28043
[200]	train's rmse: 1.63067	valid's rmse: 2.23544
[300]	train's rmse: 1.50245	valid's rmse: 2.22083
[400]	train's rmse: 1.41048	valid's rmse: 2.20861
[500]	train's rmse: 1.33973	valid's rmse: 2.20373
Early stopping, best iteration is:
[484]	train's rmse: 1.35018	valid's rmse: 2.20324
Training until

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.8476	valid's rmse: 2.25589
[200]	train's rmse: 1.6247	valid's rmse: 2.23514
[300]	train's rmse: 1.49672	valid's rmse: 2.22452
[400]	train's rmse: 1.40555	valid's rmse: 2.21751
[500]	train's rmse: 1.3357	valid's rmse: 2.2146
[600]	train's rmse: 1.27901	valid's rmse: 2.21383
Early stopping, best iteration is:
[565]	train's rmse: 1.29778	valid's rmse: 2.21302
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.84779	valid's rmse: 2.23387
[200]	train's rmse: 1.62918	valid's rmse: 2.19386
[300]	train's rmse: 1.49969	valid's rmse: 2.18203
[400]	train's rmse: 1.4084	valid's rmse: 2.17468
[500]	train's rmse: 1.33804	valid's rmse: 2.17157
[600]	train's rmse: 1.28104	valid's rmse: 2.16766
[700]	train's rmse: 1.23338	valid's rmse: 2.16556
[800]	train's rmse: 1.19288	valid's rmse: 2.16497
Early stopping, best iteration is:
[817]	train's rmse: 1.18664	valid's rmse: 2.16427
Training unti

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.86246	valid's rmse: 2.16644
[200]	train's rmse: 1.63902	valid's rmse: 2.12692
[300]	train's rmse: 1.51064	valid's rmse: 2.12422
[400]	train's rmse: 1.41757	valid's rmse: 2.12233
Early stopping, best iteration is:
[418]	train's rmse: 1.4026	valid's rmse: 2.12102
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.84003	valid's rmse: 2.31983
[200]	train's rmse: 1.62132	valid's rmse: 2.28931
[300]	train's rmse: 1.49338	valid's rmse: 2.27557
[400]	train's rmse: 1.40388	valid's rmse: 2.26799
[500]	train's rmse: 1.33368	valid's rmse: 2.26591
Early stopping, best iteration is:
[529]	train's rmse: 1.31608	valid's rmse: 2.26417
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85159	valid's rmse: 2.20642
[200]	train's rmse: 1.63193	valid's rmse: 2.17002
[300]	train's rmse: 1.50352	valid's rmse: 2.16342
[400]	train's rmse: 1.41079	valid's rmse: 2.1583

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.83837	valid's rmse: 2.24572
[200]	train's rmse: 1.61888	valid's rmse: 2.21982
[300]	train's rmse: 1.49391	valid's rmse: 2.20824
[400]	train's rmse: 1.40516	valid's rmse: 2.20598
[500]	train's rmse: 1.33571	valid's rmse: 2.20303
[600]	train's rmse: 1.27956	valid's rmse: 2.20078
[700]	train's rmse: 1.23219	valid's rmse: 2.19823
[800]	train's rmse: 1.1923	valid's rmse: 2.19766
[900]	train's rmse: 1.15652	valid's rmse: 2.197
Early stopping, best iteration is:
[903]	train's rmse: 1.15562	valid's rmse: 2.19678
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.87107	valid's rmse: 2.1369
[200]	train's rmse: 1.65052	valid's rmse: 2.08732
[300]	train's rmse: 1.5193	valid's rmse: 2.07554
[400]	train's rmse: 1.42439	valid's rmse: 2.06363
[500]	train's rmse: 1.35189	valid's rmse: 2.06064
Early stopping, best iteration is:
[534]	train's rmse: 1.33056	valid's rmse: 2.05894
Training unti

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.84781	valid's rmse: 2.2386
[200]	train's rmse: 1.6326	valid's rmse: 2.20651
Early stopping, best iteration is:
[247]	train's rmse: 1.56386	valid's rmse: 2.19819
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.83893	valid's rmse: 2.27981
[200]	train's rmse: 1.61647	valid's rmse: 2.23478
[300]	train's rmse: 1.48937	valid's rmse: 2.21946
[400]	train's rmse: 1.39971	valid's rmse: 2.21325
[500]	train's rmse: 1.33093	valid's rmse: 2.20776
[600]	train's rmse: 1.27534	valid's rmse: 2.20469
Early stopping, best iteration is:
[591]	train's rmse: 1.27995	valid's rmse: 2.20447
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85516	valid's rmse: 2.20981
[200]	train's rmse: 1.63474	valid's rmse: 2.17317
[300]	train's rmse: 1.50727	valid's rmse: 2.16837
[400]	train's rmse: 1.41413	valid's rmse: 2.16227
[500]	train's rmse: 1.34178	valid's rmse: 2.15973

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.83303	valid's rmse: 2.35725
[200]	train's rmse: 1.61734	valid's rmse: 2.2979
[300]	train's rmse: 1.49254	valid's rmse: 2.27443
[400]	train's rmse: 1.4012	valid's rmse: 2.26302
[500]	train's rmse: 1.33363	valid's rmse: 2.25776
[600]	train's rmse: 1.27888	valid's rmse: 2.25496
[700]	train's rmse: 1.23175	valid's rmse: 2.25107
[800]	train's rmse: 1.19118	valid's rmse: 2.24822
Early stopping, best iteration is:
[812]	train's rmse: 1.18656	valid's rmse: 2.24786
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.86281	valid's rmse: 2.1602
[200]	train's rmse: 1.63776	valid's rmse: 2.1275
[300]	train's rmse: 1.5076	valid's rmse: 2.11988
Early stopping, best iteration is:
[260]	train's rmse: 1.55347	valid's rmse: 2.11817
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.84962	valid's rmse: 2.203
[200]	train's rmse: 1.62668	valid's rmse: 2.1728
[300]

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.86485	valid's rmse: 2.21065
[200]	train's rmse: 1.63953	valid's rmse: 2.17143
[300]	train's rmse: 1.50705	valid's rmse: 2.15583
[400]	train's rmse: 1.41518	valid's rmse: 2.1489
[500]	train's rmse: 1.3433	valid's rmse: 2.14613
[600]	train's rmse: 1.28515	valid's rmse: 2.14432
Early stopping, best iteration is:
[644]	train's rmse: 1.26248	valid's rmse: 2.14285
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.86246	valid's rmse: 2.21117
[200]	train's rmse: 1.6378	valid's rmse: 2.16932
[300]	train's rmse: 1.50561	valid's rmse: 2.15721
[400]	train's rmse: 1.41384	valid's rmse: 2.1509
[500]	train's rmse: 1.34211	valid's rmse: 2.14545
[600]	train's rmse: 1.28397	valid's rmse: 2.1426
Early stopping, best iteration is:
[580]	train's rmse: 1.29486	valid's rmse: 2.14215
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.86305	valid's rmse: 2.23821
[2

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.87362	valid's rmse: 2.19284
[200]	train's rmse: 1.6443	valid's rmse: 2.15728
[300]	train's rmse: 1.51251	valid's rmse: 2.14285
Early stopping, best iteration is:
[289]	train's rmse: 1.52528	valid's rmse: 2.14147
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85343	valid's rmse: 2.31367
[200]	train's rmse: 1.63415	valid's rmse: 2.25708
[300]	train's rmse: 1.50579	valid's rmse: 2.23445
[400]	train's rmse: 1.41475	valid's rmse: 2.22635
[500]	train's rmse: 1.34488	valid's rmse: 2.22233
[600]	train's rmse: 1.28674	valid's rmse: 2.21529
[700]	train's rmse: 1.23824	valid's rmse: 2.21515
Early stopping, best iteration is:
[662]	train's rmse: 1.25572	valid's rmse: 2.21369
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85299	valid's rmse: 2.19174
[200]	train's rmse: 1.62901	valid's rmse: 2.14583
[300]	train's rmse: 1.49832	valid's rmse: 2.1358

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85967	valid's rmse: 2.23308
[200]	train's rmse: 1.63413	valid's rmse: 2.19533
[300]	train's rmse: 1.50259	valid's rmse: 2.18683
[400]	train's rmse: 1.4108	valid's rmse: 2.18229
[500]	train's rmse: 1.33927	valid's rmse: 2.17744
[600]	train's rmse: 1.28204	valid's rmse: 2.175
[700]	train's rmse: 1.2338	valid's rmse: 2.17235
[800]	train's rmse: 1.19271	valid's rmse: 2.17205
Early stopping, best iteration is:
[777]	train's rmse: 1.20162	valid's rmse: 2.17138
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.83968	valid's rmse: 2.2821
[200]	train's rmse: 1.62002	valid's rmse: 2.24974
[300]	train's rmse: 1.49365	valid's rmse: 2.24516
Early stopping, best iteration is:
[273]	train's rmse: 1.5231	valid's rmse: 2.24446
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.87843	valid's rmse: 2.16186
[200]	train's rmse: 1.65002	valid's rmse: 2.12382
[30

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85941	valid's rmse: 2.22583
[200]	train's rmse: 1.63908	valid's rmse: 2.19771
[300]	train's rmse: 1.50912	valid's rmse: 2.18991
[400]	train's rmse: 1.41607	valid's rmse: 2.1868
[500]	train's rmse: 1.3438	valid's rmse: 2.18311
Early stopping, best iteration is:
[540]	train's rmse: 1.31904	valid's rmse: 2.18043
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.87368	valid's rmse: 2.16864
[200]	train's rmse: 1.64576	valid's rmse: 2.13801
[300]	train's rmse: 1.5147	valid's rmse: 2.129
[400]	train's rmse: 1.41935	valid's rmse: 2.12553
Early stopping, best iteration is:
[397]	train's rmse: 1.42189	valid's rmse: 2.12515
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85253	valid's rmse: 2.25801
[200]	train's rmse: 1.63041	valid's rmse: 2.22543
[300]	train's rmse: 1.50252	valid's rmse: 2.21487
Early stopping, best iteration is:
[323]	train's rms

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85366	valid's rmse: 2.2198
[200]	train's rmse: 1.6331	valid's rmse: 2.19169
[300]	train's rmse: 1.50333	valid's rmse: 2.18294
Early stopping, best iteration is:
[335]	train's rmse: 1.46838	valid's rmse: 2.1812
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.84669	valid's rmse: 2.27548
[200]	train's rmse: 1.62995	valid's rmse: 2.22426
[300]	train's rmse: 1.50436	valid's rmse: 2.21716
[400]	train's rmse: 1.41392	valid's rmse: 2.2091
Early stopping, best iteration is:
[445]	train's rmse: 1.38001	valid's rmse: 2.20509
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.83713	valid's rmse: 2.3117
[200]	train's rmse: 1.61801	valid's rmse: 2.27886
[300]	train's rmse: 1.4925	valid's rmse: 2.26753
Early stopping, best iteration is:
[313]	train's rmse: 1.47897	valid's rmse: 2.26454
Training until validation scores don't improve for 50 rounds
[100]	t

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.87313	valid's rmse: 2.17946
[200]	train's rmse: 1.64732	valid's rmse: 2.14377
[300]	train's rmse: 1.51135	valid's rmse: 2.13715
Early stopping, best iteration is:
[289]	train's rmse: 1.52371	valid's rmse: 2.13477
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.84242	valid's rmse: 2.2916
[200]	train's rmse: 1.62239	valid's rmse: 2.26347
[300]	train's rmse: 1.49579	valid's rmse: 2.25004
[400]	train's rmse: 1.40472	valid's rmse: 2.24587
[500]	train's rmse: 1.33422	valid's rmse: 2.24134
[600]	train's rmse: 1.27663	valid's rmse: 2.23754
[700]	train's rmse: 1.22938	valid's rmse: 2.23664
Early stopping, best iteration is:
[725]	train's rmse: 1.21839	valid's rmse: 2.23531
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85934	valid's rmse: 2.19394
[200]	train's rmse: 1.63699	valid's rmse: 2.18052
Early stopping, best iteration is:
[208]	train's

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.8479	valid's rmse: 2.28968
[200]	train's rmse: 1.63094	valid's rmse: 2.26091
[300]	train's rmse: 1.50242	valid's rmse: 2.24738
[400]	train's rmse: 1.41157	valid's rmse: 2.23648
[500]	train's rmse: 1.34	valid's rmse: 2.22665
[600]	train's rmse: 1.28232	valid's rmse: 2.22176
[700]	train's rmse: 1.23437	valid's rmse: 2.21936
[800]	train's rmse: 1.19339	valid's rmse: 2.21899
[900]	train's rmse: 1.15758	valid's rmse: 2.2174
Early stopping, best iteration is:
[912]	train's rmse: 1.15373	valid's rmse: 2.21652
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.857	valid's rmse: 2.21931
[200]	train's rmse: 1.63021	valid's rmse: 2.16277
[300]	train's rmse: 1.50161	valid's rmse: 2.15311
[400]	train's rmse: 1.40916	valid's rmse: 2.14142
[500]	train's rmse: 1.33766	valid's rmse: 2.13695
[600]	train's rmse: 1.27968	valid's rmse: 2.13406
[700]	train's rmse: 1.23192	valid's rmse: 2.13359


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.84182	valid's rmse: 2.23627
[200]	train's rmse: 1.62341	valid's rmse: 2.22658
Early stopping, best iteration is:
[229]	train's rmse: 1.57971	valid's rmse: 2.22217
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.83915	valid's rmse: 2.28382
[200]	train's rmse: 1.61788	valid's rmse: 2.25355
[300]	train's rmse: 1.49136	valid's rmse: 2.24706
[400]	train's rmse: 1.4024	valid's rmse: 2.24243
Early stopping, best iteration is:
[359]	train's rmse: 1.43604	valid's rmse: 2.24135
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.83706	valid's rmse: 2.34491
[200]	train's rmse: 1.62626	valid's rmse: 2.28214
[300]	train's rmse: 1.49939	valid's rmse: 2.26329
[400]	train's rmse: 1.41	valid's rmse: 2.25309
[500]	train's rmse: 1.34026	valid's rmse: 2.24486
[600]	train's rmse: 1.28365	valid's rmse: 2.24175
Early stopping, best iteration is:
[637]	train's rm

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.86849	valid's rmse: 2.17314
[200]	train's rmse: 1.64469	valid's rmse: 2.13654
[300]	train's rmse: 1.51365	valid's rmse: 2.11674
[400]	train's rmse: 1.41822	valid's rmse: 2.10692
[500]	train's rmse: 1.34466	valid's rmse: 2.10165
Early stopping, best iteration is:
[493]	train's rmse: 1.34958	valid's rmse: 2.10071
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85473	valid's rmse: 2.18381
[200]	train's rmse: 1.63498	valid's rmse: 2.15021
Early stopping, best iteration is:
[229]	train's rmse: 1.59206	valid's rmse: 2.14388
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.83512	valid's rmse: 2.34084
[200]	train's rmse: 1.61634	valid's rmse: 2.30742
[300]	train's rmse: 1.49233	valid's rmse: 2.30227
[400]	train's rmse: 1.40378	valid's rmse: 2.29548
[500]	train's rmse: 1.3354	valid's rmse: 2.29285
[600]	train's rmse: 1.27871	valid's rmse: 2.2874

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.8523	valid's rmse: 2.29219
[200]	train's rmse: 1.63136	valid's rmse: 2.23633
[300]	train's rmse: 1.50338	valid's rmse: 2.22998
Early stopping, best iteration is:
[324]	train's rmse: 1.47813	valid's rmse: 2.22871
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.86744	valid's rmse: 2.09595
[200]	train's rmse: 1.6467	valid's rmse: 2.07708
[300]	train's rmse: 1.51388	valid's rmse: 2.06675
Early stopping, best iteration is:
[331]	train's rmse: 1.48131	valid's rmse: 2.0647
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.87129	valid's rmse: 2.06437
[200]	train's rmse: 1.64041	valid's rmse: 2.05229
Early stopping, best iteration is:
[154]	train's rmse: 1.72748	valid's rmse: 2.04874
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.84372	valid's rmse: 2.28655
[200]	train's rmse: 1.62992	valid's rmse: 2.25071
[300

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.8496	valid's rmse: 2.20367
[200]	train's rmse: 1.62568	valid's rmse: 2.18581
[300]	train's rmse: 1.49694	valid's rmse: 2.18512
[400]	train's rmse: 1.40522	valid's rmse: 2.18352
Early stopping, best iteration is:
[368]	train's rmse: 1.43211	valid's rmse: 2.18204
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.84983	valid's rmse: 2.28905
[200]	train's rmse: 1.63124	valid's rmse: 2.23679
[300]	train's rmse: 1.50154	valid's rmse: 2.22908
[400]	train's rmse: 1.41106	valid's rmse: 2.21983
[500]	train's rmse: 1.33897	valid's rmse: 2.21199
[600]	train's rmse: 1.28202	valid's rmse: 2.20827
Early stopping, best iteration is:
[589]	train's rmse: 1.28778	valid's rmse: 2.20724
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.84485	valid's rmse: 2.32901
[200]	train's rmse: 1.62661	valid's rmse: 2.28533
[300]	train's rmse: 1.49754	valid's rmse: 2.2811

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.83249	valid's rmse: 2.37617
[200]	train's rmse: 1.619	valid's rmse: 2.32409
[300]	train's rmse: 1.49265	valid's rmse: 2.30663
Early stopping, best iteration is:
[312]	train's rmse: 1.4807	valid's rmse: 2.30535
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85403	valid's rmse: 2.17209
[200]	train's rmse: 1.63081	valid's rmse: 2.16428
Early stopping, best iteration is:
[210]	train's rmse: 1.61521	valid's rmse: 2.16263
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.88532	valid's rmse: 2.15956
[200]	train's rmse: 1.65957	valid's rmse: 2.09603
[300]	train's rmse: 1.52645	valid's rmse: 2.07465
[400]	train's rmse: 1.43027	valid's rmse: 2.06601
[500]	train's rmse: 1.3567	valid's rmse: 2.06195
[600]	train's rmse: 1.29687	valid's rmse: 2.05503
Early stopping, best iteration is:
[603]	train's rmse: 1.29522	valid's rmse: 2.05451
Training until v

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.86615	valid's rmse: 2.12054
[200]	train's rmse: 1.64436	valid's rmse: 2.07878
[300]	train's rmse: 1.51254	valid's rmse: 2.07254
[400]	train's rmse: 1.42034	valid's rmse: 2.06968
Early stopping, best iteration is:
[378]	train's rmse: 1.43878	valid's rmse: 2.0687
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.8602	valid's rmse: 2.28038
[200]	train's rmse: 1.6373	valid's rmse: 2.23244
[300]	train's rmse: 1.50652	valid's rmse: 2.22393
[400]	train's rmse: 1.41317	valid's rmse: 2.21857
[500]	train's rmse: 1.34087	valid's rmse: 2.21145
[600]	train's rmse: 1.28219	valid's rmse: 2.20911
Early stopping, best iteration is:
[559]	train's rmse: 1.30515	valid's rmse: 2.20869
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.84711	valid's rmse: 2.35927
[200]	train's rmse: 1.6254	valid's rmse: 2.31839
[300]	train's rmse: 1.49526	valid's rmse: 2.30935
[

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85686	valid's rmse: 2.17175
[200]	train's rmse: 1.63613	valid's rmse: 2.15395
[300]	train's rmse: 1.5062	valid's rmse: 2.14474
[400]	train's rmse: 1.41473	valid's rmse: 2.1387
[500]	train's rmse: 1.3428	valid's rmse: 2.13622
[600]	train's rmse: 1.28536	valid's rmse: 2.13346
[700]	train's rmse: 1.23586	valid's rmse: 2.13278
Early stopping, best iteration is:
[654]	train's rmse: 1.25823	valid's rmse: 2.13087
Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 1.85554	valid's rmse: 2.22456
[200]	train's rmse: 1.63956	valid's rmse: 2.19153
[300]	train's rmse: 1.50801	valid's rmse: 2.18189
[400]	train's rmse: 1.41601	valid's rmse: 2.17629
[500]	train's rmse: 1.3449	valid's rmse: 2.1748
[600]	train's rmse: 1.28686	valid's rmse: 2.17363
Early stopping, best iteration is:
[587]	train's rmse: 1.29411	valid's rmse: 2.17291
Training until validation scores don't improve for 50 rounds
[1

In [14]:
# Persist the predictions of the LightGBM run logged above:
#   * df_oof - predictions for the training rows
#   * df_sub - predictions for the test rows, in the sample-submission layout
# Each frame is read from the raw competition file and its '18~20_ride' column
# is then set to the model output.

# Only 'id' and the target column are loaded; the target values read from disk
# are immediately replaced by the predictions.
# NOTE(review): assumes outer_stractified_busroute_oof_train is in the same row
# order as train.csv - confirm against the preprocessing notebook.
df_oof = pd.read_csv('../raw_dataset/train.csv', usecols = ['id','18~20_ride'])
df_oof['18~20_ride'] = outer_stractified_busroute_oof_train

# NOTE(review): an earlier revision of this cell wrote the file under '../oof/'
# instead of '../' - confirm which location the downstream code reads.
df_oof.to_csv('../lgbm_40_seeds_stractified5k_bus_route_id.csv',index=False)

# NOTE(review): assumes outer_stractified_busroute_oof_test follows the row
# order of submission_sample.csv - confirm.
df_sub = pd.read_csv('../raw_dataset/submission_sample.csv')
df_sub['18~20_ride'] = outer_stractified_busroute_oof_test

# to_csv does not create missing parent directories. Make sure the target
# folder exists so that this cell cannot fail on a fresh checkout after the
# whole training run has already completed.
os.makedirs('../submission', exist_ok=True)
df_sub.to_csv('../submission/lgbm_40_seeds_stractified5k_bus_route_id.csv',index=False)