In [1]:
%matplotlib inline
import matplotlib.pyplot as plt

import gc
import time
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix, hstack
from time import gmtime, strftime

from sklearn.linear_model import Ridge, LogisticRegression
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split, cross_val_score,KFold
from sklearn.metrics import make_scorer, r2_score, mean_squared_error
import lightgbm as lgb
from sklearn.metrics import mean_squared_log_error
from nltk.stem.porter import PorterStemmer
from sklearn.decomposition import TruncatedSVD

import scipy
import gc
import sys

#Add https://www.kaggle.com/anttip/wordbatch to your kernel Data Sources, 
#until Kaggle admins fix the wordbatch pip package installation
# sys.path.insert(0, '../input/wordbatch/wordbatch/')
##import wordbatch
##from wordbatch.extractors import WordBag, WordHash
##from wordbatch.models import FTRL, FM_FTRL

from nltk.corpus import stopwords
import re

def rmsle(y, y_pred):
     assert len(y) == len(y_pred)
     y = np.array(y)
     y_pred = np.array(y_pred)
     y_pred[y_pred<0] = 0
     y[y<0] = 0   
     
     return np.sqrt(np.mean(np.power(np.log1p(y)-np.log1p(y_pred), 2)))
    

In [3]:
NUM_BRANDS = 4500
NUM_CATEGORIES = 1250

develop = False
# develop= True

start_time = time.time()
print(strftime("%Y-%m-%d %H:%M:%S", gmtime()))

2018-02-06 20:04:59


In [36]:
LOAD_TEST = True
MAKE_SAMPLE = False

start = time.clock() 
 
train = pd.read_table('input/train.tsv', engine='c')
if MAKE_SAMPLE:
    train = train.sample(n=100000).reset_index().drop('index',axis=1).copy(deep=True)
 

print('Train shape: ', train.shape)
merged = pd.concat([train])
nrow_train = train.shape[0]
del train


if LOAD_TEST:
    test = pd.read_table('input/test.tsv', engine='c')
    if MAKE_SAMPLE:
        test = test.sample(n=100000).reset_index().drop('index',axis=1).copy(deep=True)
    
    print('Test shape: ', test.shape)
    merged = pd.concat([merged, test])
    nrow_test = test.shape[0]
    del test
    
print('Merged shape: ', merged.shape)
print ("Time passed min:",(time.clock()-start)/60)

merged.head()

Train shape:  (1482535, 8)
Test shape:  (693359, 7)
Merged shape:  (2175894, 9)
Time passed min: 0.13181709575634007


Unnamed: 0,brand_name,category_name,item_condition_id,item_description,name,price,shipping,test_id,train_id
0,,Men/Tops/T-shirts,3,No description yet,MLB Cincinnati Reds T Shirt Size XL,10.0,1,,0.0
1,Razer,Electronics/Computers & Tablets/Components & P...,3,This keyboard is in great condition and works ...,Razer BlackWidow Chroma Keyboard,52.0,0,,1.0
2,Target,Women/Tops & Blouses/Blouse,1,Adorable top with a hint of lace and a key hol...,AVA-VIV Blouse,10.0,1,,2.0
3,,Home/Home Décor/Home Décor Accents,1,New with tags. Leather horses. Retail for [rm]...,Leather Horse Statues,35.0,1,,3.0
4,,Women/Jewelry/Necklaces,1,Complete with certificate of authenticity,24K GOLD plated rose,44.0,0,,4.0


In [37]:
## preprocess - splitting category names
##


start = time.clock()
tt = pd.DataFrame([ str(x).split("/") for x in merged['category_name']])
cat_col_names = ['general_cat','subcat_1','subcat_2','subcat_3','subcat_4']
tt.columns = ['general_cat','subcat_1','subcat_2','subcat_3','subcat_4']

merged[cat_col_names] = tt
merged["category_name"] = merged["category_name"].fillna("Other").astype("category")

## merged.drop('category_name', axis=1, inplace=True)
del tt
print ("Time passed min:",(time.clock()-start)/60)

Time passed min: 0.18494980450585102


In [38]:
## replace missing values
start = time.clock()
for col_name in  cat_col_names+['brand_name']+['item_description']:
    merged['mis_'+col_name] = 0
    merged[col_name].fillna(value='missing', inplace=True)
    merged.loc[merged[col_name]=='missing','mis_'+col_name] = 1

merged.head()
print ("Time passed min:",(time.clock()-start)/60)

Time passed min: 0.05808154993574798


In [39]:
VAL_MISS_MARGIN = 10

### delete rare brands
##
start = time.clock()
pop_brand = merged['brand_name'].value_counts().loc[lambda x: x<=VAL_MISS_MARGIN].index.values
merged.loc[merged['brand_name'].isin(pop_brand), 'mis_brand_name'] = 1
merged.loc[merged['brand_name'].isin(pop_brand), 'brand_name'] = 'missing'

## delete rare categories
##

pop_category1 = merged['general_cat'].value_counts().loc[lambda x: x<=VAL_MISS_MARGIN].index.values
pop_category2 = merged['subcat_1'].value_counts().loc[lambda x: x<=VAL_MISS_MARGIN].index.values
pop_category3 = merged['subcat_2'].value_counts().loc[lambda x: x<=VAL_MISS_MARGIN].index.values
# merged.loc[merged['general_cat'].isin(pop_brand), 'mis_general_cat'] = 1
# merged.loc[merged['general_cat'].isin(pop_category1), 'general_cat'] = 'missing'
# merged.loc[merged['subcat_1'].isin(pop_brand), 'mis_subcat_1'] = 1
# merged.loc[merged['subcat_1'].isin(pop_category2), 'subcat_1'] = 'missing'
# merged.loc[merged['subcat_2'].isin(pop_brand), 'mis_subcat_2'] = 1
# merged.loc[merged['subcat_2'].isin(pop_category3), 'subcat_2'] = 'missing'
    
print (pop_brand)
print (len(pop_category1),pop_category1[1:5])
print (len(pop_category2),pop_category2[1:5])
print (len(pop_category3),pop_category3[1:5])
print ("Time passed min:",(time.clock()-start)/60)

['Doe' 'Travis Mathew' 'ExerSaucer' ..., 'First Alert' 'Archie Comics'
 'David Meister']
0 []
0 []
154 ['Diaper Stackers & Caddies' 'Calendar' 'Kitchen Safety' 'Presentation']
Time passed min: 0.09121411741102557


In [40]:
## count non-utf characters
s = "☆Please read bio☆ Women's sandal bundle"
def count_nonutf(s):
    s_clean = re.sub(r'[^\x00-\x7F]+','', s)
    return len(s)-len(s_clean)

start = time.clock()
tt = pd.DataFrame([ count_nonutf(x) for x in merged['item_description']])
merged['non_utf_descr'] = tt

tt = pd.DataFrame([ count_nonutf(x) for x in merged['name']])
merged['non_utf_name'] = tt

print ("Time passed min:",(time.clock()-start)/60)

Time passed min: 0.19306816159459003


In [11]:
## Building features with counts of all Non-utf symbols
##
s = "☆Please read bio☆ Women's sandal bundle"

def all_non_utf(s):
    s_clean = re.sub(r'[\x00-\x7F]','', s)
    char_dict = dict()
    for k in s_clean:
        if k not in char_dict.keys():
            char_dict[k]=s_clean.count(k)
    return char_dict

start = time.clock()
print ("Counting non utf symbols")
tt = pd.DataFrame([ all_non_utf(x) for x in merged['item_description']])
tt.fillna(0,inplace=True)

print ("Converting to sparse")
X_non_utf_uniq = scipy.sparse.csr_matrix(X_uniq)

print ("Doing svd")
svd = TruncatedSVD(n_components = 10)
X_non_utf_uniq = svd.fit_transform(X_non_utf_uniq)
X_non_utf_uniq = pd.DataFrame(X_non_utf_uniq)
print ("Time passed min:",(time.clock()-start)/60)

Counting non utf symbols


KeyboardInterrupt: 

In [342]:
ss = "!!sdfdsf"
s_clean = re.sub('[^! ]+','', ss)
s_clean

'!!'

In [41]:
## Count number of letters in upper case
##
def perc_symbols(s,pattern):
    try:
        s_clean = re.sub(pattern,'', s)
        return len(s_clean)/len(s)
    except:
        return 0

def count_symbols(s, pattern):
    try:
        s_clean = re.sub(pattern,'', s)
        return len(s_clean)
    except:
        return 0
    
## upper case
##
start = time.clock()
tt = pd.DataFrame([perc_symbols(x,r'[^A-Z ]+') for x in merged['item_description']])
merged['upper_perc_descr'] = tt

tt = pd.DataFrame([perc_symbols(x,r'[^A-Z ]+') for x in merged['name']])
merged['upper_perc_name'] = tt

tt = pd.DataFrame([count_symbols(x,r'[^A-Z ]+') for x in merged['item_description']])
merged['count_upper_descr'] = tt

tt = pd.DataFrame([count_symbols(x,r'[^A-Z ]+') for x in merged['name']])
merged['count_upper_name'] = tt

## exclamation points
tt = pd.DataFrame([perc_symbols(x,r'[^!]+') for x in merged['item_description']])
merged['upper_perc_excl'] = tt

tt = pd.DataFrame([count_symbols(x,r'[^!]+') for x in merged['item_description']])
merged['count_upper_excl'] = tt

print ("Time passed min:",(time.clock()-start)/60)

Time passed min: 0.7774343186215655


In [42]:
## length of description and brand name
start = time.clock()
tt = pd.DataFrame([len(x) for x in merged['item_description']])
merged['len_descr'] = tt

tt = pd.DataFrame([len(x) for x in merged['name']])
merged['len_name'] = tt
print ("Time passed min:",(time.clock()-start)/60)


Time passed min: 0.026048465785513977


In [43]:
merged.loc[1,'item_description']

1    This keyboard is in great condition and works ...
1    25 pcs NEW 7.5"x12" Kraft Bubble Mailers Lined...
Name: item_description, dtype: object

In [44]:
def search_in_str(s,string_val):
    instr = re.findall(string_val,s,flags=re.IGNORECASE)
    if len(instr)>0:
        return 1
    else:
        return 0

def search_in_merged(string_val):
    tt1 = ([search_in_str(x,string_val) for x in merged['item_description']])
    tt2 = ([search_in_str(x,string_val) for x in merged['name']])
    tt =  pd.DataFrame({'tt1':tt1,'tt2':tt2}).max(axis=1)
    return tt

start = time.clock()
merged['a_free'] = search_in_merged('Free')
merged['a_free_ship'] = search_in_merged('Free ship')
merged['a_new'] = search_in_merged('new')
merged['a_sale'] = search_in_merged('sale')
merged['a_gift'] = search_in_merged('gift')
merged['a_nwt'] = search_in_merged('nwt')
merged['a_100perc'] = search_in_merged('100%')
print ("Time passed min:",(time.clock()-start)/60)

Time passed min: 1.1048959297186287


In [45]:
merged.sample(10)[['name','brand_name','item_description','a_free','a_free_ship','a_new','a_sale','a_gift','a_nwt','a_100perc']]

Unnamed: 0,name,brand_name,item_description,a_free,a_free_ship,a_new,a_sale,a_gift,a_nwt,a_100perc
311357,Npc show bikini,missing,Leather look black bodybuilding competition su...,0,0,0,0,0,0,0
609991,Delias shimmer long sleeve sweater m,dELiA*s,Delias silver shimmer long sleeve sweater size M,0,0,0,0,0,0,0
1086690,Grey lulu long sleeve,Lululemon,"Size 6, no flaws at all, Wore it a couple of t...",0,0,0,0,0,0,0
127216,Victoria secret pink,PINK,Full zip hoodie size small,0,0,0,0,0,0,0
1381053,Sale! Guy Harvey long sleeve t-shirt,Guy Harvey,Guy Harvey t-shirt Size M Long sleeve Like new,0,0,1,1,0,0,0
974099,FSU Seminoles Long Sleeve NWOT,missing,Size medium,0,0,0,0,0,0,0
151629,Coach purse,missing,Beautiful suede detailed purse in great condit...,1,0,0,0,0,0,0
176384,Forever 21 collared sweater,FOREVER 21,Forever 21 collared sweater. Navy and cream st...,1,1,1,0,0,0,0
620048,Mont blanc Pen,missing,Authentic used mont blanc black pen. Cracked t...,0,0,0,0,0,0,0
116165,20g Red Stud Piercing,missing,Crystal Stud Piercing ***This is for ONE stud ...,1,0,1,0,0,0,0


In [46]:
NAME_MIN_DF = 10 ## lower bound for word count
MAX_FEAT_DESCP = 5000

print("Name Encodings")

start = time.clock()

##count = CountVectorizer(min_df=NAME_MIN_DF)
##X_name = count.fit_transform(merged["name"])
count_descp = TfidfVectorizer(max_features = MAX_FEAT_DESCP, 
                              ngram_range = (1,3),
                              stop_words = "english")
X_name = count_descp.fit_transform(merged["name"])


print("Category Encoders")
count_category = CountVectorizer()
X_category = count_category.fit_transform(merged["category_name"])
X_gen_cat = count_category.fit_transform(merged["general_cat"])
X_cat1 = count_category.fit_transform(merged["subcat_1"])
X_cat2 = count_category.fit_transform(merged["subcat_2"])
X_cat3 = count_category.fit_transform(merged["subcat_3"])


print("Descp encoders")
count_descp = TfidfVectorizer(max_features = MAX_FEAT_DESCP, 
                              ngram_range = (1,3),
                              stop_words = "english")
X_descp = count_descp.fit_transform(merged["item_description"])

print("Brand encoders")
vect_brand = LabelBinarizer(sparse_output=True)
X_brand = vect_brand.fit_transform(merged["brand_name"])

print("Dummy Encoders")
merged['item_condition_id'] = merged['item_condition_id'].astype('category')
merged['shipping'] = merged['shipping'].astype('category')
X_dummies = scipy.sparse.csr_matrix(pd.get_dummies(merged[["item_condition_id", "shipping"]], sparse = True).values)

print ("Time passed min:",(time.clock()-start)/60)

Name Encodings
Category Encoders
Descp encoders
Brand encoders
Dummy Encoders
Time passed min: 5.178648224595313


In [62]:
X = scipy.sparse.hstack((X_dummies, 
                         X_descp,
                         X_brand,
                         X_gen_cat, X_cat1, X_cat2, X_cat3,
                         X_name
                        ,merged[['mis_general_cat','mis_subcat_1','mis_subcat_2','mis_subcat_3','mis_subcat_4','mis_brand_name','mis_item_description']]
                        ,merged[['non_utf_descr','non_utf_name','upper_perc_descr','upper_perc_name']]
                        ##,X_non_utf_uniq
                       ,merged[['a_free','a_free_ship','a_new','a_sale','a_gift','a_nwt','a_100perc']]
                      ,merged[['count_upper_excl']]
                        )).tocsr()
print (X.shape)
## del X_dummies, X_descp, X_brand,X_category,X_name, X_gen_cat, X_cat1, X_cat2, X_cat3
gc.collect()

## 'count_upper_descr','count_upper_name','len_descr','len_name'

(2175894, 13122)


404

In [48]:
## save data
##
scipy.sparse.save_npz('model/x.npz',X)
merged.to_csv('model/merged.csv',encoding='utf-8')

In [155]:
## load data
##

X = scipy.sparse.load_npz('model/x.npz')
merged = pd.read_csv('model/merged.csv')


In [329]:
train_test_split??

In [63]:
y_merged_train = merged['price'][:nrow_train]
y_merged_train = np.log1p(y_merged_train)
X_merged_train = X[:nrow_train]

X_train, X_valid, y_train, y_valid = train_test_split(X_merged_train, y_merged_train, test_size=0.2, random_state =102)
k_fold = KFold(n_splits=5,random_state=101)


print (X_train.shape, len(y_train))
print (X_valid.shape, len(y_valid))


(1186028, 13122) 1186028
(296507, 13122) 296507


In [60]:
## https://www.kaggle.com/apapiu/ridge-script
##

## scorer_func = make_scorer(rmsle,greater_is_better=False)
## scorer_func = make_scorer(r2_score,greater_is_better=True)
scorer_func = make_scorer(mean_squared_error,greater_is_better=False)
## scorer_func = make_scorer(mean_squared_log_error,greater_is_better=False)

mm = Ridge(solver = "lsqr", normalize=False, fit_intercept=False,random_state=120)

scores = cross_val_score(mm, X_train, y_train, 
                         cv=k_fold, n_jobs=-1,scoring = scorer_func)

scores = np.sqrt(-scores)
print (np.mean(scores),scores)

## test on validation
mm.fit(X_train, y_train)
y_pred = mm.predict(X_valid)
y_pred[y_pred<1.386]=1.386

print ("RMSLE: ",rmsle(np.expm1(y_valid),np.expm1(y_pred)))
print ("RMSE: ",mean_squared_error(y_valid,y_pred))
print ("R2: ",r2_score(y_valid,y_pred))

0.494907664851 [ 0.49453879  0.49653881  0.49382434  0.49472717  0.49490921]
RMSLE:  0.49326124138
RMSE:  0.243306652248
R2:  0.566425516024


In [None]:
0.518020633601 [ 0.52231744  0.5173286   0.52197693  0.51487274  0.51360746]
RMSLE:  0.51304258507
RMSE:  0.263212694095
R2:  0.521618753148

In [65]:
params2 = {
        'learning_rate': 0.7,
        'application': 'regression',
        'max_depth': 3,
        'num_leaves': 120,
        'verbosity': -1,
        'metric': 'RMSE',
        'data_random_seed': 2,
        'bagging_fraction': 1,
        'nthread': 4
}

start = time.clock()
d_train = lgb.Dataset(X_train, label=y_train)
d_valid = lgb.Dataset(X_valid, label=y_valid)


watchlist2 = [d_valid]    
lgb_model = lgb.train(params2, train_set=d_train, num_boost_round=8000, valid_sets=watchlist2, \
    early_stopping_rounds=250, verbose_eval=True)     

y_pred = lgb_model.predict(X_valid)
y_pred[y_pred<1.386]=1.386

print ("RMSLE: ",rmsle(np.expm1(y_valid),np.expm1(y_pred)))
print ("RMSE: ",mean_squared_error(y_valid,y_pred))
print ("R2: ",r2_score(y_valid,y_pred))

print ("Time passed min:",(time.clock()-start)/60)

[1]	valid_0's rmse: 0.703835
Training until validation scores don't improve for 250 rounds.
[2]	valid_0's rmse: 0.685523
[3]	valid_0's rmse: 0.674474
[4]	valid_0's rmse: 0.663869
[5]	valid_0's rmse: 0.655531
[6]	valid_0's rmse: 0.649037
[7]	valid_0's rmse: 0.642634
[8]	valid_0's rmse: 0.637848
[9]	valid_0's rmse: 0.63338
[10]	valid_0's rmse: 0.629714
[11]	valid_0's rmse: 0.626386
[12]	valid_0's rmse: 0.623449
[13]	valid_0's rmse: 0.620662
[14]	valid_0's rmse: 0.617435
[15]	valid_0's rmse: 0.614832
[16]	valid_0's rmse: 0.612596
[17]	valid_0's rmse: 0.610313
[18]	valid_0's rmse: 0.608196
[19]	valid_0's rmse: 0.606266
[20]	valid_0's rmse: 0.604083
[21]	valid_0's rmse: 0.602335
[22]	valid_0's rmse: 0.600808
[23]	valid_0's rmse: 0.599085
[24]	valid_0's rmse: 0.597693
[25]	valid_0's rmse: 0.596076
[26]	valid_0's rmse: 0.594791
[27]	valid_0's rmse: 0.593108
[28]	valid_0's rmse: 0.591284
[29]	valid_0's rmse: 0.590001
[30]	valid_0's rmse: 0.588624
[31]	valid_0's rmse: 0.587638
[32]	valid_0's rm

[269]	valid_0's rmse: 0.515265
[270]	valid_0's rmse: 0.515165
[271]	valid_0's rmse: 0.515062
[272]	valid_0's rmse: 0.514991
[273]	valid_0's rmse: 0.514925
[274]	valid_0's rmse: 0.514819
[275]	valid_0's rmse: 0.514637
[276]	valid_0's rmse: 0.514511
[277]	valid_0's rmse: 0.51436
[278]	valid_0's rmse: 0.514288
[279]	valid_0's rmse: 0.514186
[280]	valid_0's rmse: 0.51412
[281]	valid_0's rmse: 0.514052
[282]	valid_0's rmse: 0.51397
[283]	valid_0's rmse: 0.513899
[284]	valid_0's rmse: 0.513757
[285]	valid_0's rmse: 0.513636
[286]	valid_0's rmse: 0.513548
[287]	valid_0's rmse: 0.513495
[288]	valid_0's rmse: 0.513419
[289]	valid_0's rmse: 0.51334
[290]	valid_0's rmse: 0.51326
[291]	valid_0's rmse: 0.513136
[292]	valid_0's rmse: 0.513049
[293]	valid_0's rmse: 0.512652
[294]	valid_0's rmse: 0.510759
[295]	valid_0's rmse: 0.510498
[296]	valid_0's rmse: 0.510356
[297]	valid_0's rmse: 0.510215
[298]	valid_0's rmse: 0.510096
[299]	valid_0's rmse: 0.509965
[300]	valid_0's rmse: 0.509908
[301]	valid_0

[536]	valid_0's rmse: 0.495425
[537]	valid_0's rmse: 0.495404
[538]	valid_0's rmse: 0.495367
[539]	valid_0's rmse: 0.495335
[540]	valid_0's rmse: 0.495304
[541]	valid_0's rmse: 0.495267
[542]	valid_0's rmse: 0.495242
[543]	valid_0's rmse: 0.495217
[544]	valid_0's rmse: 0.495157
[545]	valid_0's rmse: 0.495131
[546]	valid_0's rmse: 0.495087
[547]	valid_0's rmse: 0.495036
[548]	valid_0's rmse: 0.49502
[549]	valid_0's rmse: 0.495001
[550]	valid_0's rmse: 0.494966
[551]	valid_0's rmse: 0.494932
[552]	valid_0's rmse: 0.494906
[553]	valid_0's rmse: 0.494829
[554]	valid_0's rmse: 0.494792
[555]	valid_0's rmse: 0.494765
[556]	valid_0's rmse: 0.494738
[557]	valid_0's rmse: 0.494694
[558]	valid_0's rmse: 0.494667
[559]	valid_0's rmse: 0.494625
[560]	valid_0's rmse: 0.494578
[561]	valid_0's rmse: 0.494513
[562]	valid_0's rmse: 0.494402
[563]	valid_0's rmse: 0.494181
[564]	valid_0's rmse: 0.494144
[565]	valid_0's rmse: 0.494097
[566]	valid_0's rmse: 0.494064
[567]	valid_0's rmse: 0.494029
[568]	val

[802]	valid_0's rmse: 0.486124
[803]	valid_0's rmse: 0.486102
[804]	valid_0's rmse: 0.486073
[805]	valid_0's rmse: 0.486076
[806]	valid_0's rmse: 0.486058
[807]	valid_0's rmse: 0.486044
[808]	valid_0's rmse: 0.48603
[809]	valid_0's rmse: 0.48601
[810]	valid_0's rmse: 0.485981
[811]	valid_0's rmse: 0.485969
[812]	valid_0's rmse: 0.485941
[813]	valid_0's rmse: 0.485915
[814]	valid_0's rmse: 0.485891
[815]	valid_0's rmse: 0.485877
[816]	valid_0's rmse: 0.485854
[817]	valid_0's rmse: 0.48584
[818]	valid_0's rmse: 0.485833
[819]	valid_0's rmse: 0.485819
[820]	valid_0's rmse: 0.485792
[821]	valid_0's rmse: 0.485789
[822]	valid_0's rmse: 0.485766
[823]	valid_0's rmse: 0.485743
[824]	valid_0's rmse: 0.485719
[825]	valid_0's rmse: 0.485688
[826]	valid_0's rmse: 0.485669
[827]	valid_0's rmse: 0.485646
[828]	valid_0's rmse: 0.48563
[829]	valid_0's rmse: 0.48562
[830]	valid_0's rmse: 0.485614
[831]	valid_0's rmse: 0.485603
[832]	valid_0's rmse: 0.485588
[833]	valid_0's rmse: 0.485576
[834]	valid_0

[1066]	valid_0's rmse: 0.481218
[1067]	valid_0's rmse: 0.481172
[1068]	valid_0's rmse: 0.481156
[1069]	valid_0's rmse: 0.481145
[1070]	valid_0's rmse: 0.48114
[1071]	valid_0's rmse: 0.481124
[1072]	valid_0's rmse: 0.481109
[1073]	valid_0's rmse: 0.481109
[1074]	valid_0's rmse: 0.481101
[1075]	valid_0's rmse: 0.481089
[1076]	valid_0's rmse: 0.481081
[1077]	valid_0's rmse: 0.481075
[1078]	valid_0's rmse: 0.481062
[1079]	valid_0's rmse: 0.481061
[1080]	valid_0's rmse: 0.481061
[1081]	valid_0's rmse: 0.481026
[1082]	valid_0's rmse: 0.48102
[1083]	valid_0's rmse: 0.48101
[1084]	valid_0's rmse: 0.480898
[1085]	valid_0's rmse: 0.480839
[1086]	valid_0's rmse: 0.480828
[1087]	valid_0's rmse: 0.4808
[1088]	valid_0's rmse: 0.480783
[1089]	valid_0's rmse: 0.480778
[1090]	valid_0's rmse: 0.480771
[1091]	valid_0's rmse: 0.480761
[1092]	valid_0's rmse: 0.480752
[1093]	valid_0's rmse: 0.480726
[1094]	valid_0's rmse: 0.480711
[1095]	valid_0's rmse: 0.480708
[1096]	valid_0's rmse: 0.480704
[1097]	valid_

[1324]	valid_0's rmse: 0.47814
[1325]	valid_0's rmse: 0.478068
[1326]	valid_0's rmse: 0.477999
[1327]	valid_0's rmse: 0.477957
[1328]	valid_0's rmse: 0.477942
[1329]	valid_0's rmse: 0.47795
[1330]	valid_0's rmse: 0.47795
[1331]	valid_0's rmse: 0.477955
[1332]	valid_0's rmse: 0.477948
[1333]	valid_0's rmse: 0.477956
[1334]	valid_0's rmse: 0.477954
[1335]	valid_0's rmse: 0.477947
[1336]	valid_0's rmse: 0.477945
[1337]	valid_0's rmse: 0.477946
[1338]	valid_0's rmse: 0.477939
[1339]	valid_0's rmse: 0.477931
[1340]	valid_0's rmse: 0.477912
[1341]	valid_0's rmse: 0.477901
[1342]	valid_0's rmse: 0.477895
[1343]	valid_0's rmse: 0.477888
[1344]	valid_0's rmse: 0.477875
[1345]	valid_0's rmse: 0.477858
[1346]	valid_0's rmse: 0.477843
[1347]	valid_0's rmse: 0.477832
[1348]	valid_0's rmse: 0.47783
[1349]	valid_0's rmse: 0.477758
[1350]	valid_0's rmse: 0.47772
[1351]	valid_0's rmse: 0.477716
[1352]	valid_0's rmse: 0.477718
[1353]	valid_0's rmse: 0.477716
[1354]	valid_0's rmse: 0.477707
[1355]	valid_

[1582]	valid_0's rmse: 0.475917
[1583]	valid_0's rmse: 0.475819
[1584]	valid_0's rmse: 0.475809
[1585]	valid_0's rmse: 0.475791
[1586]	valid_0's rmse: 0.475772
[1587]	valid_0's rmse: 0.475759
[1588]	valid_0's rmse: 0.475759
[1589]	valid_0's rmse: 0.475732
[1590]	valid_0's rmse: 0.475725
[1591]	valid_0's rmse: 0.475697
[1592]	valid_0's rmse: 0.475679
[1593]	valid_0's rmse: 0.475677
[1594]	valid_0's rmse: 0.475669
[1595]	valid_0's rmse: 0.475663
[1596]	valid_0's rmse: 0.475655
[1597]	valid_0's rmse: 0.475647
[1598]	valid_0's rmse: 0.475592
[1599]	valid_0's rmse: 0.475537
[1600]	valid_0's rmse: 0.475526
[1601]	valid_0's rmse: 0.475504
[1602]	valid_0's rmse: 0.475491
[1603]	valid_0's rmse: 0.475475
[1604]	valid_0's rmse: 0.475462
[1605]	valid_0's rmse: 0.475455
[1606]	valid_0's rmse: 0.475458
[1607]	valid_0's rmse: 0.475452
[1608]	valid_0's rmse: 0.475449
[1609]	valid_0's rmse: 0.475436
[1610]	valid_0's rmse: 0.475433
[1611]	valid_0's rmse: 0.475432
[1612]	valid_0's rmse: 0.475421
[1613]	v

[1840]	valid_0's rmse: 0.4737
[1841]	valid_0's rmse: 0.473695
[1842]	valid_0's rmse: 0.473686
[1843]	valid_0's rmse: 0.473684
[1844]	valid_0's rmse: 0.473679
[1845]	valid_0's rmse: 0.473675
[1846]	valid_0's rmse: 0.473671
[1847]	valid_0's rmse: 0.47367
[1848]	valid_0's rmse: 0.473667
[1849]	valid_0's rmse: 0.473669
[1850]	valid_0's rmse: 0.473676
[1851]	valid_0's rmse: 0.47366
[1852]	valid_0's rmse: 0.473653
[1853]	valid_0's rmse: 0.473643
[1854]	valid_0's rmse: 0.473633
[1855]	valid_0's rmse: 0.473621
[1856]	valid_0's rmse: 0.473606
[1857]	valid_0's rmse: 0.473601
[1858]	valid_0's rmse: 0.473585
[1859]	valid_0's rmse: 0.473588
[1860]	valid_0's rmse: 0.47358
[1861]	valid_0's rmse: 0.473582
[1862]	valid_0's rmse: 0.473583
[1863]	valid_0's rmse: 0.473578
[1864]	valid_0's rmse: 0.473573
[1865]	valid_0's rmse: 0.473549
[1866]	valid_0's rmse: 0.473543
[1867]	valid_0's rmse: 0.473534
[1868]	valid_0's rmse: 0.473528
[1869]	valid_0's rmse: 0.473523
[1870]	valid_0's rmse: 0.473522
[1871]	valid_

[2098]	valid_0's rmse: 0.472325
[2099]	valid_0's rmse: 0.472303
[2100]	valid_0's rmse: 0.472272
[2101]	valid_0's rmse: 0.472269
[2102]	valid_0's rmse: 0.472253
[2103]	valid_0's rmse: 0.47225
[2104]	valid_0's rmse: 0.472244
[2105]	valid_0's rmse: 0.472253
[2106]	valid_0's rmse: 0.472237
[2107]	valid_0's rmse: 0.472207
[2108]	valid_0's rmse: 0.472209
[2109]	valid_0's rmse: 0.472203
[2110]	valid_0's rmse: 0.472208
[2111]	valid_0's rmse: 0.472207
[2112]	valid_0's rmse: 0.472193
[2113]	valid_0's rmse: 0.472191
[2114]	valid_0's rmse: 0.472191
[2115]	valid_0's rmse: 0.47219
[2116]	valid_0's rmse: 0.472187
[2117]	valid_0's rmse: 0.472198
[2118]	valid_0's rmse: 0.472192
[2119]	valid_0's rmse: 0.472181
[2120]	valid_0's rmse: 0.472153
[2121]	valid_0's rmse: 0.472144
[2122]	valid_0's rmse: 0.472144
[2123]	valid_0's rmse: 0.472137
[2124]	valid_0's rmse: 0.472142
[2125]	valid_0's rmse: 0.47214
[2126]	valid_0's rmse: 0.472133
[2127]	valid_0's rmse: 0.472124
[2128]	valid_0's rmse: 0.472107
[2129]	vali

[2356]	valid_0's rmse: 0.470439
[2357]	valid_0's rmse: 0.470438
[2358]	valid_0's rmse: 0.470441
[2359]	valid_0's rmse: 0.470434
[2360]	valid_0's rmse: 0.470426
[2361]	valid_0's rmse: 0.470415
[2362]	valid_0's rmse: 0.470407
[2363]	valid_0's rmse: 0.470411
[2364]	valid_0's rmse: 0.470403
[2365]	valid_0's rmse: 0.470405
[2366]	valid_0's rmse: 0.470401
[2367]	valid_0's rmse: 0.470403
[2368]	valid_0's rmse: 0.470396
[2369]	valid_0's rmse: 0.470399
[2370]	valid_0's rmse: 0.470394
[2371]	valid_0's rmse: 0.47039
[2372]	valid_0's rmse: 0.47039
[2373]	valid_0's rmse: 0.470384
[2374]	valid_0's rmse: 0.470379
[2375]	valid_0's rmse: 0.470384
[2376]	valid_0's rmse: 0.470376
[2377]	valid_0's rmse: 0.470373
[2378]	valid_0's rmse: 0.470365
[2379]	valid_0's rmse: 0.470362
[2380]	valid_0's rmse: 0.470355
[2381]	valid_0's rmse: 0.470342
[2382]	valid_0's rmse: 0.470339
[2383]	valid_0's rmse: 0.470338
[2384]	valid_0's rmse: 0.470335
[2385]	valid_0's rmse: 0.470317
[2386]	valid_0's rmse: 0.47032
[2387]	vali

[2614]	valid_0's rmse: 0.469713
[2615]	valid_0's rmse: 0.46971
[2616]	valid_0's rmse: 0.469656
[2617]	valid_0's rmse: 0.469637
[2618]	valid_0's rmse: 0.469621
[2619]	valid_0's rmse: 0.469619
[2620]	valid_0's rmse: 0.469608
[2621]	valid_0's rmse: 0.469578
[2622]	valid_0's rmse: 0.469573
[2623]	valid_0's rmse: 0.469565
[2624]	valid_0's rmse: 0.469566
[2625]	valid_0's rmse: 0.46956
[2626]	valid_0's rmse: 0.46956
[2627]	valid_0's rmse: 0.46956
[2628]	valid_0's rmse: 0.469558
[2629]	valid_0's rmse: 0.469561
[2630]	valid_0's rmse: 0.469561
[2631]	valid_0's rmse: 0.469559
[2632]	valid_0's rmse: 0.46955
[2633]	valid_0's rmse: 0.469556
[2634]	valid_0's rmse: 0.469551
[2635]	valid_0's rmse: 0.469558
[2636]	valid_0's rmse: 0.469562
[2637]	valid_0's rmse: 0.469563
[2638]	valid_0's rmse: 0.469566
[2639]	valid_0's rmse: 0.469552
[2640]	valid_0's rmse: 0.469547
[2641]	valid_0's rmse: 0.469548
[2642]	valid_0's rmse: 0.469547
[2643]	valid_0's rmse: 0.469545
[2644]	valid_0's rmse: 0.469538
[2645]	valid_

[2872]	valid_0's rmse: 0.469197
[2873]	valid_0's rmse: 0.469197
[2874]	valid_0's rmse: 0.469195
[2875]	valid_0's rmse: 0.469189
[2876]	valid_0's rmse: 0.46919
[2877]	valid_0's rmse: 0.469194
[2878]	valid_0's rmse: 0.469194
[2879]	valid_0's rmse: 0.469198
[2880]	valid_0's rmse: 0.469189
[2881]	valid_0's rmse: 0.469191
[2882]	valid_0's rmse: 0.469191
[2883]	valid_0's rmse: 0.469177
[2884]	valid_0's rmse: 0.469175
[2885]	valid_0's rmse: 0.469179
[2886]	valid_0's rmse: 0.469175
[2887]	valid_0's rmse: 0.469176
[2888]	valid_0's rmse: 0.469177
[2889]	valid_0's rmse: 0.469173
[2890]	valid_0's rmse: 0.469174
[2891]	valid_0's rmse: 0.469174
[2892]	valid_0's rmse: 0.469172
[2893]	valid_0's rmse: 0.469177
[2894]	valid_0's rmse: 0.469174
[2895]	valid_0's rmse: 0.469167
[2896]	valid_0's rmse: 0.46917
[2897]	valid_0's rmse: 0.46917
[2898]	valid_0's rmse: 0.469172
[2899]	valid_0's rmse: 0.469186
[2900]	valid_0's rmse: 0.469185
[2901]	valid_0's rmse: 0.46919
[2902]	valid_0's rmse: 0.469194
[2903]	valid

[3130]	valid_0's rmse: 0.468727
[3131]	valid_0's rmse: 0.468728
[3132]	valid_0's rmse: 0.468714
[3133]	valid_0's rmse: 0.468718
[3134]	valid_0's rmse: 0.468717
[3135]	valid_0's rmse: 0.468715
[3136]	valid_0's rmse: 0.46872
[3137]	valid_0's rmse: 0.46872
[3138]	valid_0's rmse: 0.468711
[3139]	valid_0's rmse: 0.468709
[3140]	valid_0's rmse: 0.468704
[3141]	valid_0's rmse: 0.468704
[3142]	valid_0's rmse: 0.468697
[3143]	valid_0's rmse: 0.468698
[3144]	valid_0's rmse: 0.468691
[3145]	valid_0's rmse: 0.4687
[3146]	valid_0's rmse: 0.468703
[3147]	valid_0's rmse: 0.468702
[3148]	valid_0's rmse: 0.468699
[3149]	valid_0's rmse: 0.4687
[3150]	valid_0's rmse: 0.468698
[3151]	valid_0's rmse: 0.468704
[3152]	valid_0's rmse: 0.468703
[3153]	valid_0's rmse: 0.468704
[3154]	valid_0's rmse: 0.468705
[3155]	valid_0's rmse: 0.468688
[3156]	valid_0's rmse: 0.46869
[3157]	valid_0's rmse: 0.468683
[3158]	valid_0's rmse: 0.468681
[3159]	valid_0's rmse: 0.468676
[3160]	valid_0's rmse: 0.468677
[3161]	valid_0'

[3388]	valid_0's rmse: 0.468215
[3389]	valid_0's rmse: 0.468214
[3390]	valid_0's rmse: 0.468214
[3391]	valid_0's rmse: 0.468211
[3392]	valid_0's rmse: 0.468211
[3393]	valid_0's rmse: 0.468211
[3394]	valid_0's rmse: 0.468212
[3395]	valid_0's rmse: 0.468206
[3396]	valid_0's rmse: 0.468198
[3397]	valid_0's rmse: 0.468196
[3398]	valid_0's rmse: 0.468195
[3399]	valid_0's rmse: 0.468192
[3400]	valid_0's rmse: 0.468191
[3401]	valid_0's rmse: 0.46819
[3402]	valid_0's rmse: 0.468189
[3403]	valid_0's rmse: 0.468164
[3404]	valid_0's rmse: 0.468165
[3405]	valid_0's rmse: 0.468166
[3406]	valid_0's rmse: 0.46816
[3407]	valid_0's rmse: 0.468153
[3408]	valid_0's rmse: 0.468156
[3409]	valid_0's rmse: 0.468158
[3410]	valid_0's rmse: 0.468144
[3411]	valid_0's rmse: 0.468138
[3412]	valid_0's rmse: 0.468138
[3413]	valid_0's rmse: 0.468132
[3414]	valid_0's rmse: 0.468119
[3415]	valid_0's rmse: 0.468123
[3416]	valid_0's rmse: 0.468124
[3417]	valid_0's rmse: 0.468119
[3418]	valid_0's rmse: 0.468116
[3419]	val

[3646]	valid_0's rmse: 0.467763
[3647]	valid_0's rmse: 0.467759
[3648]	valid_0's rmse: 0.467748
[3649]	valid_0's rmse: 0.467747
[3650]	valid_0's rmse: 0.467754
[3651]	valid_0's rmse: 0.467738
[3652]	valid_0's rmse: 0.467735
[3653]	valid_0's rmse: 0.46774
[3654]	valid_0's rmse: 0.467741
[3655]	valid_0's rmse: 0.467734
[3656]	valid_0's rmse: 0.467735
[3657]	valid_0's rmse: 0.467739
[3658]	valid_0's rmse: 0.467733
[3659]	valid_0's rmse: 0.467751
[3660]	valid_0's rmse: 0.467745
[3661]	valid_0's rmse: 0.467747
[3662]	valid_0's rmse: 0.467747
[3663]	valid_0's rmse: 0.467745
[3664]	valid_0's rmse: 0.467754
[3665]	valid_0's rmse: 0.467758
[3666]	valid_0's rmse: 0.46776
[3667]	valid_0's rmse: 0.467763
[3668]	valid_0's rmse: 0.467759
[3669]	valid_0's rmse: 0.46776
[3670]	valid_0's rmse: 0.467767
[3671]	valid_0's rmse: 0.46776
[3672]	valid_0's rmse: 0.467761
[3673]	valid_0's rmse: 0.467765
[3674]	valid_0's rmse: 0.467761
[3675]	valid_0's rmse: 0.467761
[3676]	valid_0's rmse: 0.467764
[3677]	valid

[3904]	valid_0's rmse: 0.467412
[3905]	valid_0's rmse: 0.467411
[3906]	valid_0's rmse: 0.467404
[3907]	valid_0's rmse: 0.467406
[3908]	valid_0's rmse: 0.467406
[3909]	valid_0's rmse: 0.467405
[3910]	valid_0's rmse: 0.467405
[3911]	valid_0's rmse: 0.467403
[3912]	valid_0's rmse: 0.467404
[3913]	valid_0's rmse: 0.467406
[3914]	valid_0's rmse: 0.467399
[3915]	valid_0's rmse: 0.467403
[3916]	valid_0's rmse: 0.467411
[3917]	valid_0's rmse: 0.467394
[3918]	valid_0's rmse: 0.46739
[3919]	valid_0's rmse: 0.467386
[3920]	valid_0's rmse: 0.467386
[3921]	valid_0's rmse: 0.467389
[3922]	valid_0's rmse: 0.467387
[3923]	valid_0's rmse: 0.467394
[3924]	valid_0's rmse: 0.467379
[3925]	valid_0's rmse: 0.467381
[3926]	valid_0's rmse: 0.467382
[3927]	valid_0's rmse: 0.467384
[3928]	valid_0's rmse: 0.467387
[3929]	valid_0's rmse: 0.467389
[3930]	valid_0's rmse: 0.467349
[3931]	valid_0's rmse: 0.467327
[3932]	valid_0's rmse: 0.467325
[3933]	valid_0's rmse: 0.467316
[3934]	valid_0's rmse: 0.467318
[3935]	va

[4162]	valid_0's rmse: 0.467048
[4163]	valid_0's rmse: 0.467052
[4164]	valid_0's rmse: 0.467049
[4165]	valid_0's rmse: 0.46705
[4166]	valid_0's rmse: 0.467052
[4167]	valid_0's rmse: 0.467052
[4168]	valid_0's rmse: 0.467051
[4169]	valid_0's rmse: 0.467052
[4170]	valid_0's rmse: 0.467051
[4171]	valid_0's rmse: 0.467049
[4172]	valid_0's rmse: 0.467048
[4173]	valid_0's rmse: 0.467044
[4174]	valid_0's rmse: 0.467041
[4175]	valid_0's rmse: 0.467031
[4176]	valid_0's rmse: 0.467032
[4177]	valid_0's rmse: 0.467034
[4178]	valid_0's rmse: 0.467026
[4179]	valid_0's rmse: 0.467025
[4180]	valid_0's rmse: 0.467029
[4181]	valid_0's rmse: 0.467024
[4182]	valid_0's rmse: 0.467024
[4183]	valid_0's rmse: 0.467029
[4184]	valid_0's rmse: 0.467029
[4185]	valid_0's rmse: 0.467032
[4186]	valid_0's rmse: 0.46704
[4187]	valid_0's rmse: 0.467041
[4188]	valid_0's rmse: 0.467035
[4189]	valid_0's rmse: 0.467033
[4190]	valid_0's rmse: 0.467033
[4191]	valid_0's rmse: 0.467024
[4192]	valid_0's rmse: 0.467025
[4193]	val

[4420]	valid_0's rmse: 0.466793
[4421]	valid_0's rmse: 0.466796
[4422]	valid_0's rmse: 0.466794
[4423]	valid_0's rmse: 0.466795
[4424]	valid_0's rmse: 0.466796
[4425]	valid_0's rmse: 0.466796
[4426]	valid_0's rmse: 0.466786
[4427]	valid_0's rmse: 0.466791
[4428]	valid_0's rmse: 0.466791
[4429]	valid_0's rmse: 0.46678
[4430]	valid_0's rmse: 0.466773
[4431]	valid_0's rmse: 0.466782
[4432]	valid_0's rmse: 0.466788
[4433]	valid_0's rmse: 0.466789
[4434]	valid_0's rmse: 0.466788
[4435]	valid_0's rmse: 0.466792
[4436]	valid_0's rmse: 0.466786
[4437]	valid_0's rmse: 0.466781
[4438]	valid_0's rmse: 0.466786
[4439]	valid_0's rmse: 0.466787
[4440]	valid_0's rmse: 0.466783
[4441]	valid_0's rmse: 0.466783
[4442]	valid_0's rmse: 0.466783
[4443]	valid_0's rmse: 0.466782
[4444]	valid_0's rmse: 0.466787
[4445]	valid_0's rmse: 0.466788
[4446]	valid_0's rmse: 0.466791
[4447]	valid_0's rmse: 0.466793
[4448]	valid_0's rmse: 0.466792
[4449]	valid_0's rmse: 0.466792
[4450]	valid_0's rmse: 0.466795
[4451]	va

[4678]	valid_0's rmse: 0.46666
[4679]	valid_0's rmse: 0.466658
[4680]	valid_0's rmse: 0.466644
[4681]	valid_0's rmse: 0.46664
[4682]	valid_0's rmse: 0.466639
[4683]	valid_0's rmse: 0.466637
[4684]	valid_0's rmse: 0.466634
[4685]	valid_0's rmse: 0.466624
[4686]	valid_0's rmse: 0.466623
[4687]	valid_0's rmse: 0.466617
[4688]	valid_0's rmse: 0.466603
[4689]	valid_0's rmse: 0.466597
[4690]	valid_0's rmse: 0.466587
[4691]	valid_0's rmse: 0.466585
[4692]	valid_0's rmse: 0.466576
[4693]	valid_0's rmse: 0.466581
[4694]	valid_0's rmse: 0.466576
[4695]	valid_0's rmse: 0.46656
[4696]	valid_0's rmse: 0.466562
[4697]	valid_0's rmse: 0.466564
[4698]	valid_0's rmse: 0.466561
[4699]	valid_0's rmse: 0.466568
[4700]	valid_0's rmse: 0.466566
[4701]	valid_0's rmse: 0.466572
[4702]	valid_0's rmse: 0.466578
[4703]	valid_0's rmse: 0.466576
[4704]	valid_0's rmse: 0.466576
[4705]	valid_0's rmse: 0.466577
[4706]	valid_0's rmse: 0.466577
[4707]	valid_0's rmse: 0.466576
[4708]	valid_0's rmse: 0.466575
[4709]	vali

[4936]	valid_0's rmse: 0.466383
[4937]	valid_0's rmse: 0.466382
[4938]	valid_0's rmse: 0.46637
[4939]	valid_0's rmse: 0.466366
[4940]	valid_0's rmse: 0.466371
[4941]	valid_0's rmse: 0.466377
[4942]	valid_0's rmse: 0.466372
[4943]	valid_0's rmse: 0.466379
[4944]	valid_0's rmse: 0.46638
[4945]	valid_0's rmse: 0.466379
[4946]	valid_0's rmse: 0.466381
[4947]	valid_0's rmse: 0.46638
[4948]	valid_0's rmse: 0.466377
[4949]	valid_0's rmse: 0.466378
[4950]	valid_0's rmse: 0.466377
[4951]	valid_0's rmse: 0.466376
[4952]	valid_0's rmse: 0.466379
[4953]	valid_0's rmse: 0.466385
[4954]	valid_0's rmse: 0.466379
[4955]	valid_0's rmse: 0.466377
[4956]	valid_0's rmse: 0.466379
[4957]	valid_0's rmse: 0.46638
[4958]	valid_0's rmse: 0.466379
[4959]	valid_0's rmse: 0.466383
[4960]	valid_0's rmse: 0.466382
[4961]	valid_0's rmse: 0.466384
[4962]	valid_0's rmse: 0.466389
[4963]	valid_0's rmse: 0.466393
[4964]	valid_0's rmse: 0.4664
[4965]	valid_0's rmse: 0.466396
[4966]	valid_0's rmse: 0.466399
[4967]	valid_0

[5194]	valid_0's rmse: 0.466352
[5195]	valid_0's rmse: 0.466353
[5196]	valid_0's rmse: 0.466351
[5197]	valid_0's rmse: 0.466349
[5198]	valid_0's rmse: 0.466356
[5199]	valid_0's rmse: 0.466357
[5200]	valid_0's rmse: 0.466357
[5201]	valid_0's rmse: 0.466353
[5202]	valid_0's rmse: 0.466351
[5203]	valid_0's rmse: 0.466347
[5204]	valid_0's rmse: 0.466347
[5205]	valid_0's rmse: 0.466344
[5206]	valid_0's rmse: 0.466345
[5207]	valid_0's rmse: 0.466344
[5208]	valid_0's rmse: 0.466343
[5209]	valid_0's rmse: 0.466341
[5210]	valid_0's rmse: 0.466345
[5211]	valid_0's rmse: 0.466343
[5212]	valid_0's rmse: 0.466342
[5213]	valid_0's rmse: 0.46634
[5214]	valid_0's rmse: 0.466342
[5215]	valid_0's rmse: 0.46634
[5216]	valid_0's rmse: 0.466343
[5217]	valid_0's rmse: 0.466346
[5218]	valid_0's rmse: 0.466347
[5219]	valid_0's rmse: 0.466343
[5220]	valid_0's rmse: 0.466344
[5221]	valid_0's rmse: 0.46635
[5222]	valid_0's rmse: 0.466349
[5223]	valid_0's rmse: 0.46635
[5224]	valid_0's rmse: 0.466351
[5225]	valid

[5452]	valid_0's rmse: 0.466415
[5453]	valid_0's rmse: 0.466411
[5454]	valid_0's rmse: 0.466413
[5455]	valid_0's rmse: 0.466409
[5456]	valid_0's rmse: 0.466405
[5457]	valid_0's rmse: 0.466407
[5458]	valid_0's rmse: 0.46641
[5459]	valid_0's rmse: 0.466415
[5460]	valid_0's rmse: 0.466416
[5461]	valid_0's rmse: 0.466415
[5462]	valid_0's rmse: 0.466419
[5463]	valid_0's rmse: 0.466419
Early stopping, best iteration is:
[5213]	valid_0's rmse: 0.46634
RMSLE:  0.466236276326
RMSE:  0.217376265363
R2:  0.612633681765
Time passed min: 12.429295251109261


In [None]:
X_test = X[nrow_train:]

y_pred = lgb_model.predict(X_test)

y_pred[y_pred<1.386]=1.386
y_pred = np.expm1(y_pred)

sub = pd.DataFrame()
sub['test_id'] = merged['test_id'][nrow_train:].astype(int)
sub['price'] = y_pred
sub.to_csv('result.csv', index=False)
print ("fini")


In [None]:
## https://www.kaggle.com/thykhuely/mercari-interactive-eda-topic-modelling
##