<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"></ul></div>

In [1]:
import os
import datetime
import gc

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Edit data directory here
DATA_DIR = "./input/techjam"


def percentile(n):
    def percentile_(x):
        return np.percentile(x, n)
    percentile_.__name__ = 'percentile_%s' % n
    return percentile_

def get_prep_data(DATA_DIR):
    # Reading Files
    cc = pd.read_csv(os.path.join(DATA_DIR,'cc.csv'),parse_dates=['pos_dt'])
    demo = pd.read_csv(os.path.join(DATA_DIR,'demographics.csv'))
    kplus = pd.read_csv(os.path.join(DATA_DIR,'kplus.csv'),parse_dates=['sunday'])

    train = pd.read_csv(os.path.join(DATA_DIR,'train.csv'))
    test = pd.read_csv(os.path.join(DATA_DIR,'test.csv'))

    # Set-up
    cc_mapper = demo[['id','cc_no']].copy()
    demo = demo.drop('cc_no',axis=1).drop_duplicates().reset_index(drop=True)
    label = pd.concat([train,test],axis=0,ignore_index=True)
    demo = demo.merge(label, on='id')
    demo['ocp_cd'] = demo['ocp_cd'].fillna(0).astype(int)
    demo.set_index('id',inplace=True)
    kplus.set_index('id',inplace=True)
    joined_cc = cc.merge(cc_mapper, on='cc_no', how='inner').drop('cc_no', axis=1)

    kplus['month'] = kplus['sunday'].dt.month
    kplus['month'] = 'month'+ kplus['month'].astype(str)
    joined_cc['month'] = joined_cc.pos_dt.dt.month
    joined_cc['month'] = 'month'+ joined_cc['month'].astype(str)

    bank_holidays = ['2018-01-01','2018-01-02','2018-03-01','2018-04-06','2018-04-13',
                    '2018-04-14','2018-04-15','2018-04-16','2018-05-01','2018-05-29']
    joined_cc['is_holiday'] = joined_cc['pos_dt'].isin([datetime.datetime.strptime(i, '%Y-%m-%d') for i in bank_holidays]).astype(int)
    joined_cc['is_weekend'] = joined_cc['pos_dt'].dt.weekday.isin([0,6]).astype(int)
    joined_cc['is_holiday'] = 'holiday'+joined_cc['is_holiday'].astype(str)
    joined_cc['is_weekend'] = 'weekend'+joined_cc['is_weekend'].astype(str)
    joined_cc['quarter'] = 'q'+((joined_cc['pos_dt'].dt.month>=4)+1).astype(str)


    # Adding log 
    demo['income'] = demo['income'].apply(np.log1p)

   


    # Target Encoding
    demo = demo.reset_index()
    demo['cc_cnt'] = demo['id'].map(cc_mapper.groupby('id').cc_no.count())
    demo['has_kp'] = demo['id'].isin(kplus.index).astype(int)

    # Crossing categorical features as another feature [374 / 336]
    demo['age_gnd'] = demo['gender'].astype(str)+demo['age'].astype(str)
    demo['gnd_ocp'] = demo['gender'].astype(str)+demo['ocp_cd'].astype(str)
    demo['age_ocp'] = demo['age'].astype(str)+demo['ocp_cd'].astype(str)

    # Left age out of categorical features since it's ordinal
    categorical_features = ['gender','ocp_cd','age_gnd','gnd_ocp','age_ocp']

    # Target Encoding, code modified from [374]
    for feature in categorical_features + ['age']:
        means_per_group = demo[demo['income']>0].groupby(feature)['income'].mean()
        demo[feature+'_mean'] = demo[feature].map(means_per_group)

        count_per_group = demo[demo['income']>0].groupby(feature)['income'].count()
        demo[feature+'_count'] = demo[feature].map(count_per_group)
    demo.set_index('id',inplace=True)
    
     # Preping Training data
    train = demo.copy()

    # Normal Total Groupby
    kplus_tot = kplus.groupby('id').agg({'kp_txn_count':'sum','kp_txn_amt':'sum'}).copy()
    kplus_mm_tot = kplus.groupby(['id','month']).agg({'kp_txn_count':'sum','kp_txn_amt':'sum'}).unstack(level=1).copy()
    kplus_mm_tot.columns = ['_'.join([str(c) for c in lst]) for lst in kplus_mm_tot.columns]

    # CreditCard Total Groupby
    cc_tot = joined_cc.groupby('id').agg({'cc_txn_amt':['count','sum']}).copy()
    cc_tot.columns = ['_'.join(i) for i in cc_tot.columns]

    # CreditCard Monthly Groupby
    combined_cc = pd.pivot_table(joined_cc, index= 'id', columns= 'month', values= 'cc_txn_amt', aggfunc= [np.mean, min, max, np.sum, 'count', np.var, percentile(10), percentile(90)])
    combined_cc.columns = ['cc_'+'_'.join([str(c) for c in lst]) for lst in combined_cc.columns]


    # CreditCard Pompus Features
    combined_cc_holiday = pd.pivot_table(joined_cc, index= 'id', columns= 'is_holiday', values= 'cc_txn_amt', aggfunc= [np.mean, min, max, np.sum, 'count', np.var, percentile(10), percentile(90)])
    combined_cc_weekend = pd.pivot_table(joined_cc, index= 'id', columns= 'is_weekend', values= 'cc_txn_amt', aggfunc= [np.mean, min, max, np.sum, 'count', np.var, percentile(10), percentile(90)])
    combined_cc_quarter = pd.pivot_table(joined_cc, index= 'id', columns= 'quarter', values= 'cc_txn_amt', aggfunc= [np.mean, min, max, np.sum, 'count', np.var, percentile(10), percentile(90)])
    combined_cc_holiday.columns = ['cc_'+'_'.join([str(c) for c in lst]) for lst in combined_cc_holiday.columns]
    combined_cc_weekend.columns = ['cc_'+'_'.join([str(c) for c in lst]) for lst in combined_cc_weekend.columns]
    combined_cc_quarter.columns = ['cc_'+'_'.join([str(c) for c in lst]) for lst in combined_cc_quarter.columns]

    # Joining all together
    train = train.join(kplus_tot).join(kplus_mm_tot).join(cc_tot).join(combined_cc).join(combined_cc_holiday).join(combined_cc_weekend).join(combined_cc_quarter).fillna(0)

    # Quick Normalization
#     for col in [ i for i in train.columns if i not in ['id','gender','ocp_cd','age','income']]:
#         train[col] = StandardScaler().fit_transform(train[col].values.reshape(-1, 1))
#         train[col] = train[col].fillna(0)
#         if 'amt' in col:
#             train[col] = train[col].apply(np.log1p)

    X_train = train[train['income']>0].drop('income',axis=1).copy()
    y_train = pd.DataFrame(train[train['income']>0]['income']).copy()
    X_test = train[train['income']<=0].drop('income',axis=1).copy()

    return X_train, y_train, X_test

In [2]:
X_train, y_train, X_test =  get_prep_data(DATA_DIR)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




In [3]:
cat_feature = ['gender','ocp_cd','age_gnd','gnd_ocp','age_ocp', 'age']

In [4]:
def techjam_score(y_pred, y_true):
    y_pred = np.array(y_pred)
    y_true = np.array(y_true)
    
    return 100 - 100 * np.mean((y_pred-y_true) ** 2 / (np.minimum(2*y_true, y_pred) + y_true)**2)

In [5]:
def techjam_feval_log(y_pred, dtrain):
    y_true = dtrain.get_label()
    return 'techjam_score', techjam_score(np.exp(y_pred), np.exp(y_true)), True

In [8]:
for cat in cat_feature:
    X_test[cat] =X_test[cat].astype(int)
    X_train[cat] =X_train[cat].astype(int)

train_data = lightgbm.Dataset(X_train, label=y_train, categorical_feature=cat_feature , free_raw_data=False)

num_leaves_choices = [15, 31, 63, 127, 200, 255, 300, 350, 400,511 ,600]
ft_frac_choices = [0.6, 0.7, 0.8, 0.9, 1.0]
bagging_frac_choices = [0.6, 0.7, 0.8, 0.9, 1.0]

# We will store the cross validation results in a simple list,
# with tuples in the form of (hyperparam dict, cv score):
cv_results = []


for num_lv in tqdm_notebook(num_leaves_choices):
    for bg_fac in bagging_frac_choices:
        for ft_fac in ft_frac_choices:
            hyperparams = {"boosting_type":'gbdt',
                            "objective": 'mape',
                            "metrics": 'None',
                            "num_leaves": num_lv,
                            "feature_fraction": ft_fac,
                            "bagging_fraction": bg_fac,
                            "learning_rate": 0.01
                                     }
            validation_summary = lightgbm.cv(hyperparams,
                                            train_data,
                                            num_boost_round=10000,
                                            nfold=5,
                                            feval=techjam_feval_log,
                                            stratified=False,
                                            shuffle=True,
                                            early_stopping_rounds=50,
                                            verbose_eval=10)
            
            optimal_num_trees = len(validation_summary["techjam_score-mean"])
            
            # to the hyperparameter dictionary:
            hyperparams["num_boost_round"] = optimal_num_trees

           # And we append results to cv_results:
            cv_results.append((hyperparams, validation_summary["techjam_score-mean"][-1]))

HBox(children=(IntProgress(value=0, max=11), HTML(value='')))



[10]	cv_agg's techjam_score: 89.4646 + 0.0831847
[20]	cv_agg's techjam_score: 89.8475 + 0.0847172
[30]	cv_agg's techjam_score: 90.1744 + 0.0843779
[40]	cv_agg's techjam_score: 90.4504 + 0.0837397
[50]	cv_agg's techjam_score: 90.6894 + 0.0832409
[60]	cv_agg's techjam_score: 90.8945 + 0.08055
[70]	cv_agg's techjam_score: 91.0698 + 0.0790555
[80]	cv_agg's techjam_score: 91.2157 + 0.0792943
[90]	cv_agg's techjam_score: 91.3422 + 0.0803296
[100]	cv_agg's techjam_score: 91.4488 + 0.0803911
[110]	cv_agg's techjam_score: 91.537 + 0.0805745
[120]	cv_agg's techjam_score: 91.6149 + 0.0809748
[130]	cv_agg's techjam_score: 91.6818 + 0.0813358
[140]	cv_agg's techjam_score: 91.7386 + 0.0812439
[150]	cv_agg's techjam_score: 91.7867 + 0.0818654
[160]	cv_agg's techjam_score: 91.8276 + 0.0822483
[170]	cv_agg's techjam_score: 91.8632 + 0.0821245
[180]	cv_agg's techjam_score: 91.8926 + 0.0828241
[190]	cv_agg's techjam_score: 91.9171 + 0.0822635
[200]	cv_agg's techjam_score: 91.9369 + 0.0813582
[210]	cv_agg

[110]	cv_agg's techjam_score: 91.5409 + 0.0800151
[120]	cv_agg's techjam_score: 91.6167 + 0.0810063
[130]	cv_agg's techjam_score: 91.6819 + 0.0815286
[140]	cv_agg's techjam_score: 91.7376 + 0.0810022
[150]	cv_agg's techjam_score: 91.7849 + 0.0800811
[160]	cv_agg's techjam_score: 91.824 + 0.0805921
[170]	cv_agg's techjam_score: 91.8576 + 0.0807031
[180]	cv_agg's techjam_score: 91.8852 + 0.0802199
[190]	cv_agg's techjam_score: 91.909 + 0.0803653
[200]	cv_agg's techjam_score: 91.9285 + 0.0813981
[210]	cv_agg's techjam_score: 91.9455 + 0.0818003
[220]	cv_agg's techjam_score: 91.9575 + 0.0826789
[230]	cv_agg's techjam_score: 91.9697 + 0.0829923
[240]	cv_agg's techjam_score: 91.9789 + 0.0830233
[250]	cv_agg's techjam_score: 91.9854 + 0.0838525
[260]	cv_agg's techjam_score: 91.9903 + 0.084479
[270]	cv_agg's techjam_score: 91.9952 + 0.0838252
[280]	cv_agg's techjam_score: 92.0002 + 0.0836926
[290]	cv_agg's techjam_score: 92.0032 + 0.0843704
[300]	cv_agg's techjam_score: 92.0059 + 0.0843571
[31

[210]	cv_agg's techjam_score: 91.9404 + 0.0831749
[220]	cv_agg's techjam_score: 91.9531 + 0.0828227
[230]	cv_agg's techjam_score: 91.9648 + 0.0825812
[240]	cv_agg's techjam_score: 91.9738 + 0.0836375
[250]	cv_agg's techjam_score: 91.983 + 0.0836158
[260]	cv_agg's techjam_score: 91.99 + 0.0827741
[270]	cv_agg's techjam_score: 91.9954 + 0.0813545
[280]	cv_agg's techjam_score: 91.9999 + 0.0799753
[290]	cv_agg's techjam_score: 92.0037 + 0.0795931
[300]	cv_agg's techjam_score: 92.0064 + 0.0787076
[310]	cv_agg's techjam_score: 92.0072 + 0.0785226
[320]	cv_agg's techjam_score: 92.0068 + 0.0768624
[330]	cv_agg's techjam_score: 92.0058 + 0.0754882
[340]	cv_agg's techjam_score: 92.0051 + 0.0742236
[350]	cv_agg's techjam_score: 92.0026 + 0.0732868
[360]	cv_agg's techjam_score: 92.0006 + 0.0732087
[10]	cv_agg's techjam_score: 89.4712 + 0.0826981
[20]	cv_agg's techjam_score: 89.8585 + 0.08249
[30]	cv_agg's techjam_score: 90.1876 + 0.0840526
[40]	cv_agg's techjam_score: 90.4691 + 0.0848608
[50]	cv_a

[100]	cv_agg's techjam_score: 91.6492 + 0.0724437
[110]	cv_agg's techjam_score: 91.7377 + 0.0720838
[120]	cv_agg's techjam_score: 91.81 + 0.0709733
[130]	cv_agg's techjam_score: 91.8718 + 0.0700299
[140]	cv_agg's techjam_score: 91.9212 + 0.0694196
[150]	cv_agg's techjam_score: 91.9603 + 0.0695449
[160]	cv_agg's techjam_score: 91.9935 + 0.067342
[170]	cv_agg's techjam_score: 92.0197 + 0.0671566
[180]	cv_agg's techjam_score: 92.04 + 0.0668642
[190]	cv_agg's techjam_score: 92.0564 + 0.0662904
[200]	cv_agg's techjam_score: 92.0685 + 0.0674713
[210]	cv_agg's techjam_score: 92.0771 + 0.0673746
[220]	cv_agg's techjam_score: 92.0831 + 0.0666509
[230]	cv_agg's techjam_score: 92.0871 + 0.0657975
[240]	cv_agg's techjam_score: 92.0905 + 0.0653942
[250]	cv_agg's techjam_score: 92.0915 + 0.0657816
[260]	cv_agg's techjam_score: 92.0914 + 0.0655837
[270]	cv_agg's techjam_score: 92.0902 + 0.0667717
[280]	cv_agg's techjam_score: 92.0887 + 0.067157
[290]	cv_agg's techjam_score: 92.0863 + 0.0685302
[300]	

[80]	cv_agg's techjam_score: 91.415 + 0.0713446
[90]	cv_agg's techjam_score: 91.5412 + 0.0716667
[100]	cv_agg's techjam_score: 91.6456 + 0.0708702
[110]	cv_agg's techjam_score: 91.7333 + 0.070515
[120]	cv_agg's techjam_score: 91.8066 + 0.0701765
[130]	cv_agg's techjam_score: 91.8681 + 0.0694068
[140]	cv_agg's techjam_score: 91.9169 + 0.0692519
[150]	cv_agg's techjam_score: 91.9568 + 0.0690977
[160]	cv_agg's techjam_score: 91.9883 + 0.0678026
[170]	cv_agg's techjam_score: 92.0149 + 0.0655716
[180]	cv_agg's techjam_score: 92.0355 + 0.064882
[190]	cv_agg's techjam_score: 92.0528 + 0.0641839
[200]	cv_agg's techjam_score: 92.0644 + 0.0631657
[210]	cv_agg's techjam_score: 92.0729 + 0.0624788
[220]	cv_agg's techjam_score: 92.0784 + 0.0618406
[230]	cv_agg's techjam_score: 92.0828 + 0.0603208
[240]	cv_agg's techjam_score: 92.0852 + 0.0600049
[250]	cv_agg's techjam_score: 92.0861 + 0.0600406
[260]	cv_agg's techjam_score: 92.0852 + 0.0599966
[270]	cv_agg's techjam_score: 92.0829 + 0.0615733
[280]

[60]	cv_agg's techjam_score: 91.1886 + 0.0767453
[70]	cv_agg's techjam_score: 91.3756 + 0.075841
[80]	cv_agg's techjam_score: 91.5281 + 0.0758557
[90]	cv_agg's techjam_score: 91.6567 + 0.0757828
[100]	cv_agg's techjam_score: 91.7639 + 0.0745378
[110]	cv_agg's techjam_score: 91.8485 + 0.0742801
[120]	cv_agg's techjam_score: 91.92 + 0.0737461
[130]	cv_agg's techjam_score: 91.9776 + 0.0732431
[140]	cv_agg's techjam_score: 92.0227 + 0.0715126
[150]	cv_agg's techjam_score: 92.0586 + 0.0715927
[160]	cv_agg's techjam_score: 92.0884 + 0.0713018
[170]	cv_agg's techjam_score: 92.1119 + 0.071001
[180]	cv_agg's techjam_score: 92.1295 + 0.0701042
[190]	cv_agg's techjam_score: 92.1415 + 0.0688146
[200]	cv_agg's techjam_score: 92.1498 + 0.0691769
[210]	cv_agg's techjam_score: 92.1567 + 0.06859
[220]	cv_agg's techjam_score: 92.1604 + 0.0682423
[230]	cv_agg's techjam_score: 92.1617 + 0.0687582
[240]	cv_agg's techjam_score: 92.161 + 0.0685141
[250]	cv_agg's techjam_score: 92.1608 + 0.0682356
[260]	cv_ag

[50]	cv_agg's techjam_score: 90.977 + 0.0757444
[60]	cv_agg's techjam_score: 91.1978 + 0.0752562
[70]	cv_agg's techjam_score: 91.3828 + 0.073379
[80]	cv_agg's techjam_score: 91.535 + 0.0724415
[90]	cv_agg's techjam_score: 91.6608 + 0.0724154
[100]	cv_agg's techjam_score: 91.7665 + 0.0725083
[110]	cv_agg's techjam_score: 91.8508 + 0.0727475
[120]	cv_agg's techjam_score: 91.9213 + 0.0737107
[130]	cv_agg's techjam_score: 91.9785 + 0.0729348
[140]	cv_agg's techjam_score: 92.0248 + 0.0718308
[150]	cv_agg's techjam_score: 92.06 + 0.0713294
[160]	cv_agg's techjam_score: 92.09 + 0.0704888
[170]	cv_agg's techjam_score: 92.1134 + 0.0696918
[180]	cv_agg's techjam_score: 92.1301 + 0.0702346
[190]	cv_agg's techjam_score: 92.1425 + 0.0701664
[200]	cv_agg's techjam_score: 92.152 + 0.0697545
[210]	cv_agg's techjam_score: 92.1589 + 0.0706599
[220]	cv_agg's techjam_score: 92.1621 + 0.0699795
[230]	cv_agg's techjam_score: 92.163 + 0.0698743
[240]	cv_agg's techjam_score: 92.162 + 0.0712213
[250]	cv_agg's 

[30]	cv_agg's techjam_score: 90.4089 + 0.0771266
[40]	cv_agg's techjam_score: 90.7239 + 0.0761598
[50]	cv_agg's techjam_score: 90.9839 + 0.0744326
[60]	cv_agg's techjam_score: 91.2014 + 0.0706242
[70]	cv_agg's techjam_score: 91.3855 + 0.0703979
[80]	cv_agg's techjam_score: 91.5364 + 0.0708605
[90]	cv_agg's techjam_score: 91.6624 + 0.0700373
[100]	cv_agg's techjam_score: 91.7665 + 0.069889
[110]	cv_agg's techjam_score: 91.8499 + 0.0697328
[120]	cv_agg's techjam_score: 91.919 + 0.0708116
[130]	cv_agg's techjam_score: 91.9756 + 0.0710162
[140]	cv_agg's techjam_score: 92.0195 + 0.0697332
[150]	cv_agg's techjam_score: 92.0555 + 0.0682021
[160]	cv_agg's techjam_score: 92.0828 + 0.0678524
[170]	cv_agg's techjam_score: 92.1042 + 0.0673919
[180]	cv_agg's techjam_score: 92.1197 + 0.0665204
[190]	cv_agg's techjam_score: 92.1317 + 0.0669297
[200]	cv_agg's techjam_score: 92.1399 + 0.0658571
[210]	cv_agg's techjam_score: 92.1455 + 0.0658975
[220]	cv_agg's techjam_score: 92.148 + 0.0657585
[230]	cv_a

[50]	cv_agg's techjam_score: 91.0313 + 0.0804117
[60]	cv_agg's techjam_score: 91.256 + 0.0799951
[70]	cv_agg's techjam_score: 91.4434 + 0.0778401
[80]	cv_agg's techjam_score: 91.5947 + 0.0773087
[90]	cv_agg's techjam_score: 91.7217 + 0.0756724
[100]	cv_agg's techjam_score: 91.827 + 0.0749414
[110]	cv_agg's techjam_score: 91.9109 + 0.0747834
[120]	cv_agg's techjam_score: 91.9792 + 0.0740918
[130]	cv_agg's techjam_score: 92.034 + 0.0737009
[140]	cv_agg's techjam_score: 92.0788 + 0.0725879
[150]	cv_agg's techjam_score: 92.1135 + 0.0720501
[160]	cv_agg's techjam_score: 92.1403 + 0.0714816
[170]	cv_agg's techjam_score: 92.1609 + 0.0705011
[180]	cv_agg's techjam_score: 92.1754 + 0.0699633
[190]	cv_agg's techjam_score: 92.1856 + 0.0691303
[200]	cv_agg's techjam_score: 92.1934 + 0.0686767
[210]	cv_agg's techjam_score: 92.1979 + 0.0684308
[220]	cv_agg's techjam_score: 92.1997 + 0.0681734
[230]	cv_agg's techjam_score: 92.1985 + 0.0680451
[240]	cv_agg's techjam_score: 92.1969 + 0.0687344
[250]	cv

[150]	cv_agg's techjam_score: 92.1113 + 0.0751502
[160]	cv_agg's techjam_score: 92.1375 + 0.0737745
[170]	cv_agg's techjam_score: 92.1576 + 0.0725659
[180]	cv_agg's techjam_score: 92.1707 + 0.0725649
[190]	cv_agg's techjam_score: 92.18 + 0.0729307
[200]	cv_agg's techjam_score: 92.1866 + 0.0735477
[210]	cv_agg's techjam_score: 92.1903 + 0.0736424
[220]	cv_agg's techjam_score: 92.1919 + 0.0741211
[230]	cv_agg's techjam_score: 92.192 + 0.0738037
[240]	cv_agg's techjam_score: 92.1901 + 0.0737014
[250]	cv_agg's techjam_score: 92.1881 + 0.0732615
[260]	cv_agg's techjam_score: 92.1837 + 0.0730619
[270]	cv_agg's techjam_score: 92.179 + 0.0725361
[10]	cv_agg's techjam_score: 89.592 + 0.0822182
[20]	cv_agg's techjam_score: 90.0629 + 0.0801863
[30]	cv_agg's techjam_score: 90.4558 + 0.0782463
[40]	cv_agg's techjam_score: 90.7782 + 0.0763325
[50]	cv_agg's techjam_score: 91.045 + 0.0747362
[60]	cv_agg's techjam_score: 91.267 + 0.0728248
[70]	cv_agg's techjam_score: 91.4502 + 0.0721042
[80]	cv_agg's 

[20]	cv_agg's techjam_score: 90.067 + 0.08365
[30]	cv_agg's techjam_score: 90.4619 + 0.0825458
[40]	cv_agg's techjam_score: 90.7861 + 0.0819503
[50]	cv_agg's techjam_score: 91.0546 + 0.0824092
[60]	cv_agg's techjam_score: 91.2799 + 0.0803158
[70]	cv_agg's techjam_score: 91.4681 + 0.0786184
[80]	cv_agg's techjam_score: 91.6204 + 0.0780484
[90]	cv_agg's techjam_score: 91.7478 + 0.0774119
[100]	cv_agg's techjam_score: 91.8528 + 0.0758367
[110]	cv_agg's techjam_score: 91.9365 + 0.0759382
[120]	cv_agg's techjam_score: 92.0048 + 0.0769586
[130]	cv_agg's techjam_score: 92.0595 + 0.0764926
[140]	cv_agg's techjam_score: 92.1025 + 0.0741372
[150]	cv_agg's techjam_score: 92.1342 + 0.0731726
[160]	cv_agg's techjam_score: 92.1599 + 0.0734793
[170]	cv_agg's techjam_score: 92.1779 + 0.0738742
[180]	cv_agg's techjam_score: 92.1909 + 0.0738374
[190]	cv_agg's techjam_score: 92.2011 + 0.0734509
[200]	cv_agg's techjam_score: 92.2075 + 0.073286
[210]	cv_agg's techjam_score: 92.2109 + 0.0737336
[220]	cv_agg

[180]	cv_agg's techjam_score: 92.1871 + 0.0770726
[190]	cv_agg's techjam_score: 92.196 + 0.0772896
[200]	cv_agg's techjam_score: 92.2015 + 0.0766672
[210]	cv_agg's techjam_score: 92.2035 + 0.0768222
[220]	cv_agg's techjam_score: 92.2037 + 0.0769639
[230]	cv_agg's techjam_score: 92.2013 + 0.0781795
[240]	cv_agg's techjam_score: 92.1985 + 0.0785518
[250]	cv_agg's techjam_score: 92.1953 + 0.0791301
[260]	cv_agg's techjam_score: 92.1921 + 0.0804628
[10]	cv_agg's techjam_score: 89.6 + 0.0840043
[20]	cv_agg's techjam_score: 90.0794 + 0.0826939
[30]	cv_agg's techjam_score: 90.4761 + 0.0803205
[40]	cv_agg's techjam_score: 90.8025 + 0.0796372
[50]	cv_agg's techjam_score: 91.0704 + 0.0781984
[60]	cv_agg's techjam_score: 91.2923 + 0.0746882
[70]	cv_agg's techjam_score: 91.4755 + 0.0751242
[80]	cv_agg's techjam_score: 91.626 + 0.0731066
[90]	cv_agg's techjam_score: 91.7503 + 0.0727858
[100]	cv_agg's techjam_score: 91.8505 + 0.0727497
[110]	cv_agg's techjam_score: 91.9312 + 0.0722418
[120]	cv_agg's

[70]	cv_agg's techjam_score: 91.478 + 0.081831
[80]	cv_agg's techjam_score: 91.6293 + 0.0811011
[90]	cv_agg's techjam_score: 91.7563 + 0.080149
[100]	cv_agg's techjam_score: 91.8604 + 0.0779711
[110]	cv_agg's techjam_score: 91.9429 + 0.0784476
[120]	cv_agg's techjam_score: 92.0105 + 0.0782178
[130]	cv_agg's techjam_score: 92.064 + 0.0790763
[140]	cv_agg's techjam_score: 92.1055 + 0.0781351
[150]	cv_agg's techjam_score: 92.1376 + 0.0767225
[160]	cv_agg's techjam_score: 92.1635 + 0.0763801
[170]	cv_agg's techjam_score: 92.1825 + 0.0759849
[180]	cv_agg's techjam_score: 92.1949 + 0.0743407
[190]	cv_agg's techjam_score: 92.2047 + 0.0742746
[200]	cv_agg's techjam_score: 92.21 + 0.0737343
[210]	cv_agg's techjam_score: 92.2127 + 0.0736183
[220]	cv_agg's techjam_score: 92.2121 + 0.0755198
[230]	cv_agg's techjam_score: 92.21 + 0.0775076
[240]	cv_agg's techjam_score: 92.2069 + 0.0783458
[250]	cv_agg's techjam_score: 92.2048 + 0.0793588
[260]	cv_agg's techjam_score: 92.2018 + 0.0793319
[10]	cv_agg

[230]	cv_agg's techjam_score: 92.1998 + 0.0873579
[240]	cv_agg's techjam_score: 92.1975 + 0.0874978
[250]	cv_agg's techjam_score: 92.1955 + 0.0876669
[260]	cv_agg's techjam_score: 92.1916 + 0.0881778
[10]	cv_agg's techjam_score: 89.6039 + 0.083358
[20]	cv_agg's techjam_score: 90.085 + 0.0831248
[30]	cv_agg's techjam_score: 90.4829 + 0.0808845
[40]	cv_agg's techjam_score: 90.8106 + 0.0792226
[50]	cv_agg's techjam_score: 91.0801 + 0.0793244
[60]	cv_agg's techjam_score: 91.3024 + 0.0772905
[70]	cv_agg's techjam_score: 91.4852 + 0.0759668
[80]	cv_agg's techjam_score: 91.6356 + 0.0764685
[90]	cv_agg's techjam_score: 91.7584 + 0.0762752
[100]	cv_agg's techjam_score: 91.8587 + 0.0766003
[110]	cv_agg's techjam_score: 91.938 + 0.0757744
[120]	cv_agg's techjam_score: 92.0035 + 0.0778935
[130]	cv_agg's techjam_score: 92.0549 + 0.0800671
[140]	cv_agg's techjam_score: 92.095 + 0.0817522
[150]	cv_agg's techjam_score: 92.1272 + 0.082634
[160]	cv_agg's techjam_score: 92.152 + 0.081998
[170]	cv_agg's t

[110]	cv_agg's techjam_score: 91.9473 + 0.0789505
[120]	cv_agg's techjam_score: 92.0156 + 0.0775698
[130]	cv_agg's techjam_score: 92.0696 + 0.07709
[140]	cv_agg's techjam_score: 92.111 + 0.0755502
[150]	cv_agg's techjam_score: 92.1419 + 0.0761355
[160]	cv_agg's techjam_score: 92.1675 + 0.0758408
[170]	cv_agg's techjam_score: 92.1864 + 0.0762822
[180]	cv_agg's techjam_score: 92.1992 + 0.0744622
[190]	cv_agg's techjam_score: 92.2093 + 0.0740626
[200]	cv_agg's techjam_score: 92.2153 + 0.0739006
[210]	cv_agg's techjam_score: 92.2166 + 0.0730913
[220]	cv_agg's techjam_score: 92.2161 + 0.0737752
[230]	cv_agg's techjam_score: 92.2154 + 0.073854
[240]	cv_agg's techjam_score: 92.2124 + 0.073731
[250]	cv_agg's techjam_score: 92.2099 + 0.0752662
[260]	cv_agg's techjam_score: 92.206 + 0.0754457
[10]	cv_agg's techjam_score: 89.6035 + 0.0828857
[20]	cv_agg's techjam_score: 90.0863 + 0.0822122
[30]	cv_agg's techjam_score: 90.4839 + 0.0804226
[40]	cv_agg's techjam_score: 90.8135 + 0.0796331
[50]	cv_ag

[240]	cv_agg's techjam_score: 92.2014 + 0.0824439
[250]	cv_agg's techjam_score: 92.1994 + 0.0834422
[260]	cv_agg's techjam_score: 92.1964 + 0.0830231
[10]	cv_agg's techjam_score: 89.6075 + 0.0828848
[20]	cv_agg's techjam_score: 90.0907 + 0.0822433
[30]	cv_agg's techjam_score: 90.4885 + 0.0798466
[40]	cv_agg's techjam_score: 90.815 + 0.0807829
[50]	cv_agg's techjam_score: 91.0857 + 0.0797603
[60]	cv_agg's techjam_score: 91.3096 + 0.0795341
[70]	cv_agg's techjam_score: 91.491 + 0.0786258
[80]	cv_agg's techjam_score: 91.6408 + 0.0789625
[90]	cv_agg's techjam_score: 91.7622 + 0.0804384
[100]	cv_agg's techjam_score: 91.861 + 0.0816479
[110]	cv_agg's techjam_score: 91.9425 + 0.0811742
[120]	cv_agg's techjam_score: 92.0072 + 0.0829493
[130]	cv_agg's techjam_score: 92.0574 + 0.0850274
[140]	cv_agg's techjam_score: 92.0982 + 0.0863942
[150]	cv_agg's techjam_score: 92.129 + 0.0888878
[160]	cv_agg's techjam_score: 92.1524 + 0.0896686
[170]	cv_agg's techjam_score: 92.1689 + 0.0893104
[180]	cv_agg'

[120]	cv_agg's techjam_score: 92.0201 + 0.0785585
[130]	cv_agg's techjam_score: 92.0743 + 0.0781982
[140]	cv_agg's techjam_score: 92.1159 + 0.0768346
[150]	cv_agg's techjam_score: 92.1477 + 0.075361
[160]	cv_agg's techjam_score: 92.1727 + 0.0749124
[170]	cv_agg's techjam_score: 92.1915 + 0.074215
[180]	cv_agg's techjam_score: 92.206 + 0.0732856
[190]	cv_agg's techjam_score: 92.2154 + 0.0740337
[200]	cv_agg's techjam_score: 92.2202 + 0.0744446
[210]	cv_agg's techjam_score: 92.2225 + 0.0741933
[220]	cv_agg's techjam_score: 92.2225 + 0.0750111
[230]	cv_agg's techjam_score: 92.2215 + 0.076022
[240]	cv_agg's techjam_score: 92.219 + 0.0768812
[250]	cv_agg's techjam_score: 92.2169 + 0.0774725
[260]	cv_agg's techjam_score: 92.2129 + 0.0770281
[10]	cv_agg's techjam_score: 89.6046 + 0.0836693
[20]	cv_agg's techjam_score: 90.0889 + 0.0842022
[30]	cv_agg's techjam_score: 90.4864 + 0.0820041
[40]	cv_agg's techjam_score: 90.8172 + 0.081105
[50]	cv_agg's techjam_score: 91.0872 + 0.0803837
[60]	cv_agg

[10]	cv_agg's techjam_score: 89.6086 + 0.0833254
[20]	cv_agg's techjam_score: 90.0941 + 0.0823366
[30]	cv_agg's techjam_score: 90.4942 + 0.0807913
[40]	cv_agg's techjam_score: 90.8217 + 0.0791361
[50]	cv_agg's techjam_score: 91.0928 + 0.0790033
[60]	cv_agg's techjam_score: 91.3146 + 0.0779905
[70]	cv_agg's techjam_score: 91.4955 + 0.0787254
[80]	cv_agg's techjam_score: 91.6469 + 0.0774077
[90]	cv_agg's techjam_score: 91.7692 + 0.0784749
[100]	cv_agg's techjam_score: 91.8685 + 0.0806651
[110]	cv_agg's techjam_score: 91.9499 + 0.0812336
[120]	cv_agg's techjam_score: 92.0143 + 0.0819545
[130]	cv_agg's techjam_score: 92.0646 + 0.0826623
[140]	cv_agg's techjam_score: 92.1035 + 0.0827248
[150]	cv_agg's techjam_score: 92.134 + 0.0825011
[160]	cv_agg's techjam_score: 92.1569 + 0.0815732
[170]	cv_agg's techjam_score: 92.173 + 0.0811152
[180]	cv_agg's techjam_score: 92.1854 + 0.0825184
[190]	cv_agg's techjam_score: 92.193 + 0.082325
[200]	cv_agg's techjam_score: 92.1982 + 0.0833831
[210]	cv_agg'

[170]	cv_agg's techjam_score: 92.1964 + 0.0761458
[180]	cv_agg's techjam_score: 92.2104 + 0.0762543
[190]	cv_agg's techjam_score: 92.2188 + 0.0767706
[200]	cv_agg's techjam_score: 92.2241 + 0.0765052
[210]	cv_agg's techjam_score: 92.2255 + 0.0766527
[220]	cv_agg's techjam_score: 92.2249 + 0.0779618
[230]	cv_agg's techjam_score: 92.2233 + 0.0782208
[240]	cv_agg's techjam_score: 92.2223 + 0.0783145
[250]	cv_agg's techjam_score: 92.2195 + 0.0790591
[10]	cv_agg's techjam_score: 89.6077 + 0.0829885
[20]	cv_agg's techjam_score: 90.0921 + 0.0848253
[30]	cv_agg's techjam_score: 90.4918 + 0.0846346
[40]	cv_agg's techjam_score: 90.823 + 0.0845827
[50]	cv_agg's techjam_score: 91.0943 + 0.0837481
[60]	cv_agg's techjam_score: 91.3172 + 0.0825606
[70]	cv_agg's techjam_score: 91.5036 + 0.0814823
[80]	cv_agg's techjam_score: 91.6547 + 0.080213
[90]	cv_agg's techjam_score: 91.7779 + 0.0824963
[100]	cv_agg's techjam_score: 91.8804 + 0.0815199
[110]	cv_agg's techjam_score: 91.9594 + 0.0825683
[120]	cv_ag

[70]	cv_agg's techjam_score: 91.5019 + 0.0775631
[80]	cv_agg's techjam_score: 91.6515 + 0.0775555
[90]	cv_agg's techjam_score: 91.7744 + 0.0796253
[100]	cv_agg's techjam_score: 91.8746 + 0.0809206
[110]	cv_agg's techjam_score: 91.9535 + 0.0831137
[120]	cv_agg's techjam_score: 92.0188 + 0.0856203
[130]	cv_agg's techjam_score: 92.0701 + 0.0855341
[140]	cv_agg's techjam_score: 92.1082 + 0.0863402
[150]	cv_agg's techjam_score: 92.1377 + 0.0867222
[160]	cv_agg's techjam_score: 92.159 + 0.0869771
[170]	cv_agg's techjam_score: 92.1762 + 0.0875478
[180]	cv_agg's techjam_score: 92.1868 + 0.0889633
[190]	cv_agg's techjam_score: 92.1934 + 0.0893387
[200]	cv_agg's techjam_score: 92.1961 + 0.0901323
[210]	cv_agg's techjam_score: 92.1981 + 0.0928639
[220]	cv_agg's techjam_score: 92.1967 + 0.0940706
[230]	cv_agg's techjam_score: 92.1963 + 0.0947565
[240]	cv_agg's techjam_score: 92.194 + 0.0960102
[250]	cv_agg's techjam_score: 92.1898 + 0.0973467
[260]	cv_agg's techjam_score: 92.1851 + 0.0969557
[10]	

[200]	cv_agg's techjam_score: 92.2295 + 0.0835239
[210]	cv_agg's techjam_score: 92.2333 + 0.0835653
[220]	cv_agg's techjam_score: 92.2337 + 0.0840873
[230]	cv_agg's techjam_score: 92.234 + 0.0846372
[240]	cv_agg's techjam_score: 92.2324 + 0.0846986
[250]	cv_agg's techjam_score: 92.2306 + 0.0846641
[260]	cv_agg's techjam_score: 92.2264 + 0.0839806
[270]	cv_agg's techjam_score: 92.2223 + 0.0833303
[280]	cv_agg's techjam_score: 92.2172 + 0.0837059
[10]	cv_agg's techjam_score: 89.6082 + 0.0852809
[20]	cv_agg's techjam_score: 90.0951 + 0.085629
[30]	cv_agg's techjam_score: 90.4948 + 0.085501
[40]	cv_agg's techjam_score: 90.8278 + 0.0858125
[50]	cv_agg's techjam_score: 91.0976 + 0.0849516
[60]	cv_agg's techjam_score: 91.3214 + 0.0829584
[70]	cv_agg's techjam_score: 91.5066 + 0.0823631
[80]	cv_agg's techjam_score: 91.6584 + 0.0822373
[90]	cv_agg's techjam_score: 91.7809 + 0.081613
[100]	cv_agg's techjam_score: 91.8823 + 0.0816223
[110]	cv_agg's techjam_score: 91.963 + 0.0825341
[120]	cv_agg's

[30]	cv_agg's techjam_score: 90.5033 + 0.0834467
[40]	cv_agg's techjam_score: 90.8335 + 0.082598
[50]	cv_agg's techjam_score: 91.1042 + 0.081922
[60]	cv_agg's techjam_score: 91.3263 + 0.0800313
[70]	cv_agg's techjam_score: 91.5095 + 0.0799204
[80]	cv_agg's techjam_score: 91.6587 + 0.0801577
[90]	cv_agg's techjam_score: 91.7804 + 0.0787946
[100]	cv_agg's techjam_score: 91.8804 + 0.0806531
[110]	cv_agg's techjam_score: 91.9603 + 0.0821652
[120]	cv_agg's techjam_score: 92.0228 + 0.0836061
[130]	cv_agg's techjam_score: 92.0735 + 0.0848294
[140]	cv_agg's techjam_score: 92.1133 + 0.0859128
[150]	cv_agg's techjam_score: 92.1444 + 0.0875665
[160]	cv_agg's techjam_score: 92.1674 + 0.0876164
[170]	cv_agg's techjam_score: 92.1839 + 0.0884296
[180]	cv_agg's techjam_score: 92.1953 + 0.0891104
[190]	cv_agg's techjam_score: 92.203 + 0.0901514
[200]	cv_agg's techjam_score: 92.2055 + 0.0922499
[210]	cv_agg's techjam_score: 92.2072 + 0.0928214
[220]	cv_agg's techjam_score: 92.2059 + 0.0951884
[230]	cv_a

[170]	cv_agg's techjam_score: 92.1955 + 0.0820316
[180]	cv_agg's techjam_score: 92.2087 + 0.0827388
[190]	cv_agg's techjam_score: 92.2159 + 0.0832703
[200]	cv_agg's techjam_score: 92.2202 + 0.082951
[210]	cv_agg's techjam_score: 92.2224 + 0.0817667
[220]	cv_agg's techjam_score: 92.2217 + 0.0802416
[230]	cv_agg's techjam_score: 92.2224 + 0.0814707
[240]	cv_agg's techjam_score: 92.2193 + 0.081089
[250]	cv_agg's techjam_score: 92.2163 + 0.0805918
[260]	cv_agg's techjam_score: 92.2116 + 0.0801569
[10]	cv_agg's techjam_score: 89.6092 + 0.0849049
[20]	cv_agg's techjam_score: 90.0972 + 0.0864859
[30]	cv_agg's techjam_score: 90.498 + 0.0840457
[40]	cv_agg's techjam_score: 90.8315 + 0.0828738
[50]	cv_agg's techjam_score: 91.1022 + 0.0816469
[60]	cv_agg's techjam_score: 91.3273 + 0.0796636
[70]	cv_agg's techjam_score: 91.5114 + 0.0783795
[80]	cv_agg's techjam_score: 91.661 + 0.0787165
[90]	cv_agg's techjam_score: 91.786 + 0.0790505
[100]	cv_agg's techjam_score: 91.8872 + 0.07914
[110]	cv_agg's t

[60]	cv_agg's techjam_score: 91.3277 + 0.084608
[70]	cv_agg's techjam_score: 91.5101 + 0.0843112
[80]	cv_agg's techjam_score: 91.6612 + 0.0855333
[90]	cv_agg's techjam_score: 91.783 + 0.085263
[100]	cv_agg's techjam_score: 91.8829 + 0.0855091
[110]	cv_agg's techjam_score: 91.9619 + 0.0865071
[120]	cv_agg's techjam_score: 92.0253 + 0.0868351
[130]	cv_agg's techjam_score: 92.0754 + 0.0880094
[140]	cv_agg's techjam_score: 92.1142 + 0.0894132
[150]	cv_agg's techjam_score: 92.1443 + 0.0899092
[160]	cv_agg's techjam_score: 92.1655 + 0.0911334
[170]	cv_agg's techjam_score: 92.1821 + 0.0911208
[180]	cv_agg's techjam_score: 92.1925 + 0.0902871
[190]	cv_agg's techjam_score: 92.199 + 0.0901194
[200]	cv_agg's techjam_score: 92.2031 + 0.0910534
[210]	cv_agg's techjam_score: 92.2041 + 0.0911662
[220]	cv_agg's techjam_score: 92.2042 + 0.0923964
[230]	cv_agg's techjam_score: 92.2035 + 0.0917565
[240]	cv_agg's techjam_score: 92.2001 + 0.0940348
[250]	cv_agg's techjam_score: 92.1963 + 0.0938131
[260]	cv

In [9]:
sort_cv_result = sorted(cv_results, key=lambda tup:tup[1])

In [11]:
sort_cv_result[-1]

({'boosting_type': 'gbdt',
  'objective': 'mape',
  'metrics': 'None',
  'num_leaves': 511,
  'feature_fraction': 0.6,
  'bagging_fraction': 1.0,
  'learning_rate': 0.01,
  'num_boost_round': 234},
 92.23441356231982)

In [None]:
#select parameter score > 92.21

In [14]:
### select best 10 models
MODELS=[] 
for params_and_score in tqdm_notebook(sort_cv_result[-10:]):
    params = params_and_score[0]
    model = lightgbm.train(params,
                train_data,
               )
    MODELS.append(model)

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))






In [15]:
### ensemble 10 models 
pred = []
for model in MODELS:
    y_pred = model.predict(X_test)
    y_pred = np.exp(y_pred)
    pred.append(y_pred)

In [16]:
pred=np.array(pred)
# perform ensemble
final_pred = pred.mean(axis=0)


In [28]:
### Create submission dataframe
submission = pd.DataFrame()
submission['id'] = [i for i in range(50001,65001)] 
submission['final_pred'] = final_pred