In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
import datetime


input_path = '.'

def load_order_data(file_name):
    df = pd.read_csv('%s/%s' % (input_path, file_name))
    c = 'order_unix_time'
    mask = pd.notnull(df[c])
    df.loc[mask, c] = df.loc[mask, c].apply(lambda x: datetime.datetime.fromtimestamp(x))
    df.loc[mask, 'date'] = df.loc[mask, c].apply(lambda x: x.strftime('%Y%m%d'))
    df.loc[mask, 'hour'] = df.loc[mask, c].apply(lambda x: x.hour)
    df.loc[mask, 'minute'] = df.loc[mask, c].apply(lambda x: x.minute)
    return df

def load_area_data(file_name):
    df = pd.read_csv('%s/%s' % (input_path, file_name), dtype={'date': str, 'time': str})
    mask = pd.notnull(df['time'])
    df.loc[mask, 'hour'] = df.loc[mask, 'time'].apply(lambda x: int(x[:2]))
    df.loc[mask, 'minute'] = df.loc[mask, 'time'].apply(lambda x: int(x[2:]))
    df.drop(['log_unix_time', 'time'], axis=1, inplace=True)
    return df

print('loading data...')
df_tr = load_order_data('./all_data/waybill_info.csv')
mask = (df_tr.delivery_duration < 4654.0) & (df_tr.delivery_duration > 663.0) & ((df_tr.hour.values == 11) | (df_tr.hour.values == 17))
df_tr = df_tr.loc[mask]
df_te = load_order_data('./all_data/waybill_info_test_b.csv')

df_tr_weather = load_area_data('./all_data/weather_realtime.csv')
df_te_weather = load_area_data('./all_data/weather_realtime_test.csv')

df_tr_area = load_area_data('./all_data/area_realtime.csv')
df_te_area = load_area_data('./all_data/area_realtime_test.csv')

print('merging data...')
df_tr = pd.merge(df_tr, df_tr_weather, on=['date', 'hour', 'minute', 'area_id'], how='left')
df_tr = pd.merge(df_tr, df_tr_area, on=['date', 'hour', 'minute', 'area_id'], how='left')

df_te = pd.merge(df_te, df_te_weather, on=['date', 'hour', 'minute', 'area_id'], how='left')
df_te = pd.merge(df_te, df_te_area, on=['date', 'hour', 'minute', 'area_id'], how='left')

print('constructing training data...')
cols = df_tr.columns.tolist()
to_drop = ['order_unix_time', 'arriveshop_unix_time', 'fetch_unix_time', 'finish_unix_time', 'order_id', 'delivery_duration', 'date']
features = list(np.setdiff1d(cols, to_drop))
print(features)


loading data...
merging data...
constructing training data...
['area_id', 'box_total_value', 'customer_latitude', 'customer_longitude', 'delivery_distance', 'deliverying_order_num', 'food_num', 'food_total_value', 'hour', 'minute', 'not_fetched_order_num', 'notbusy_working_rider_num', 'poi_id', 'poi_lat', 'poi_lng', 'rain', 'temperature', 'waiting_order_num', 'wind', 'working_rider_num']


baseline去除的标签：订单下单时刻、骑手到店时间、骑手取餐时间、骑手送达时间
baseline使用的标签：配送区域id、餐盒费、用户纬度、用户经度、配送导航距离、区域取餐未送达单量、菜品数量、订单价格（原价）、天气时间小时、分钟、区域未取餐量、不忙的区域骑手数、商家id、商家维度、经度、雨量、温度、下单时刻商户未完成单量、风速、包含忙的区域骑手数

增加计算标签：骑手到点时间-订单下单时刻  测试时没有该属性。可定义成店铺属性，与总下达时间相互辅助
计算商铺与用户的距离，看是否有帮助---已有配送导航距离，是否需要计算直接距离，这样可知道路线复杂度


In [2]:
print('constructing training data...')
cols = df_tr.columns.tolist()
to_drop = ['order_unix_time', 'arriveshop_unix_time', 'fetch_unix_time', 'finish_unix_time', 'order_id', 'delivery_duration', 'date']
features = list(np.setdiff1d(cols, to_drop))
print(features)

constructing training data...
['area_id', 'box_total_value', 'customer_latitude', 'customer_longitude', 'delivery_distance', 'deliverying_order_num', 'food_num', 'food_total_value', 'hour', 'minute', 'not_fetched_order_num', 'notbusy_working_rider_num', 'poi_id', 'poi_lat', 'poi_lng', 'rain', 'temperature', 'waiting_order_num', 'wind', 'working_rider_num']


baselineMAE：368   num_round = 20000
split baselineMAE: 364

In [3]:
import random
x_train = df_tr[features]
y_train = df_tr['delivery_duration']
g_train_ratio = 0.9
image_num = len(x_train)
train_val_split = list(range(image_num))
random.seed(9527) # seed random with a fixed number
random.shuffle(train_val_split)
val_num = int(image_num*(1-g_train_ratio))
val_id = train_val_split[0:val_num]
train_id = train_val_split[val_num:image_num]
X_train = x_train.iloc[train_id,:]
Y_train = y_train[train_id]
X_val = x_train.iloc[val_id,:]
Y_val = y_train[val_id]

dtrain = xgb.DMatrix(X_train.values, Y_train)

print('training model...')
watchlist = [(dtrain, 'train')]
param = {
        'booster': 'gbtree',
        'objective': 'reg:linear',
        'eval_metric': 'mae',
        'eta': 0.01,
        'num_round': 20000,
        'colsample_bytree': 0.65,
        'subsample': 0.8,
        'max_depth': 5,
        'nthread': -1,
        'seed': 20171001,
        'silent': 1,
    }
bst = xgb.train(param, dtrain, param['num_round'], watchlist, verbose_eval=10)

In [4]:
import lightgbm as lgb

lgb_train = lgb.Dataset(X_train.values, Y_train.values)
lgb_eval = lgb.Dataset(X_val.values, Y_val.values,reference=lgb_train)

# 02. parameters

params = {
    'boosting_type': 'gbdt',
    'objective': 'regression_l1',
    'metric': 'mae',
    'num_leaves': 31,
    'learning_rate': 0.001,
    'feature_fraction': 0.65,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'seed': 20171023,
    'verbose': 0
}

# train
early_stopping_callback = lgb.callback.early_stopping(100)

gbm = lgb.train(params,
                lgb_train,
                num_boost_round=100000,
                valid_sets=lgb_eval,
                callbacks=[early_stopping_callback],
                verbose_eval=40
               )


Training until validation scores don't improve for 100 rounds.
[40]	valid_0's l1: 682.479
[80]	valid_0's l1: 670.602
[120]	valid_0's l1: 659.216
[160]	valid_0's l1: 648.399
[200]	valid_0's l1: 638.209
[240]	valid_0's l1: 628.536
[280]	valid_0's l1: 619.558
[320]	valid_0's l1: 610.965
[360]	valid_0's l1: 602.825
[400]	valid_0's l1: 595.196
[440]	valid_0's l1: 587.8
[480]	valid_0's l1: 580.903
[520]	valid_0's l1: 574.478
[560]	valid_0's l1: 568.191
[600]	valid_0's l1: 562.405
[640]	valid_0's l1: 556.791
[680]	valid_0's l1: 551.575
[720]	valid_0's l1: 546.601
[760]	valid_0's l1: 541.917
[800]	valid_0's l1: 537.442
[840]	valid_0's l1: 533.292
[880]	valid_0's l1: 529.192
[920]	valid_0's l1: 525.396
[960]	valid_0's l1: 521.677
[1000]	valid_0's l1: 518.187
[1040]	valid_0's l1: 514.839
[1080]	valid_0's l1: 511.689
[1120]	valid_0's l1: 508.636
[1160]	valid_0's l1: 505.825
[1200]	valid_0's l1: 503.09
[1240]	valid_0's l1: 500.498
[1280]	valid_0's l1: 497.979
[1320]	valid_0's l1: 495.597
[1360]	va

[11320]	valid_0's l1: 415.602
[11360]	valid_0's l1: 415.553
[11400]	valid_0's l1: 415.51
[11440]	valid_0's l1: 415.473
[11480]	valid_0's l1: 415.435
[11520]	valid_0's l1: 415.401
[11560]	valid_0's l1: 415.359
[11600]	valid_0's l1: 415.315
[11640]	valid_0's l1: 415.275
[11680]	valid_0's l1: 415.231
[11720]	valid_0's l1: 415.193
[11760]	valid_0's l1: 415.157
[11800]	valid_0's l1: 415.117
[11840]	valid_0's l1: 415.084
[11880]	valid_0's l1: 415.053
[11920]	valid_0's l1: 415.017
[11960]	valid_0's l1: 414.978
[12000]	valid_0's l1: 414.948
[12040]	valid_0's l1: 414.905
[12080]	valid_0's l1: 414.867
[12120]	valid_0's l1: 414.839
[12160]	valid_0's l1: 414.805
[12200]	valid_0's l1: 414.77
[12240]	valid_0's l1: 414.742
[12280]	valid_0's l1: 414.705
[12320]	valid_0's l1: 414.673
[12360]	valid_0's l1: 414.642
[12400]	valid_0's l1: 414.604
[12440]	valid_0's l1: 414.569
[12480]	valid_0's l1: 414.539
[12520]	valid_0's l1: 414.504
[12560]	valid_0's l1: 414.469
[12600]	valid_0's l1: 414.438
[12640]	vali

[22280]	valid_0's l1: 409.832
[22320]	valid_0's l1: 409.821
[22360]	valid_0's l1: 409.81
[22400]	valid_0's l1: 409.802
[22440]	valid_0's l1: 409.793
[22480]	valid_0's l1: 409.779
[22520]	valid_0's l1: 409.771
[22560]	valid_0's l1: 409.758
[22600]	valid_0's l1: 409.749
[22640]	valid_0's l1: 409.738
[22680]	valid_0's l1: 409.727
[22720]	valid_0's l1: 409.718
[22760]	valid_0's l1: 409.705
[22800]	valid_0's l1: 409.694
[22840]	valid_0's l1: 409.684
[22880]	valid_0's l1: 409.675
[22920]	valid_0's l1: 409.659
[22960]	valid_0's l1: 409.644
[23000]	valid_0's l1: 409.63
[23040]	valid_0's l1: 409.615
[23080]	valid_0's l1: 409.604
[23120]	valid_0's l1: 409.594
[23160]	valid_0's l1: 409.582
[23200]	valid_0's l1: 409.573
[23240]	valid_0's l1: 409.558
[23280]	valid_0's l1: 409.548
[23320]	valid_0's l1: 409.536
[23360]	valid_0's l1: 409.528
[23400]	valid_0's l1: 409.513
[23440]	valid_0's l1: 409.504
[23480]	valid_0's l1: 409.496
[23520]	valid_0's l1: 409.489
[23560]	valid_0's l1: 409.479
[23600]	vali

[33240]	valid_0's l1: 407.58
[33280]	valid_0's l1: 407.57
[33320]	valid_0's l1: 407.56
[33360]	valid_0's l1: 407.557
[33400]	valid_0's l1: 407.548
[33440]	valid_0's l1: 407.544
[33480]	valid_0's l1: 407.539
[33520]	valid_0's l1: 407.537
[33560]	valid_0's l1: 407.532
[33600]	valid_0's l1: 407.527
[33640]	valid_0's l1: 407.523
[33680]	valid_0's l1: 407.517
[33720]	valid_0's l1: 407.51
[33760]	valid_0's l1: 407.507
[33800]	valid_0's l1: 407.503
[33840]	valid_0's l1: 407.498
[33880]	valid_0's l1: 407.493
[33920]	valid_0's l1: 407.488
[33960]	valid_0's l1: 407.486
[34000]	valid_0's l1: 407.481
[34040]	valid_0's l1: 407.475
[34080]	valid_0's l1: 407.47
[34120]	valid_0's l1: 407.466
[34160]	valid_0's l1: 407.463
[34200]	valid_0's l1: 407.455
[34240]	valid_0's l1: 407.449
[34280]	valid_0's l1: 407.443
[34320]	valid_0's l1: 407.438
[34360]	valid_0's l1: 407.432
[34400]	valid_0's l1: 407.426
[34440]	valid_0's l1: 407.421
[34480]	valid_0's l1: 407.415
[34520]	valid_0's l1: 407.412
[34560]	valid_0

[44240]	valid_0's l1: 406.332
[44280]	valid_0's l1: 406.329
[44320]	valid_0's l1: 406.326
[44360]	valid_0's l1: 406.324
[44400]	valid_0's l1: 406.32
[44440]	valid_0's l1: 406.317
[44480]	valid_0's l1: 406.312
[44520]	valid_0's l1: 406.31
[44560]	valid_0's l1: 406.306
[44600]	valid_0's l1: 406.301
[44640]	valid_0's l1: 406.298
[44680]	valid_0's l1: 406.295
[44720]	valid_0's l1: 406.29
[44760]	valid_0's l1: 406.287
[44800]	valid_0's l1: 406.283
[44840]	valid_0's l1: 406.28
[44880]	valid_0's l1: 406.276
[44920]	valid_0's l1: 406.273
[44960]	valid_0's l1: 406.269
[45000]	valid_0's l1: 406.264
[45040]	valid_0's l1: 406.261
[45080]	valid_0's l1: 406.257
[45120]	valid_0's l1: 406.255
[45160]	valid_0's l1: 406.251
[45200]	valid_0's l1: 406.248
[45240]	valid_0's l1: 406.245
[45280]	valid_0's l1: 406.242
[45320]	valid_0's l1: 406.239
[45360]	valid_0's l1: 406.237
[45400]	valid_0's l1: 406.233
[45440]	valid_0's l1: 406.23
[45480]	valid_0's l1: 406.227
[45520]	valid_0's l1: 406.224
[45560]	valid_0

[55240]	valid_0's l1: 405.509
[55280]	valid_0's l1: 405.507
[55320]	valid_0's l1: 405.505
[55360]	valid_0's l1: 405.502
[55400]	valid_0's l1: 405.5
[55440]	valid_0's l1: 405.496
[55480]	valid_0's l1: 405.495
[55520]	valid_0's l1: 405.492
[55560]	valid_0's l1: 405.49
[55600]	valid_0's l1: 405.488
[55640]	valid_0's l1: 405.486
[55680]	valid_0's l1: 405.484
[55720]	valid_0's l1: 405.482
[55760]	valid_0's l1: 405.477
[55800]	valid_0's l1: 405.475
[55840]	valid_0's l1: 405.473
[55880]	valid_0's l1: 405.469
[55920]	valid_0's l1: 405.467
[55960]	valid_0's l1: 405.465
[56000]	valid_0's l1: 405.464
[56040]	valid_0's l1: 405.462
[56080]	valid_0's l1: 405.459
[56120]	valid_0's l1: 405.457
[56160]	valid_0's l1: 405.454
[56200]	valid_0's l1: 405.453
[56240]	valid_0's l1: 405.45
[56280]	valid_0's l1: 405.449
[56320]	valid_0's l1: 405.448
[56360]	valid_0's l1: 405.447
[56400]	valid_0's l1: 405.445
[56440]	valid_0's l1: 405.444
[56480]	valid_0's l1: 405.44
[56520]	valid_0's l1: 405.438
[56560]	valid_0

[66240]	valid_0's l1: 404.927
[66280]	valid_0's l1: 404.926
[66320]	valid_0's l1: 404.925
[66360]	valid_0's l1: 404.925
[66400]	valid_0's l1: 404.923
[66440]	valid_0's l1: 404.921
[66480]	valid_0's l1: 404.92
[66520]	valid_0's l1: 404.918
[66560]	valid_0's l1: 404.916
[66600]	valid_0's l1: 404.912
[66640]	valid_0's l1: 404.909
[66680]	valid_0's l1: 404.907
[66720]	valid_0's l1: 404.904
[66760]	valid_0's l1: 404.902
[66800]	valid_0's l1: 404.9
[66840]	valid_0's l1: 404.898
[66880]	valid_0's l1: 404.895
[66920]	valid_0's l1: 404.894
[66960]	valid_0's l1: 404.892
[67000]	valid_0's l1: 404.89
[67040]	valid_0's l1: 404.888
[67080]	valid_0's l1: 404.886
[67120]	valid_0's l1: 404.882
[67160]	valid_0's l1: 404.88
[67200]	valid_0's l1: 404.878
[67240]	valid_0's l1: 404.876
[67280]	valid_0's l1: 404.874
[67320]	valid_0's l1: 404.872
[67360]	valid_0's l1: 404.87
[67400]	valid_0's l1: 404.869
[67440]	valid_0's l1: 404.867
[67480]	valid_0's l1: 404.866
[67520]	valid_0's l1: 404.865
[67560]	valid_0'

[77240]	valid_0's l1: 404.479
[77280]	valid_0's l1: 404.477
[77320]	valid_0's l1: 404.474
[77360]	valid_0's l1: 404.474
[77400]	valid_0's l1: 404.473
[77440]	valid_0's l1: 404.472
[77480]	valid_0's l1: 404.472
[77520]	valid_0's l1: 404.47
[77560]	valid_0's l1: 404.469
[77600]	valid_0's l1: 404.468
[77640]	valid_0's l1: 404.465
[77680]	valid_0's l1: 404.464
[77720]	valid_0's l1: 404.462
[77760]	valid_0's l1: 404.461
[77800]	valid_0's l1: 404.459
[77840]	valid_0's l1: 404.458
[77880]	valid_0's l1: 404.456
[77920]	valid_0's l1: 404.455
[77960]	valid_0's l1: 404.453
[78000]	valid_0's l1: 404.452
[78040]	valid_0's l1: 404.449
[78080]	valid_0's l1: 404.449
[78120]	valid_0's l1: 404.449
[78160]	valid_0's l1: 404.447
[78200]	valid_0's l1: 404.447
[78240]	valid_0's l1: 404.446
[78280]	valid_0's l1: 404.445
[78320]	valid_0's l1: 404.445
[78360]	valid_0's l1: 404.444
[78400]	valid_0's l1: 404.442
[78440]	valid_0's l1: 404.441
[78480]	valid_0's l1: 404.439
[78520]	valid_0's l1: 404.438
[78560]	val

[88200]	valid_0's l1: 404.143
[88240]	valid_0's l1: 404.143
[88280]	valid_0's l1: 404.142
[88320]	valid_0's l1: 404.141
[88360]	valid_0's l1: 404.141
[88400]	valid_0's l1: 404.14
[88440]	valid_0's l1: 404.139
[88480]	valid_0's l1: 404.138
[88520]	valid_0's l1: 404.137
[88560]	valid_0's l1: 404.136
[88600]	valid_0's l1: 404.134
[88640]	valid_0's l1: 404.133
[88680]	valid_0's l1: 404.132
[88720]	valid_0's l1: 404.131
[88760]	valid_0's l1: 404.13
[88800]	valid_0's l1: 404.129
[88840]	valid_0's l1: 404.127
[88880]	valid_0's l1: 404.126
[88920]	valid_0's l1: 404.126
[88960]	valid_0's l1: 404.125
[89000]	valid_0's l1: 404.124
[89040]	valid_0's l1: 404.124
[89080]	valid_0's l1: 404.123
[89120]	valid_0's l1: 404.123
[89160]	valid_0's l1: 404.122
[89200]	valid_0's l1: 404.122
[89240]	valid_0's l1: 404.121
[89280]	valid_0's l1: 404.12
[89320]	valid_0's l1: 404.119
[89360]	valid_0's l1: 404.116
[89400]	valid_0's l1: 404.115
[89440]	valid_0's l1: 404.113
[89480]	valid_0's l1: 404.112
[89520]	valid

df_tr.info()

df_tr.to_csv('other_train_feature.csv', index=False)
df_te.to_csv('other_test_feature.csv', index=False)

In [7]:
gbm.save_model('light_gbm.txt')
# predict
print('Start predicting...')
x_test = df_te[features]
id_test = df_te['order_id']
#y_pred = gbm.predict(x_test.values, num_iteration=gbm.best_iteration)
#y_pred = gbm.predict(x_test.values)

print('generating submission...')
#sub = pd.DataFrame({'order_id': id_test, 'delivery_duration': y_pred})

print('saving submission...')
#sub.to_csv('sub_lgb_starter.csv', index=False)

Start predicting...
generating submission...
saving submission...


In [6]:
x_train = df_tr[features]
y_train = df_tr['delivery_duration']

x_test = df_te[features]
id_test = df_te['order_id']

print(x_train.shape)
print(x_test.shape)

dtrain = xgb.DMatrix(x_train.values, y_train)
dtest = xgb.DMatrix(x_test.values)

print('training model...')
watchlist = [(dtrain, 'train')]
param = {
        'booster': 'gbtree',
        'objective': 'reg:linear',
        'eval_metric': 'mae',
        'eta': 0.01,
        'num_round': 20000,
        'colsample_bytree': 0.65,
        'subsample': 0.8,
        'max_depth': 5,
        'nthread': -1,
        'seed': 20171001,
        'silent': 1,
    }
bst = xgb.train(param, dtrain, param['num_round'], watchlist, verbose_eval=10)
bst.save_model('xgboost.model')

(283815, 20)
(251864, 20)
training model...
[0]	train-mae:2314.6
[10]	train-mae:2093.34
[20]	train-mae:1893.21
[30]	train-mae:1712.25
[40]	train-mae:1548.94
[50]	train-mae:1402.25
[60]	train-mae:1271.47
[70]	train-mae:1155.89
[80]	train-mae:1054.51
[90]	train-mae:966.181
[100]	train-mae:889.805
[110]	train-mae:823.787
[120]	train-mae:767.512
[130]	train-mae:719.329
[140]	train-mae:678.423
[150]	train-mae:643.628
[160]	train-mae:613.953
[170]	train-mae:588.898
[180]	train-mae:567.832
[190]	train-mae:550.049
[200]	train-mae:534.919
[210]	train-mae:522.414
[220]	train-mae:511.666
[230]	train-mae:502.356
[240]	train-mae:494.586
[250]	train-mae:487.834
[260]	train-mae:482.198
[270]	train-mae:477.292
[280]	train-mae:473.068
[290]	train-mae:469.465
[300]	train-mae:466.295
[310]	train-mae:463.577
[320]	train-mae:461.189
[330]	train-mae:459.092
[340]	train-mae:457.15
[350]	train-mae:455.37
[360]	train-mae:453.895
[370]	train-mae:452.512
[380]	train-mae:451.264
[390]	train-mae:449.985
[400]	trai

[3330]	train-mae:404.63
[3340]	train-mae:404.582
[3350]	train-mae:404.533
[3360]	train-mae:404.48
[3370]	train-mae:404.425
[3380]	train-mae:404.387
[3390]	train-mae:404.335
[3400]	train-mae:404.288
[3410]	train-mae:404.247
[3420]	train-mae:404.206
[3430]	train-mae:404.162
[3440]	train-mae:404.112
[3450]	train-mae:404.082
[3460]	train-mae:404.037
[3470]	train-mae:403.991
[3480]	train-mae:403.946
[3490]	train-mae:403.893
[3500]	train-mae:403.844
[3510]	train-mae:403.808
[3520]	train-mae:403.774
[3530]	train-mae:403.712
[3540]	train-mae:403.668
[3550]	train-mae:403.62
[3560]	train-mae:403.581
[3570]	train-mae:403.522
[3580]	train-mae:403.475
[3590]	train-mae:403.433
[3600]	train-mae:403.399
[3610]	train-mae:403.351
[3620]	train-mae:403.296
[3630]	train-mae:403.265
[3640]	train-mae:403.214
[3650]	train-mae:403.173
[3660]	train-mae:403.131
[3670]	train-mae:403.099
[3680]	train-mae:403.062
[3690]	train-mae:403.011
[3700]	train-mae:402.958
[3710]	train-mae:402.901
[3720]	train-mae:402.85
[373

[6630]	train-mae:393.313
[6640]	train-mae:393.292
[6650]	train-mae:393.268
[6660]	train-mae:393.241
[6670]	train-mae:393.216
[6680]	train-mae:393.184
[6690]	train-mae:393.155
[6700]	train-mae:393.129
[6710]	train-mae:393.1
[6720]	train-mae:393.072
[6730]	train-mae:393.052
[6740]	train-mae:393.026
[6750]	train-mae:393.002
[6760]	train-mae:392.977
[6770]	train-mae:392.955
[6780]	train-mae:392.924
[6790]	train-mae:392.901
[6800]	train-mae:392.877
[6810]	train-mae:392.843
[6820]	train-mae:392.821
[6830]	train-mae:392.793
[6840]	train-mae:392.759
[6850]	train-mae:392.735
[6860]	train-mae:392.713
[6870]	train-mae:392.686
[6880]	train-mae:392.66
[6890]	train-mae:392.633
[6900]	train-mae:392.608
[6910]	train-mae:392.58
[6920]	train-mae:392.548
[6930]	train-mae:392.525
[6940]	train-mae:392.499
[6950]	train-mae:392.469
[6960]	train-mae:392.444
[6970]	train-mae:392.416
[6980]	train-mae:392.39
[6990]	train-mae:392.363
[7000]	train-mae:392.328
[7010]	train-mae:392.305
[7020]	train-mae:392.28
[7030]

[9920]	train-mae:385.506
[9930]	train-mae:385.486
[9940]	train-mae:385.465
[9950]	train-mae:385.447
[9960]	train-mae:385.425
[9970]	train-mae:385.4
[9980]	train-mae:385.378
[9990]	train-mae:385.355
[10000]	train-mae:385.331
[10010]	train-mae:385.309
[10020]	train-mae:385.288
[10030]	train-mae:385.267
[10040]	train-mae:385.243
[10050]	train-mae:385.222
[10060]	train-mae:385.205
[10070]	train-mae:385.186
[10080]	train-mae:385.163
[10090]	train-mae:385.143
[10100]	train-mae:385.121
[10110]	train-mae:385.099
[10120]	train-mae:385.08
[10130]	train-mae:385.055
[10140]	train-mae:385.033
[10150]	train-mae:385.014
[10160]	train-mae:384.994
[10170]	train-mae:384.97
[10180]	train-mae:384.948
[10190]	train-mae:384.922
[10200]	train-mae:384.899
[10210]	train-mae:384.876
[10220]	train-mae:384.858
[10230]	train-mae:384.835
[10240]	train-mae:384.815
[10250]	train-mae:384.793
[10260]	train-mae:384.77
[10270]	train-mae:384.753
[10280]	train-mae:384.735
[10290]	train-mae:384.716
[10300]	train-mae:384.695

[13090]	train-mae:379.136
[13100]	train-mae:379.117
[13110]	train-mae:379.099
[13120]	train-mae:379.083
[13130]	train-mae:379.061
[13140]	train-mae:379.044
[13150]	train-mae:379.027
[13160]	train-mae:379.004
[13170]	train-mae:378.981
[13180]	train-mae:378.963
[13190]	train-mae:378.942
[13200]	train-mae:378.922
[13210]	train-mae:378.903
[13220]	train-mae:378.882
[13230]	train-mae:378.864
[13240]	train-mae:378.845
[13250]	train-mae:378.823
[13260]	train-mae:378.801
[13270]	train-mae:378.783
[13280]	train-mae:378.764
[13290]	train-mae:378.748
[13300]	train-mae:378.728
[13310]	train-mae:378.711
[13320]	train-mae:378.689
[13330]	train-mae:378.672
[13340]	train-mae:378.654
[13350]	train-mae:378.634
[13360]	train-mae:378.613
[13370]	train-mae:378.593
[13380]	train-mae:378.573
[13390]	train-mae:378.555
[13400]	train-mae:378.537
[13410]	train-mae:378.521
[13420]	train-mae:378.506
[13430]	train-mae:378.483
[13440]	train-mae:378.469
[13450]	train-mae:378.45
[13460]	train-mae:378.432
[13470]	train

[16260]	train-mae:373.394
[16270]	train-mae:373.379
[16280]	train-mae:373.36
[16290]	train-mae:373.342
[16300]	train-mae:373.324
[16310]	train-mae:373.308
[16320]	train-mae:373.291
[16330]	train-mae:373.272
[16340]	train-mae:373.252
[16350]	train-mae:373.238
[16360]	train-mae:373.221
[16370]	train-mae:373.204
[16380]	train-mae:373.186
[16390]	train-mae:373.168
[16400]	train-mae:373.151
[16410]	train-mae:373.134
[16420]	train-mae:373.118
[16430]	train-mae:373.103
[16440]	train-mae:373.086
[16450]	train-mae:373.069
[16460]	train-mae:373.051
[16470]	train-mae:373.034
[16480]	train-mae:373.015
[16490]	train-mae:372.999
[16500]	train-mae:372.983
[16510]	train-mae:372.966
[16520]	train-mae:372.948
[16530]	train-mae:372.93
[16540]	train-mae:372.914
[16550]	train-mae:372.894
[16560]	train-mae:372.875
[16570]	train-mae:372.853
[16580]	train-mae:372.835
[16590]	train-mae:372.815
[16600]	train-mae:372.799
[16610]	train-mae:372.781
[16620]	train-mae:372.763
[16630]	train-mae:372.742
[16640]	train-

[19430]	train-mae:368.076
[19440]	train-mae:368.06
[19450]	train-mae:368.044
[19460]	train-mae:368.026
[19470]	train-mae:368.009
[19480]	train-mae:367.994
[19490]	train-mae:367.98
[19500]	train-mae:367.965
[19510]	train-mae:367.946
[19520]	train-mae:367.929
[19530]	train-mae:367.916
[19540]	train-mae:367.899
[19550]	train-mae:367.881
[19560]	train-mae:367.864
[19570]	train-mae:367.847
[19580]	train-mae:367.831
[19590]	train-mae:367.813
[19600]	train-mae:367.796
[19610]	train-mae:367.779
[19620]	train-mae:367.762
[19630]	train-mae:367.747
[19640]	train-mae:367.731
[19650]	train-mae:367.718
[19660]	train-mae:367.705
[19670]	train-mae:367.687
[19680]	train-mae:367.671
[19690]	train-mae:367.655
[19700]	train-mae:367.638
[19710]	train-mae:367.622
[19720]	train-mae:367.607
[19730]	train-mae:367.592
[19740]	train-mae:367.575
[19750]	train-mae:367.559
[19760]	train-mae:367.54
[19770]	train-mae:367.522
[19780]	train-mae:367.505
[19790]	train-mae:367.489
[19800]	train-mae:367.473
[19810]	train-m

print('generating prediction...')
pred = bst.predict(dtest)

print('generating submission...')
sub = pd.DataFrame({'order_id': id_test, 'delivery_duration': pred})

print('saving submission...')
sub.to_csv('sub_xgb_starter.csv', index=False)

gbm = lgb.Booster(model_file='light_gbm.txt')  #init model
bst = xgb.Booster({'nthread':4}) #init model  
bst.load_model("xgboost.model") # load data

In [8]:
blend_train = np.zeros((x_train.shape[0],2))
blend_test = np.zeros((x_test.shape[0],2))

blend_train[:, 0] = bst.predict(xgb.DMatrix(x_train.values))
blend_test[:, 0] = bst.predict(xgb.DMatrix(x_test.values))
blend_train[:, 1] = gbm.predict(x_train.values)
blend_test[:, 1] = gbm.predict(x_test.values)


In [9]:
# Start blending!
from sklearn import linear_model
bclf = linear_model.LogisticRegression(C=100)
bclf.fit(blend_train, y_train)


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

print('generating prediction...')
stacking_pred = bclf.predict(blend_test[0:100,:])
print(stacking_pred)
print(blend_test[0:100,:])

print('generating submission...')
#sub = pd.DataFrame({'order_id': id_test, 'delivery_duration': stacking_pred})

print('saving submission...')
#sub.to_csv('sub_stacking_starter.csv', index=False)

In [16]:
blend_test.shape

(251864, 2)

In [23]:
fusion_pred = blend_test[:,0]*0.8 + blend_test[:,1]*0.2
print('generating submission...')
sub = pd.DataFrame({'order_id': id_test, 'delivery_duration': fusion_pred})

print('saving submission...')
sub.to_csv('sub_stacking_starter.csv', index=False)

generating submission...
saving submission...
