In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from geopy import distance
import pickle
from utils_dump_load import load_from_pickle, dump_to_pickle

<h1> Loading data

In [2]:
pickle_filename = "data/data.pkl"
df_train, df_test = load_from_pickle(pickle_filename)

<h2> Feature Selection

In [3]:
features_not_in_test = []
for feature_name in df_train.columns:
    if feature_name not in df_test.columns:
        features_not_in_test.append(feature_name)
features_not_in_test

['Arrival at Destination - Day of Month',
 'Arrival at Destination - Weekday (Mo = 1)',
 'Arrival at Destination - Time',
 'Time from Pickup to Arrival']

In [4]:
steps = ["Placement", "Confirmation", "Arrival at Pickup", "Pickup"]

In [5]:
features_to_drop = ["Vehicle Type", "geospeed"]
#option1
#features_to_drop +=  ["User Id", "Rider Id", "Pickup - Time"]
#option2
#features_to_drop += ["Pickup Lat", "Pickup Long", "Destination Lat", "Destination Long"]
#option3
#features_to_drop += ['Platform Type', 'Personal or Business', "Precipitation in millimeters"]

In [6]:
for step in steps:
    #Let's keep Pickup data (last step prior run)
    if step not in ['Pickup', "Arrival at Destination"]:
        #features_to_drop.append(step + " - " + "Time")
        #features_to_drop.append(step + " - " + "Delta")
        #features_to_drop.append(step + " - " + "Weekday (Mo = 1)")
        #features_to_drop.append(step + " - Weekend")
        features_to_drop.append(step + " - " + "Day of Month")
        pass

In [7]:
features_to_drop

['Vehicle Type',
 'Platform Type',
 'Personal or Business',
 'Precipitation in millimeters',
 'Placement - Day of Month',
 'Confirmation - Day of Month',
 'Arrival at Pickup - Day of Month']

In [8]:
to_drop = features_to_drop + features_not_in_test

<h2> Scalling data

In [9]:
features_to_scale = []
features_not_scaled = []
for feature_name in df_train.columns:
    if df_train[feature_name].dtype == np.float64 and feature_name not in to_drop:
        features_to_scale.append(feature_name)
    elif feature_name not in to_drop:
        features_not_scaled.append(feature_name)

In [10]:
features_to_scale

['Placement - Time',
 'Confirmation - Time',
 'Arrival at Pickup - Time',
 'Pickup - Time',
 'Distance (KM)',
 'Temperature',
 'Pickup Lat',
 'Pickup Long',
 'Destination Lat',
 'Destination Long',
 'No_Of_Orders',
 'Age',
 'Average_Rating',
 'No_of_Ratings',
 'speed',
 'geospeed',
 'Placement - Delta',
 'Confirmation - Delta',
 'Arrival at Pickup - Delta',
 'geopy_distance']

In [11]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
df_train_scaled = scaler.fit_transform(df_train[features_to_scale])
df_test_scaled = scaler.transform(df_test[features_to_scale])

In [12]:
df_train_scaled = pd.DataFrame(df_train_scaled, index=df_train.index, columns=features_to_scale)
df_test_scaled = pd.DataFrame(df_test_scaled, index=df_test.index, columns=features_to_scale)

In [13]:
df_train = pd.concat([df_train_scaled, df_train[features_not_scaled + to_drop]], axis=1)
df_test = pd.concat([df_test_scaled, df_test[features_not_scaled + features_to_drop]], axis=1)

<h2> Defining our features X and target y

In [14]:
X = df_train.drop(to_drop, axis=1)
y = df_train["Time from Pickup to Arrival"]
X.shape, y.shape

((21201, 32), (21201,))

<h1> Apply ML model

<h2> Defining X_train, X_test

In [15]:
from sklearn.model_selection import train_test_split, KFold,cross_val_score

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

<h2> Benchmark model

In [17]:
from sklearn.metrics import accuracy_score, mean_squared_error

In [18]:
from sklearn.linear_model import HuberRegressor,SGDRegressor,ElasticNet, Ridge,LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor,GradientBoostingRegressor
from xgboost.sklearn import XGBRegressor
from lightgbm.sklearn import LGBMRegressor
from catboost import CatBoostRegressor

In [19]:
def rmsle_cv(model,X_train,y_train,n_folds=5):
    kf = KFold(n_folds, shuffle=True, random_state=42).get_n_splits(X_train.values)
    rmse= np.sqrt(-cross_val_score(model, X_train.values, y_train, scoring="neg_mean_squared_error", cv = kf))
    return(np.mean(rmse))

In [20]:
models = [RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor, LGBMRegressor, ElasticNet, 
          LinearRegression, HuberRegressor, Ridge, KNeighborsRegressor, ExtraTreesRegressor]

In [21]:
models_sel = [RandomForestRegressor, GradientBoostingRegressor, LGBMRegressor, LinearRegression, Ridge]

In [22]:
def benchmark_models():
    for Model in models_sel:
        
        #Model fit on full data
        model = Model()
        model_name = model.__class__.__name__
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        #err1 = rmsle_cv(model, X_train, y_train)
        err2 = np.sqrt(mean_squared_error(y_test, y_pred))
        #print(f"{model_name} fit on full data :  cross val loss rmse => {err1}")
        print(f"{model_name} fit on full data :  single rmse => {err2}")

In [23]:
benchmark_models()



RandomForestRegressor fit on full data :  single rmse => 780.4920721898704
GradientBoostingRegressor fit on full data :  single rmse => 732.6084146620188
LGBMRegressor fit on full data :  single rmse => 740.4767579396996
LinearRegression fit on full data :  single rmse => 770.2489320518806
Ridge fit on full data :  single rmse => 770.247861297955


<h2> CatBoost model

In [24]:
cat_b =  CatBoostRegressor(loss_function='RMSE', )

In [25]:
cat_b.fit(X_train, y_train)
y_pred = cat_b.predict(X_test)

0:	learn: 1802.3206606	total: 87.8ms	remaining: 1m 27s
1:	learn: 1760.1060860	total: 119ms	remaining: 59.5s
2:	learn: 1719.3765261	total: 150ms	remaining: 50s
3:	learn: 1679.7834859	total: 180ms	remaining: 44.8s
4:	learn: 1642.2019211	total: 214ms	remaining: 42.6s
5:	learn: 1605.4194656	total: 245ms	remaining: 40.6s
6:	learn: 1571.0276187	total: 274ms	remaining: 38.8s
7:	learn: 1537.3485647	total: 304ms	remaining: 37.7s
8:	learn: 1504.5794731	total: 349ms	remaining: 38.4s
9:	learn: 1472.6990907	total: 378ms	remaining: 37.4s
10:	learn: 1442.4070612	total: 407ms	remaining: 36.6s
11:	learn: 1412.8472569	total: 434ms	remaining: 35.8s
12:	learn: 1384.7433531	total: 462ms	remaining: 35.1s
13:	learn: 1357.6771984	total: 490ms	remaining: 34.5s
14:	learn: 1331.6714791	total: 525ms	remaining: 34.5s
15:	learn: 1306.7935207	total: 563ms	remaining: 34.6s
16:	learn: 1282.4320372	total: 591ms	remaining: 34.2s
17:	learn: 1259.5839051	total: 619ms	remaining: 33.8s
18:	learn: 1236.8663084	total: 648ms	r

155:	learn: 734.9083274	total: 4.83s	remaining: 26.2s
156:	learn: 734.7241410	total: 4.87s	remaining: 26.2s
157:	learn: 734.5973876	total: 4.9s	remaining: 26.1s
158:	learn: 734.4544852	total: 4.93s	remaining: 26.1s
159:	learn: 734.2867206	total: 4.95s	remaining: 26s
160:	learn: 734.0303221	total: 4.98s	remaining: 26s
161:	learn: 733.8676820	total: 5.02s	remaining: 25.9s
162:	learn: 733.7608483	total: 5.05s	remaining: 26s
163:	learn: 733.4865693	total: 5.09s	remaining: 25.9s
164:	learn: 733.3318102	total: 5.12s	remaining: 25.9s
165:	learn: 733.1694391	total: 5.14s	remaining: 25.8s
166:	learn: 732.9150462	total: 5.17s	remaining: 25.8s
167:	learn: 732.7519616	total: 5.2s	remaining: 25.8s
168:	learn: 732.6080188	total: 5.23s	remaining: 25.7s
169:	learn: 732.4835862	total: 5.26s	remaining: 25.7s
170:	learn: 732.3589739	total: 5.3s	remaining: 25.7s
171:	learn: 732.1797639	total: 5.33s	remaining: 25.6s
172:	learn: 732.0951989	total: 5.35s	remaining: 25.6s
173:	learn: 731.8585874	total: 5.38s	

311:	learn: 717.5441222	total: 9.28s	remaining: 20.5s
312:	learn: 717.5099811	total: 9.31s	remaining: 20.4s
313:	learn: 717.4961838	total: 9.34s	remaining: 20.4s
314:	learn: 717.4822855	total: 9.37s	remaining: 20.4s
315:	learn: 717.4531857	total: 9.39s	remaining: 20.3s
316:	learn: 717.2736203	total: 9.42s	remaining: 20.3s
317:	learn: 717.2563587	total: 9.45s	remaining: 20.3s
318:	learn: 717.2419323	total: 9.46s	remaining: 20.2s
319:	learn: 717.1197150	total: 9.49s	remaining: 20.2s
320:	learn: 716.9228434	total: 9.53s	remaining: 20.1s
321:	learn: 716.8825339	total: 9.55s	remaining: 20.1s
322:	learn: 716.8188170	total: 9.58s	remaining: 20.1s
323:	learn: 716.7844353	total: 9.61s	remaining: 20s
324:	learn: 716.6956985	total: 9.64s	remaining: 20s
325:	learn: 716.6417556	total: 9.66s	remaining: 20s
326:	learn: 716.5990145	total: 9.69s	remaining: 19.9s
327:	learn: 716.5774869	total: 9.72s	remaining: 19.9s
328:	learn: 716.5192907	total: 9.75s	remaining: 19.9s
329:	learn: 716.4013459	total: 9.7

468:	learn: 706.1209625	total: 13.6s	remaining: 15.4s
469:	learn: 706.0572012	total: 13.6s	remaining: 15.4s
470:	learn: 705.9146677	total: 13.7s	remaining: 15.3s
471:	learn: 705.8673944	total: 13.7s	remaining: 15.3s
472:	learn: 705.7280430	total: 13.7s	remaining: 15.3s
473:	learn: 705.6175542	total: 13.7s	remaining: 15.2s
474:	learn: 705.5009690	total: 13.8s	remaining: 15.2s
475:	learn: 705.4130779	total: 13.8s	remaining: 15.2s
476:	learn: 705.2309322	total: 13.8s	remaining: 15.2s
477:	learn: 705.0668813	total: 13.9s	remaining: 15.1s
478:	learn: 704.9512832	total: 13.9s	remaining: 15.1s
479:	learn: 704.8570347	total: 13.9s	remaining: 15.1s
480:	learn: 704.8372772	total: 13.9s	remaining: 15.1s
481:	learn: 704.7543518	total: 14s	remaining: 15s
482:	learn: 704.5644574	total: 14s	remaining: 15s
483:	learn: 704.4454165	total: 14s	remaining: 15s
484:	learn: 704.3157107	total: 14.1s	remaining: 14.9s
485:	learn: 704.2703545	total: 14.1s	remaining: 14.9s
486:	learn: 704.1773271	total: 14.1s	rem

626:	learn: 696.2849555	total: 18.1s	remaining: 10.8s
627:	learn: 696.2443160	total: 18.1s	remaining: 10.7s
628:	learn: 696.1411490	total: 18.2s	remaining: 10.7s
629:	learn: 696.1382615	total: 18.2s	remaining: 10.7s
630:	learn: 696.0747640	total: 18.2s	remaining: 10.7s
631:	learn: 695.9445977	total: 18.2s	remaining: 10.6s
632:	learn: 695.9390534	total: 18.3s	remaining: 10.6s
633:	learn: 695.8788772	total: 18.3s	remaining: 10.6s
634:	learn: 695.8077136	total: 18.3s	remaining: 10.5s
635:	learn: 695.7815951	total: 18.4s	remaining: 10.5s
636:	learn: 695.7462658	total: 18.4s	remaining: 10.5s
637:	learn: 695.6824444	total: 18.4s	remaining: 10.5s
638:	learn: 695.6418335	total: 18.4s	remaining: 10.4s
639:	learn: 695.6366103	total: 18.5s	remaining: 10.4s
640:	learn: 695.6120065	total: 18.5s	remaining: 10.4s
641:	learn: 695.5460621	total: 18.5s	remaining: 10.3s
642:	learn: 695.5399157	total: 18.6s	remaining: 10.3s
643:	learn: 695.4534799	total: 18.6s	remaining: 10.3s
644:	learn: 695.4488014	tota

782:	learn: 689.0021130	total: 22.5s	remaining: 6.24s
783:	learn: 688.9984181	total: 22.5s	remaining: 6.21s
784:	learn: 688.9840104	total: 22.6s	remaining: 6.18s
785:	learn: 688.9674462	total: 22.6s	remaining: 6.15s
786:	learn: 688.9665079	total: 22.6s	remaining: 6.12s
787:	learn: 688.9613596	total: 22.6s	remaining: 6.09s
788:	learn: 688.9145191	total: 22.7s	remaining: 6.06s
789:	learn: 688.8933349	total: 22.7s	remaining: 6.03s
790:	learn: 688.8446939	total: 22.7s	remaining: 6s
791:	learn: 688.8446937	total: 22.7s	remaining: 5.97s
792:	learn: 688.7868349	total: 22.8s	remaining: 5.94s
793:	learn: 688.7818551	total: 22.8s	remaining: 5.91s
794:	learn: 688.7803420	total: 22.8s	remaining: 5.88s
795:	learn: 688.7783830	total: 22.8s	remaining: 5.85s
796:	learn: 688.7674739	total: 22.9s	remaining: 5.83s
797:	learn: 688.6675509	total: 22.9s	remaining: 5.8s
798:	learn: 688.6380812	total: 22.9s	remaining: 5.77s
799:	learn: 688.5899177	total: 23s	remaining: 5.74s
800:	learn: 688.5210316	total: 23s

941:	learn: 683.3370498	total: 26.8s	remaining: 1.65s
942:	learn: 683.3355206	total: 26.8s	remaining: 1.62s
943:	learn: 683.2963702	total: 26.9s	remaining: 1.59s
944:	learn: 683.2221476	total: 26.9s	remaining: 1.56s
945:	learn: 683.1620538	total: 26.9s	remaining: 1.54s
946:	learn: 683.0613413	total: 26.9s	remaining: 1.51s
947:	learn: 683.0443799	total: 27s	remaining: 1.48s
948:	learn: 683.0104340	total: 27s	remaining: 1.45s
949:	learn: 683.0083441	total: 27s	remaining: 1.42s
950:	learn: 682.9925738	total: 27s	remaining: 1.39s
951:	learn: 682.9895284	total: 27.1s	remaining: 1.36s
952:	learn: 682.9437877	total: 27.1s	remaining: 1.34s
953:	learn: 682.9436141	total: 27.1s	remaining: 1.31s
954:	learn: 682.9296175	total: 27.1s	remaining: 1.28s
955:	learn: 682.8794938	total: 27.2s	remaining: 1.25s
956:	learn: 682.8785176	total: 27.2s	remaining: 1.22s
957:	learn: 682.8253366	total: 27.2s	remaining: 1.19s
958:	learn: 682.8246763	total: 27.3s	remaining: 1.17s
959:	learn: 682.8208578	total: 27.3s

In [26]:
cat_b.best_score_

{'learn': {'RMSE': 681.3723514764171}}

In [27]:
order = cat_b.feature_importances_.argsort()[::-1]
list(zip(X_train.columns[order], cat_b.feature_importances_[order]))

[('Distance (KM)', 27.109782732507448),
 ('geopy_distance', 24.627135423595792),
 ('speed', 7.7202938786072695),
 ('Arrival at Pickup - Delta', 6.853196238876016),
 ('geospeed', 5.869189898730648),
 ('Destination Lat', 4.448326507124091),
 ('Pickup Lat', 3.8903924745250795),
 ('Confirmation - Delta', 2.990341592944005),
 ('Average_Rating', 2.393402223754605),
 ('Placement - Delta', 2.2176320569790553),
 ('Destination Long', 1.9524488758332954),
 ('Age', 1.6419649615273006),
 ('No_of_Ratings', 1.4904258274112931),
 ('No_Of_Orders', 1.4619791599012595),
 ('Pickup Long', 1.3190785012138069),
 ('User Id', 0.7298768196818158),
 ('Rider Id', 0.4559047714440692),
 ('Temperature', 0.4108290519697299),
 ('Confirmation - Time', 0.313080635682156),
 ('Pickup - Time', 0.2958091641164837),
 ('Pickup - Weekday (Mo = 1)', 0.2952209013788295),
 ('Arrival at Pickup - Time', 0.2835355071700913),
 ('Arrival at Pickup - Weekday (Mo = 1)', 0.25347432710512946),
 ('Confirmation - Weekday (Mo = 1)', 0.228921

<h1> Dump to submission file

In [28]:
X__test = df_test.drop(features_to_drop, axis=1)
X__test.shape, X_train.shape

((7068, 32), (16960, 32))

In [29]:
X__test.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Placement - Time,7068.0,-0.01236,1.001022,-2.479172,-0.814842,-0.059424,0.777265,3.72028
Confirmation - Time,7068.0,-0.012455,1.000139,-2.424949,-0.82526,-0.0582,0.779518,3.691178
Arrival at Pickup - Time,7068.0,-0.012613,1.000647,-2.314103,-0.814995,-0.05187,0.779206,3.830805
Pickup - Time,7068.0,-0.013608,0.999891,-2.392565,-0.819158,-0.051974,0.784092,3.933809
Distance (KM),7068.0,-0.00839,0.98357,-1.50058,-0.794967,-0.265757,0.616259,6.61397
Temperature,7068.0,-0.009555,0.996161,-2.995054,-0.708433,0.015195,0.641119,2.637986
Pickup Lat,7068.0,-0.018782,0.984852,-4.983951,-0.646787,0.005523,0.793129,4.363618
Pickup Long,7068.0,-0.003092,1.01308,-4.201462,-0.711423,-0.107936,0.492424,4.797827
Destination Lat,7068.0,0.009357,0.983148,-4.022419,-0.526672,-0.018319,0.612372,7.246856
Destination Long,7068.0,0.002378,0.972889,-3.846742,-0.55174,-0.073629,0.379588,4.64534


In [30]:
model_sel1 = GradientBoostingRegressor()
model_sel2 = LGBMRegressor()
model_sel3 = CatBoostRegressor()
#model_sel = LinearRegression()
#model_sel = RandomForestRegressor()

In [31]:
model_sel1.fit(X, y)
model_sel2.fit(X, y)
model_sel3.fit(X, y)

0:	learn: 1799.8307120	total: 30.5ms	remaining: 30.5s
1:	learn: 1757.4663726	total: 59.6ms	remaining: 29.7s
2:	learn: 1717.6158317	total: 89ms	remaining: 29.6s
3:	learn: 1677.7789191	total: 117ms	remaining: 29.2s
4:	learn: 1640.2644836	total: 147ms	remaining: 29.3s
5:	learn: 1603.7351808	total: 175ms	remaining: 29.1s
6:	learn: 1568.5346360	total: 204ms	remaining: 29s
7:	learn: 1534.1837650	total: 237ms	remaining: 29.3s
8:	learn: 1500.9155469	total: 275ms	remaining: 30.2s
9:	learn: 1469.4518918	total: 311ms	remaining: 30.7s
10:	learn: 1439.5806413	total: 341ms	remaining: 30.6s
11:	learn: 1410.3291698	total: 372ms	remaining: 30.6s
12:	learn: 1381.8554403	total: 402ms	remaining: 30.5s
13:	learn: 1354.5222693	total: 430ms	remaining: 30.3s
14:	learn: 1328.8760559	total: 463ms	remaining: 30.4s
15:	learn: 1303.5650507	total: 500ms	remaining: 30.8s
16:	learn: 1279.9164984	total: 530ms	remaining: 30.6s
17:	learn: 1256.8688772	total: 558ms	remaining: 30.5s
18:	learn: 1234.3872838	total: 587ms	re

160:	learn: 733.1512000	total: 4.97s	remaining: 25.9s
161:	learn: 732.9890365	total: 5.01s	remaining: 25.9s
162:	learn: 732.6735019	total: 5.04s	remaining: 25.9s
163:	learn: 732.5323533	total: 5.06s	remaining: 25.8s
164:	learn: 732.2592102	total: 5.09s	remaining: 25.8s
165:	learn: 732.1129694	total: 5.12s	remaining: 25.7s
166:	learn: 731.9930008	total: 5.15s	remaining: 25.7s
167:	learn: 731.8209682	total: 5.2s	remaining: 25.7s
168:	learn: 731.7079343	total: 5.22s	remaining: 25.7s
169:	learn: 731.6279262	total: 5.25s	remaining: 25.6s
170:	learn: 731.5001894	total: 5.28s	remaining: 25.6s
171:	learn: 731.3585609	total: 5.31s	remaining: 25.6s
172:	learn: 731.1389877	total: 5.34s	remaining: 25.5s
173:	learn: 731.0301332	total: 5.37s	remaining: 25.5s
174:	learn: 730.9170975	total: 5.39s	remaining: 25.4s
175:	learn: 730.8359885	total: 5.43s	remaining: 25.4s
176:	learn: 730.7499643	total: 5.46s	remaining: 25.4s
177:	learn: 730.6423215	total: 5.49s	remaining: 25.3s
178:	learn: 730.4491474	total

319:	learn: 716.3405251	total: 9.66s	remaining: 20.5s
320:	learn: 716.1906826	total: 9.7s	remaining: 20.5s
321:	learn: 716.0494846	total: 9.73s	remaining: 20.5s
322:	learn: 715.9478444	total: 9.76s	remaining: 20.4s
323:	learn: 715.7676817	total: 9.78s	remaining: 20.4s
324:	learn: 715.6798881	total: 9.81s	remaining: 20.4s
325:	learn: 715.5342728	total: 9.84s	remaining: 20.3s
326:	learn: 715.4581627	total: 9.87s	remaining: 20.3s
327:	learn: 715.4204660	total: 9.9s	remaining: 20.3s
328:	learn: 715.3687367	total: 9.93s	remaining: 20.3s
329:	learn: 715.3035677	total: 9.96s	remaining: 20.2s
330:	learn: 715.2177078	total: 9.99s	remaining: 20.2s
331:	learn: 715.1722111	total: 10s	remaining: 20.2s
332:	learn: 715.0748324	total: 10s	remaining: 20.1s
333:	learn: 715.0053115	total: 10.1s	remaining: 20.1s
334:	learn: 714.8874842	total: 10.1s	remaining: 20.1s
335:	learn: 714.8432424	total: 10.1s	remaining: 20s
336:	learn: 714.7830339	total: 10.2s	remaining: 20s
337:	learn: 714.7451794	total: 10.2s	r

476:	learn: 705.1657954	total: 14.3s	remaining: 15.6s
477:	learn: 705.1509106	total: 14.3s	remaining: 15.6s
478:	learn: 705.1027398	total: 14.3s	remaining: 15.6s
479:	learn: 705.0587636	total: 14.4s	remaining: 15.6s
480:	learn: 704.9665772	total: 14.4s	remaining: 15.5s
481:	learn: 704.9257799	total: 14.4s	remaining: 15.5s
482:	learn: 704.8578915	total: 14.4s	remaining: 15.5s
483:	learn: 704.7776052	total: 14.5s	remaining: 15.4s
484:	learn: 704.7745497	total: 14.5s	remaining: 15.4s
485:	learn: 704.6751299	total: 14.6s	remaining: 15.4s
486:	learn: 704.6055594	total: 14.6s	remaining: 15.4s
487:	learn: 704.5612898	total: 14.7s	remaining: 15.4s
488:	learn: 704.4505216	total: 14.7s	remaining: 15.4s
489:	learn: 704.4226445	total: 14.8s	remaining: 15.4s
490:	learn: 704.3600834	total: 14.8s	remaining: 15.4s
491:	learn: 704.3491246	total: 14.9s	remaining: 15.3s
492:	learn: 704.3146495	total: 14.9s	remaining: 15.3s
493:	learn: 704.2862463	total: 14.9s	remaining: 15.3s
494:	learn: 704.2327739	tota

633:	learn: 697.7090882	total: 20s	remaining: 11.5s
634:	learn: 697.7083853	total: 20s	remaining: 11.5s
635:	learn: 697.7077039	total: 20.1s	remaining: 11.5s
636:	learn: 697.7074019	total: 20.1s	remaining: 11.4s
637:	learn: 697.6267905	total: 20.1s	remaining: 11.4s
638:	learn: 697.5483473	total: 20.1s	remaining: 11.4s
639:	learn: 697.5367633	total: 20.1s	remaining: 11.3s
640:	learn: 697.5075135	total: 20.2s	remaining: 11.3s
641:	learn: 697.4122018	total: 20.2s	remaining: 11.3s
642:	learn: 697.3719435	total: 20.2s	remaining: 11.2s
643:	learn: 697.3473923	total: 20.3s	remaining: 11.2s
644:	learn: 697.2904654	total: 20.3s	remaining: 11.2s
645:	learn: 697.1896887	total: 20.3s	remaining: 11.1s
646:	learn: 697.0768254	total: 20.4s	remaining: 11.1s
647:	learn: 697.0494417	total: 20.4s	remaining: 11.1s
648:	learn: 696.9920380	total: 20.4s	remaining: 11s
649:	learn: 696.9913344	total: 20.4s	remaining: 11s
650:	learn: 696.8940687	total: 20.5s	remaining: 11s
651:	learn: 696.8529700	total: 20.5s	r

787:	learn: 691.5044872	total: 25s	remaining: 6.72s
788:	learn: 691.4691266	total: 25s	remaining: 6.69s
789:	learn: 691.4332531	total: 25.1s	remaining: 6.66s
790:	learn: 691.4055330	total: 25.1s	remaining: 6.63s
791:	learn: 691.4054018	total: 25.1s	remaining: 6.59s
792:	learn: 691.3276207	total: 25.1s	remaining: 6.56s
793:	learn: 691.3271434	total: 25.1s	remaining: 6.52s
794:	learn: 691.3106109	total: 25.2s	remaining: 6.49s
795:	learn: 691.2316897	total: 25.2s	remaining: 6.46s
796:	learn: 691.2155019	total: 25.2s	remaining: 6.43s
797:	learn: 691.1939025	total: 25.3s	remaining: 6.4s
798:	learn: 691.1844353	total: 25.3s	remaining: 6.37s
799:	learn: 691.1839720	total: 25.3s	remaining: 6.33s
800:	learn: 691.1356559	total: 25.4s	remaining: 6.3s
801:	learn: 691.1000636	total: 25.4s	remaining: 6.27s
802:	learn: 691.0864920	total: 25.4s	remaining: 6.24s
803:	learn: 691.0680483	total: 25.5s	remaining: 6.21s
804:	learn: 691.0222863	total: 25.5s	remaining: 6.18s
805:	learn: 690.9661333	total: 25.

942:	learn: 685.5779434	total: 29.7s	remaining: 1.79s
943:	learn: 685.5476950	total: 29.7s	remaining: 1.76s
944:	learn: 685.5096014	total: 29.7s	remaining: 1.73s
945:	learn: 685.4744455	total: 29.8s	remaining: 1.7s
946:	learn: 685.4691173	total: 29.8s	remaining: 1.67s
947:	learn: 685.4417874	total: 29.8s	remaining: 1.64s
948:	learn: 685.4339021	total: 29.9s	remaining: 1.6s
949:	learn: 685.4099325	total: 29.9s	remaining: 1.57s
950:	learn: 685.4092319	total: 29.9s	remaining: 1.54s
951:	learn: 685.3556504	total: 30s	remaining: 1.51s
952:	learn: 685.3199839	total: 30s	remaining: 1.48s
953:	learn: 685.3175356	total: 30s	remaining: 1.45s
954:	learn: 685.2079315	total: 30s	remaining: 1.42s
955:	learn: 685.1910946	total: 30.1s	remaining: 1.38s
956:	learn: 685.1906873	total: 30.1s	remaining: 1.35s
957:	learn: 685.1341769	total: 30.1s	remaining: 1.32s
958:	learn: 685.1334478	total: 30.1s	remaining: 1.29s
959:	learn: 685.0480635	total: 30.2s	remaining: 1.26s
960:	learn: 685.0469048	total: 30.2s	r

<catboost.core.CatBoostRegressor at 0x7f60ba67d650>

In [32]:
model_sel3.best_score_

{'learn': {'RMSE': 683.9133246428332}}

In [33]:
order = model_sel3.feature_importances_.argsort()[::-1]
list(zip(X_train.columns[order], model_sel3.feature_importances_[order]))

[('Distance (KM)', 27.124731212070433),
 ('geopy_distance', 24.495039157958978),
 ('speed', 7.188437255285942),
 ('Arrival at Pickup - Delta', 6.805240652328403),
 ('geospeed', 6.650862222546659),
 ('Destination Lat', 4.516763695377784),
 ('Pickup Lat', 3.679090868562997),
 ('Confirmation - Delta', 3.1827783879494236),
 ('Average_Rating', 2.6648534451232395),
 ('Destination Long', 1.9718105461953241),
 ('Placement - Delta', 1.8930730966782081),
 ('Age', 1.7274350588227798),
 ('No_Of_Orders', 1.4605782529548819),
 ('No_of_Ratings', 1.4111723204716118),
 ('Pickup Long', 1.1197166321082686),
 ('User Id', 0.9605072456390669),
 ('Temperature', 0.4973055802512711),
 ('Arrival at Pickup - Weekday (Mo = 1)', 0.31587595362704735),
 ('Arrival at Pickup - Time', 0.2953044741079411),
 ('Confirmation - Time', 0.2901490687987809),
 ('Placement - Weekday (Mo = 1)', 0.2747719398669325),
 ('Rider Id', 0.2666123798687277),
 ('Pickup - Time', 0.2488977350672137),
 ('Placement - Time', 0.23205447773929777

In [34]:
y__pred1 = model_sel1.predict(X__test)
y__pred2 = model_sel2.predict(X__test)
y__pred3 = model_sel3.predict(X__test)

In [35]:
y__pred = np.mean([y__pred2, y__pred3], axis=0)

In [36]:
submit_file = "data/SampleSubmission.csv"
df_submit = pd.read_csv(submit_file, index_col="Order_No")
df_submit.index = df_test.index
df_submit["Time from Pickup to Arrival"] = y__pred
df_submit.head()

Unnamed: 0_level_0,Time from Pickup to Arrival
Order No,Unnamed: 1_level_1
Order_No_19248,1797.729766
Order_No_2699,3153.713426
Order_No_21486,1861.077485
Order_No_19336,2544.181935
Order_No_20374,2873.289186


In [37]:
df_submit.to_csv("to_submit/SampleSubmission_mean.csv")

<h1> to be improved

- Improve CatBoost Hyperparameters