In [1]:
import gc
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from tqdm import tqdm, tqdm_notebook
gc.enable()

In [2]:
# Data locations, resolved relative to the current working directory.
BASE_DIR = os.path.abspath(os.curdir)
TRAIN = os.path.join(BASE_DIR, "train")

In [3]:
def read_data():
    """Load the four CSV inputs and print the shape of each.

    ``train.csv``, ``meal_info.csv`` and ``fulfilment_center_info.csv`` are
    read from ``TRAIN``; ``test.csv`` is read from ``BASE_DIR``.  The train
    and test frames are sorted by week, center and meal.

    Returns:
        tuple: ``(train, test, meal_info, fc_info)`` as DataFrames.
    """
    sort_keys = ["week", "center_id", "meal_id"]

    train = pd.read_csv(os.path.join(TRAIN, 'train.csv'), sep=',')
    train = train.sort_values(sort_keys, ascending=True)

    meal_info = pd.read_csv(os.path.join(TRAIN, 'meal_info.csv'), sep=',')
    fc_info = pd.read_csv(os.path.join(TRAIN, 'fulfilment_center_info.csv'), sep=',')

    test = pd.read_csv(os.path.join(BASE_DIR, 'test.csv'))
    test = test.sort_values(sort_keys, ascending=True)

    report = (
        ("train records - ", train),
        ("test records - ", test),
        ("meal info records - ", meal_info),
        ("fulfilment center records - ", fc_info),
    )
    for label, frame in report:
        print(label, frame.shape)
    return train, test, meal_info, fc_info


In [4]:
# Load the train/test frames plus the meal and fulfilment-center lookup tables.
train_pd, test_pd, meal_info_pd, fc_info_pd = read_data()

train records -  (456548, 9)
test records -  (32573, 8)
meal info records -  (51, 3)
fulfilment center records -  (77, 5)


In [5]:
# Overall week span: first training week through the last test week.
# `end_week` is also the upper bound `get_average` uses by default.
start_week = train_pd.week.min()
end_week = test_pd.week.max()
print("Train range weeks - {} to {}".format(start_week, train_pd.week.max()))
print("Test range weeks - {} to {}".format(test_pd.week.min(), end_week))

Train range weeks - 1 to 145
Test range weeks - 146 to 155


In [6]:
# np.ceil(train_pd.week.unique() / 13)

In [7]:
# month_order_agg.month.unique() % 12

In [8]:
# quarter_order_agg.quarter.unique() % 4

In [39]:
def create_additional_features(data):
    """Add price-gap and cuisine/category features to a merged order frame.

    The caller's frame is left untouched; all work happens on a new frame, so
    the calling cell can safely be re-run.

    Args:
        data: DataFrame with at least ``week``, ``center_id``, ``meal_id``,
            ``base_price``, ``checkout_price``, ``cuisine`` and ``category``.

    Returns:
        A new DataFrame, sorted by week/center/meal before the merge, where
        ``cuisine`` and ``category`` are replaced by ``cuisine_category`` and
        the following columns are appended:
        ``base_minus_checkout`` (absolute gap between base and checkout
        price), its per-(center, meal) ``_mean``/``_std``/``_max``, and
        ``is_discount`` (row gap strictly above the group's mean gap).
    """
    # sort_values / assign / drop all return new frames, so `data` is rebound
    # locally and the argument passed in is never modified.
    data = data.sort_values(["week", "center_id", "meal_id"], ascending=True)
    data = data.assign(
        base_minus_checkout=np.abs(data.base_price - data.checkout_price),
        cuisine_category=data["cuisine"] + "-" + data["category"],
    ).drop(columns=["category", "cuisine"])

    center_meal_agg = data.groupby(["center_id", "meal_id"]).agg(
        {"base_minus_checkout": ["mean", "std", "max"]})
    # Flatten the (column, statistic) MultiIndex into "column_statistic" names.
    center_meal_agg.columns = ['{}_{}'.format(col0, col1)
                               for col0, col1 in center_meal_agg.columns]
    center_meal_agg = center_meal_agg.reset_index()

    data = pd.merge(data, center_meal_agg, on=["center_id", "meal_id"], how="inner")
    data["is_discount"] = data["base_minus_checkout"] > data["base_minus_checkout_mean"]
    return data

In [10]:
def correct_dtypes(data, type_dict, is_ohe=False):
    """Coerce columns to bool / categorical, or one-hot encode the categoricals.

    Args:
        data: DataFrame holding the columns named in ``type_dict``.
        type_dict: mapping of conversion kind to a list of column names.
            ``"bool"`` columns are cast to ``bool``; ``"cat"`` columns are
            either one-hot encoded or converted to ``pd.Categorical``.
            Any other key is reported by the progress prints but otherwise
            ignored.
        is_ohe: when True, each ``"cat"`` column is replaced by one dummy
            column per distinct value (named after the value); when False it
            is converted to a categorical dtype.

    Returns:
        A new DataFrame with the conversions applied.  The input frame is not
        modified.
    """
    # Work on a copy so the caller's frame keeps its original dtypes.
    data = data.copy()
    for type_ in type_dict:
        print("Processing Type - ", type_)
        for col in type_dict[type_]:
            print("processing Column - ", col)
            if type_ == "bool":
                # Plain column assignment replaces the column (and its dtype);
                # `.loc[:, col] = ...` writes into the existing column and may
                # keep the old dtype on recent pandas versions.
                data[col] = data[col].astype('bool')
            elif type_ == "cat":
                if is_ohe:
                    print("Creating One Hot Encodings...")
                    data = pd.concat([data, pd.get_dummies(data[col])], axis=1)
                    del data[col]
                else:
                    print("Converting to Categorical data types...")
                    data[col] = pd.Categorical(data[col])
    return data

In [11]:
def get_time_series_feat(data, is_test=False):
    """Add cyclic ``month`` and ``quarter`` columns to ``data`` in place.

    Weeks are grouped into consecutive 4-week blocks ("month") and 13-week
    blocks ("quarter") via ``get_average``.  Block numbers are then wrapped to
    1..12 and 1..4 respectively (a remainder of 0 is mapped to 12 / 4).

    Args:
        data: DataFrame with a ``week`` column; for training data it must also
            have ``center_id``, ``meal_id`` and ``num_orders``.  The frame is
            modified in place (callers rely on this for the test frame).
        is_test: when True only the two columns are added and nothing is
            returned.

    Returns:
        ``None`` when ``is_test`` is True; otherwise a tuple
        ``(monthly_order_agg, quarterly_order_agg)`` holding the mean of
        ``num_orders`` per (month, center, meal) and per (quarter, center,
        meal), in columns ``num_order_monthly_mean`` and
        ``num_order_quarterly_mean``.

    Raises:
        AssertionError: if any row's week falls outside every window, leaving
            ``month`` or ``quarter`` null.
    """
    month = get_average(4)
    quarter = get_average(13)
    for rows in tqdm_notebook(month):
        data.loc[data.week.isin(month[rows]), "month"] = rows
    for rows in tqdm_notebook(quarter):
        data.loc[data.week.isin(quarter[rows]), "quarter"] = rows

    # Check for actual nulls: len(series.isnull()) is just the row count and
    # would pass even if every value were missing.
    assert not data["month"].isnull().any(), "Month is coming as NULL"
    assert not data["quarter"].isnull().any(), "Quarter is coming as NULL"

    # Wrap to a yearly cycle and assign the result back to the frame; an
    # in-place replace on `data.month` is a chained operation that does not
    # reach the frame under pandas Copy-on-Write.
    data["month"] = (data["month"] % 12).replace({0: 12})
    data["quarter"] = (data["quarter"] % 4).replace({0: 4})

    if is_test:
        return None

    monthly_order_agg = data.groupby(["month", "center_id", "meal_id"]).agg({"num_orders": "mean"}).rename(
        columns={"num_orders": "num_order_monthly_mean"}).reset_index()
    quarterly_order_agg = data.groupby(["quarter", "center_id", "meal_id"]).agg({"num_orders": "mean"}).rename(
        columns={"num_orders": "num_order_quarterly_mean"}).reset_index()
    return monthly_order_agg, quarterly_order_agg

In [12]:
# temp = get_time_series_feat(train_pd)
# get_average(13)

In [13]:
def get_window(window, stop, start=1):
    """Yield consecutive half-open ``(lo, hi)`` windows covering ``[start, stop)``.

    Every window spans ``window`` values except possibly the last, which is
    cut off at ``stop``.  No empty trailing window is produced when the span
    is an exact multiple of ``window``.

    Args:
        window: size of each window; must be positive.
        stop: exclusive upper bound of the covered range.
        start: first value of the first window.

    Yields:
        tuple: ``(lo, hi)`` with ``lo < hi <= stop``, suitable for
        ``range(lo, hi)``.

    Raises:
        ValueError: if ``window`` is not positive (raised when iteration
            starts, since this is a generator).
    """
    if window <= 0:
        # A non-positive step would never reach `stop`.
        raise ValueError("window must be a positive integer")
    while start < stop:
        yield (start, min(start + window, stop))
        start += window


def get_average(window, start=1, stop=None):
    """Map 1-based window numbers to the list of values each window covers.

    Args:
        window: window size passed to ``get_window``.
        start: first value of the first window.
        stop: exclusive upper bound.  When omitted, ``end_week + 1`` is used,
            read from the notebook-level ``end_week`` at call time so that a
            re-loaded dataset is picked up without re-defining this function.

    Returns:
        dict: ``{1: [values in first window], 2: [...], ...}`` in window order.
    """
    if stop is None:
        stop = end_week + 1
    return {
        counter: list(range(lo, hi))
        for counter, (lo, hi) in enumerate(get_window(window, stop, start), start=1)
    }

In [14]:
# print(get_average(52).keys())
# list(map(lambda x: x%52, list(get_average(52).keys())))

In [15]:
# Columns to coerce, keyed by the conversion `correct_dtypes` applies to them.
dtypes_dict = {'bool': ["emailer_for_promotion", "homepage_featured"],
               'cat': ["cuisine_category", "center_type"]}
# Attach meal and fulfilment-center attributes to every training row
# (left joins keep all training rows).
joined_pd = pd.merge(train_pd, meal_info_pd, on="meal_id", how="left")
joined_pd = pd.merge(joined_pd, fc_info_pd, on="center_id", how="left")
joined_pd.sort_values(["week", "center_id", "meal_id"], ascending=True, inplace=True)
print("All the data merged shape - ", joined_pd.shape)
# Add the price-gap features and the combined cuisine/category column.
joined_pd = create_additional_features(joined_pd)

All the data merged shape -  (456548, 15)


In [16]:
# Cast the flag columns to bool and one-hot encode the categorical columns.
joined_pd = correct_dtypes(joined_pd, dtypes_dict, is_ohe=True)

Processing Type -  bool
processing Column -  emailer_for_promotion
processing Column -  homepage_featured
Processing Type -  cat
processing Column -  cuisine_category
Creating One Hot Encodings...
processing Column -  center_type
Creating One Hot Encodings...


In [17]:
# Shape after one-hot encoding (row count should be unchanged).
joined_pd.shape

(456548, 37)

In [18]:
# Derive the month/quarter columns on `joined_pd` (added in place by the call)
# and attach the per-period order averages back onto each training row.
# NOTE(review): the averages are computed from `num_orders` over the whole
# training frame, before any train/validation split — check for target leakage.
month_order_agg, quarter_order_agg = get_time_series_feat(joined_pd)
joined_pd = pd.merge(joined_pd, month_order_agg, on=["month", "center_id", "meal_id"], how="left")
joined_pd = pd.merge(joined_pd, quarter_order_agg, on=["quarter", "center_id", "meal_id"], how="left")

HBox(children=(IntProgress(value=0, max=39), HTML(value='')))




HBox(children=(IntProgress(value=0, max=12), HTML(value='')))




In [19]:
# Shape after adding month/quarter and the two order-average columns.
joined_pd.shape

(456548, 41)

In [20]:
# Rows per derived month and quarter; dropna=False would surface any week
# that was left without a month/quarter assignment.
print(joined_pd.month.value_counts(dropna=False))
print(joined_pd.quarter.value_counts(dropna=False))

1.0     40478
11.0    38626
12.0    38556
10.0    38299
7.0     38219
8.0     38138
9.0     38127
6.0     37807
5.0     37720
3.0     37207
4.0     36777
2.0     36594
Name: month, dtype: int64
3.0    124373
2.0    122513
1.0    120601
4.0     89061
Name: quarter, dtype: int64


In [21]:
# Attach meal and fulfilment-center attributes to every test row,
# mirroring the merge done for the training frame.
joined_test = pd.merge(test_pd, meal_info_pd, on="meal_id", how="left")
joined_test = pd.merge(joined_test, fc_info_pd, on="center_id", how="left")
joined_test.sort_values(["week", "center_id", "meal_id"], ascending=True, inplace=True)
print("All the TEST data merged shape - ", joined_test.shape)

All the TEST data merged shape -  (32573, 14)


In [22]:
# Apply the same feature engineering and type coercion used for training.
# NOTE(review): the price-gap aggregates are recomputed from the test rows
# alone, and the one-hot columns come from the values present in the test
# frame — confirm both line up with the training features.
joined_test = create_additional_features(joined_test)
joined_test = correct_dtypes(joined_test, dtypes_dict, is_ohe=True)
joined_test.shape

Processing Type -  bool
processing Column -  emailer_for_promotion
processing Column -  homepage_featured
Processing Type -  cat
processing Column -  cuisine_category
Creating One Hot Encodings...
processing Column -  center_type
Creating One Hot Encodings...


(32573, 36)

In [23]:
# Adds `month` and `quarter` to `joined_test` in place; with is_test=True the
# call returns nothing, so the result is deliberately not assigned.
get_time_series_feat(joined_test, is_test=True)

HBox(children=(IntProgress(value=0, max=39), HTML(value='')))




HBox(children=(IntProgress(value=0, max=12), HTML(value='')))




In [24]:
# Bring in the order averages computed from the training frame.
# The left joins keep every test row; a (period, center, meal) combination
# that never occurred in training gets NaN in the average column.
joined_test = pd.merge(joined_test, month_order_agg, on=["month", "center_id", "meal_id"], how="left")
joined_test = pd.merge(joined_test, quarter_order_agg, on=["quarter", "center_id", "meal_id"], how="left")

In [25]:
# joined_test.head().T

In [26]:
def data_transform(data_pd, rem_cols, is_test=False):
    """Split a prepared frame into a feature frame and a target array.

    Args:
        data_pd: DataFrame holding features (and ``num_orders`` for training).
        rem_cols: list of column names to leave out of the features.
        is_test: when True no target is extracted and only ``rem_cols`` are
            excluded; otherwise ``num_orders`` is excluded as well and
            returned as the target.

    Returns:
        tuple: ``(features, target)`` where ``features`` is a DataFrame of the
        remaining columns (original order) and ``target`` is a NumPy array of
        ``num_orders`` values, or ``None`` when ``is_test`` is True.
    """
    target_col = None if is_test else "num_orders"
    excluded = rem_cols if is_test else rem_cols + ["num_orders"]

    feature_cols = [name for name in data_pd.columns if name not in excluded]
    print("columns used for prediction - ", feature_cols)
    features = data_pd[feature_cols]

    target = None
    if not is_test:
        target = data_pd[target_col].values
        print("Target shape - ", target.shape)
    print("Input data shape - ", features.shape)
    return features, target

In [27]:
# Final shape of the training frame before building the model inputs.
joined_pd.shape

(456548, 41)

In [57]:
# Replace missing standard deviations (e.g. a group with a single row) with
# zero spread.  The result is assigned back to the frame: `fillna(inplace=True)`
# on an attribute-accessed column is a chained operation that does not update
# the frame under pandas Copy-on-Write.
joined_pd["base_minus_checkout_std"] = joined_pd["base_minus_checkout_std"].fillna(0.0)

In [58]:
# Per-column null counts in the training frame after the fill above.
joined_pd.isnull().sum()

id                          0
week                        0
center_id                   0
meal_id                     0
checkout_price              0
base_price                  0
emailer_for_promotion       0
homepage_featured           0
num_orders                  0
city_code                   0
region_code                 0
op_area                     0
base_minus_checkout         0
base_minus_checkout_mean    0
base_minus_checkout_std     0
base_minus_checkout_max     0
is_discount                 0
Continental-Beverages       0
Continental-Fish            0
Continental-Pizza           0
Continental-Seafood         0
Indian-Beverages            0
Indian-Biryani              0
Indian-Desert               0
Indian-Rice Bowl            0
Italian-Beverages           0
Italian-Pasta               0
Italian-Salad               0
Italian-Sandwich            0
Thai-Beverages              0
Thai-Extras                 0
Thai-Other Snacks           0
Thai-Soup                   0
Thai-Start

In [56]:
# joined_pd[(joined_pd["region_code"] == 85) & 
#           (joined_pd["Continental-Fish"] == 1) & 
#           (joined_pd["week"] == 85) & 
#           (joined_pd["TYPE_C"] == 1)]["base_minus_checkout_std"]

In [55]:
# joined_pd[joined_pd.base_minus_checkout_std.isnull()].T

In [62]:
# Replace missing standard deviations (e.g. a group with a single row) with
# zero spread.  The result is assigned back to the frame: `fillna(inplace=True)`
# on an attribute-accessed column is a chained operation that does not update
# the frame under pandas Copy-on-Write.
joined_test["base_minus_checkout_std"] = joined_test["base_minus_checkout_std"].fillna(0.0)

In [63]:
# Per-column null counts in the test frame after the fill above.
joined_test.isnull().sum()

id                            0
week                          0
center_id                     0
meal_id                       0
checkout_price                0
base_price                    0
emailer_for_promotion         0
homepage_featured             0
city_code                     0
region_code                   0
op_area                       0
base_minus_checkout           0
base_minus_checkout_mean      0
base_minus_checkout_std       0
base_minus_checkout_max       0
is_discount                   0
Continental-Beverages         0
Continental-Fish              0
Continental-Pizza             0
Continental-Seafood           0
Indian-Beverages              0
Indian-Biryani                0
Indian-Desert                 0
Indian-Rice Bowl              0
Italian-Beverages             0
Italian-Pasta                 0
Italian-Salad                 0
Italian-Sandwich              0
Thai-Beverages                0
Thai-Extras                   0
Thai-Other Snacks             0
Thai-Sou

In [66]:
# Peek at the meal lookup table (meal_id, category, cuisine).
meal_info_pd.head()

Unnamed: 0,meal_id,category,cuisine
0,1885,Beverages,Thai
1,1993,Beverages,Thai
2,2539,Beverages,Thai
3,1248,Beverages,Indian
4,2631,Beverages,Indian


In [67]:
# Enrich the monthly order averages with meal and fulfilment-center attributes.
# Inner joins drop any aggregate row without a matching meal or center.
month_meal = pd.merge(month_order_agg, meal_info_pd, on=["meal_id"], how="inner")
month_meal = pd.merge(month_meal, fc_info_pd, on=["center_id"], how="inner")

In [None]:
# Enrich the quarterly order averages with meal and fulfilment-center
# attributes, mirroring how `month_meal` is built from the monthly averages.
quarter_meal = pd.merge(quarter_order_agg, meal_info_pd, on=["meal_id"], how="inner")
quarter_meal = pd.merge(quarter_meal, fc_info_pd, on=["center_id"], how="inner")

In [65]:
# Test rows whose (month, center, meal) combination found no match in the
# training aggregates, transposed so each row is shown as a column.
joined_test[joined_test.num_order_monthly_mean.isnull()].T

Unnamed: 0,263,264,265,266,267,268,269,1245,1246,1247,...,32496,32497,32498,32509,32510,32511,32512,32513,32541,32572
id,1315410,1492833,1136861,1295531,1280308,1167103,1389109,1152557,1262290,1118117,...,1014910,1174372,1169548,1411001,1179493,1048344,1462332,1279293,1329839,1030237
week,149,150,151,152,153,154,155,149,150,151,...,153,154,155,154,149,151,149,151,150,155
center_id,10,10,10,10,10,10,10,13,13,13,...,73,73,73,80,92,92,92,92,139,139
meal_id,2104,2104,2104,2104,2104,2104,2104,2104,2104,2104,...,2956,2956,2956,1902,2104,2104,2577,2577,1525,2577
checkout_price,581.03,582.03,581.03,582.03,581.03,680.03,680.03,583.03,582.03,582.03,...,631.53,679.03,678.03,445.23,631.53,629.53,320.13,319.13,262.93,320.13
base_price,581.03,583.03,583.03,582.03,581.03,680.03,679.03,583.03,581.03,583.03,...,630.53,678.03,679.03,447.23,629.53,629.53,321.13,321.13,262.93,320.13
emailer_for_promotion,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
homepage_featured,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
city_code,590,590,590,590,590,590,590,590,590,590,...,576,576,576,604,526,526,526,526,693,693
region_code,56,56,56,56,56,56,56,56,56,56,...,34,34,34,56,34,34,34,34,34,34


In [69]:
# Full column list of the training frame, used to decide which columns to drop.
joined_pd.columns

Index(['id', 'week', 'center_id', 'meal_id', 'checkout_price', 'base_price',
       'emailer_for_promotion', 'homepage_featured', 'num_orders', 'city_code',
       'region_code', 'op_area', 'base_minus_checkout',
       'base_minus_checkout_mean', 'base_minus_checkout_std',
       'base_minus_checkout_max', 'is_discount', 'Continental-Beverages',
       'Continental-Fish', 'Continental-Pizza', 'Continental-Seafood',
       'Indian-Beverages', 'Indian-Biryani', 'Indian-Desert',
       'Indian-Rice Bowl', 'Italian-Beverages', 'Italian-Pasta',
       'Italian-Salad', 'Italian-Sandwich', 'Thai-Beverages', 'Thai-Extras',
       'Thai-Other Snacks', 'Thai-Soup', 'Thai-Starters', 'TYPE_A', 'TYPE_B',
       'TYPE_C', 'month', 'quarter', 'num_order_monthly_mean',
       'num_order_quarterly_mean'],
      dtype='object')

In [70]:
# Identifier / key columns excluded from the model features.
cols_to_remove = ['id', 'week', 'center_id', 'meal_id']

In [71]:
# Build the model inputs from copies of the prepared frames.
# The test call has no target, so its second return value is discarded.
train_set, train_target = data_transform(joined_pd.copy(), rem_cols=cols_to_remove)
test_set, _ = data_transform(joined_test.copy(), rem_cols=cols_to_remove, is_test=True)

columns used for prediction -  ['checkout_price', 'base_price', 'emailer_for_promotion', 'homepage_featured', 'city_code', 'region_code', 'op_area', 'base_minus_checkout', 'base_minus_checkout_mean', 'base_minus_checkout_std', 'base_minus_checkout_max', 'is_discount', 'Continental-Beverages', 'Continental-Fish', 'Continental-Pizza', 'Continental-Seafood', 'Indian-Beverages', 'Indian-Biryani', 'Indian-Desert', 'Indian-Rice Bowl', 'Italian-Beverages', 'Italian-Pasta', 'Italian-Salad', 'Italian-Sandwich', 'Thai-Beverages', 'Thai-Extras', 'Thai-Other Snacks', 'Thai-Soup', 'Thai-Starters', 'TYPE_A', 'TYPE_B', 'TYPE_C', 'month', 'quarter', 'num_order_monthly_mean', 'num_order_quarterly_mean']
Target shape -  (456548,)
Input data shape -  (456548, 36)
columns used for prediction -  ['checkout_price', 'base_price', 'emailer_for_promotion', 'homepage_featured', 'city_code', 'region_code', 'op_area', 'base_minus_checkout', 'base_minus_checkout_mean', 'base_minus_checkout_std', 'base_minus_checko

In [72]:
from sklearn.model_selection import ShuffleSplit

# Generate five seeded random 80/20 splits of the row positions and keep only
# the third one (index 2) as the train/validation split.
# NOTE(review): rows are shuffled without regard to `week`, so validation
# weeks overlap training weeks — confirm this is acceptable for a forecast.
shuffle = ShuffleSplit(n_splits=5, test_size=0.2, random_state=2019)
train_index, test_index = list(shuffle.split(train_set))[2]
print("Train - Val split | ", len(train_index), " - ", len(test_index))

Train - Val split |  365238  -  91310


In [73]:
# Materialise the chosen split as NumPy arrays; the indices from the splitter
# are positional, hence `.iloc` on the frame and plain indexing on the target.
X_train = train_set.iloc[train_index].values
X_val = train_set.iloc[test_index].values
y_train = train_target[train_index]
y_val = train_target[test_index]

# Features for the rows to be predicted.
X_test = test_set.values

print("TRAIN SHAPE || data - {} | target - {}".format(X_train.shape,
                                                      y_train.shape))
print("VAL SHAPE || data - {} | target - {}".format(X_val.shape,
                                                    y_val.shape))
print("TEST SHAPE || data - {} ".format(X_test.shape))

TRAIN SHAPE || data - (365238, 36) | target - (365238,)
VAL SHAPE || data - (91310, 36) | target - (91310,)
TEST SHAPE || data - (32573, 36) 


In [74]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error


def rms_log_error(true, pred, use_log=False):
    """Root-mean-squared error, optionally computed on a log scale.

    Despite the name, the default behaviour is a plain RMSE on the raw values
    (kept so existing calls and recorded results stay comparable).  Pass
    ``use_log=True`` to get the root-mean-squared *log* error, i.e. the RMSE
    between ``log1p(true)`` and ``log1p(pred)``.

    Args:
        true: array-like of observed values.
        pred: array-like of predicted values, same shape as ``true``.
        use_log: apply ``log1p`` to both inputs before computing the error;
            inputs must then be greater than -1.

    Returns:
        float: the (log-scale, if requested) root-mean-squared error.
    """
    if use_log:
        true, pred = np.log1p(true), np.log1p(pred)
    return np.sqrt(mean_squared_error(true, pred))


def model_results(model):
    """Print the error metric of ``model`` on the training and validation data.

    Predictions are made on the notebook-level ``X_train`` and ``X_val``
    arrays and scored against ``y_train`` and ``y_val`` with
    ``rms_log_error``.

    Args:
        model: fitted estimator exposing ``predict``.
    """
    train_pred = model.predict(X_train)
    val_pred = model.predict(X_val)

    train_score = rms_log_error(y_train, train_pred)
    print("Train In-sample metric : ", train_score)
    val_score = rms_log_error(y_val, val_pred)
    print("Validation metric : ", val_score)


In [75]:
# Experiment: shallow forest (depth 8); a float `min_samples_split` is read by
# scikit-learn as a fraction of the training samples.
# Recorded result for this configuration:
# Train In-sample metric :  166.52392347627202
# Validation metric :  163.69684683292664
rf_config = {"n_estimators": 1000,
             "max_depth": 8,
             "min_samples_split": 0.003,
             "max_features": 'sqrt',
             "random_state": 111,
             "n_jobs": 4,
             "verbose": 1}

rf = RandomForestRegressor(**rf_config)

rf.fit(X_train, y_train)

model_results(rf)


[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   10.1s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   44.5s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:  1.7min
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:  3.0min
[Parallel(n_jobs=4)]: Done 1000 out of 1000 | elapsed:  3.8min finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.5s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    2.3s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    5.3s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    9.5s
[Parallel(n_jobs=4)]: Done 1000 out of 1000 | elapsed:   12.3s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.6s
[Paral

Train In-sample metric :  166.52392347627202
Validation metric :  163.69684683292664


[Parallel(n_jobs=4)]: Done 1000 out of 1000 | elapsed:    2.9s finished


In [None]:
# Experiment: deep forest (depth 20) with no minimum-split constraint.
# Result noted from a previous run of this configuration:
# Train In-sample metric :  105.15903034359839
# Validation metric :  136.502376993227
rf_config = {"n_estimators": 800,
             "max_depth": 20,
             "max_features": 'sqrt',
             "random_state": 111,
             "n_jobs": 4,
             "verbose": 1}

rf = RandomForestRegressor(**rf_config)

rf.fit(X_train, y_train)

model_results(rf)

[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   19.6s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:  1.4min
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:  3.4min


In [None]:
# Experiment: depth 15 with log2 feature sampling and a fractional
# `min_samples_split`; no result is recorded for this configuration.
rf_config = {"n_estimators": 1500,
             "max_depth": 15,
             "max_features": 'log2',
             "min_samples_split": 0.003, 
             "random_state": 2019,
             "n_jobs": 4,
             "verbose": 1}

rf = RandomForestRegressor(**rf_config)

rf.fit(X_train, y_train)

model_results(rf)

In [87]:
# Experiment: depth 10 with log2 feature sampling.
# The figures below are labelled max_depth = 12, while this cell now uses
# max_depth = 10 — presumably they were noted from an earlier run:
# max_depth = 12
# Train In-sample metric :  197.20422421330088
# Validation metric :  188.3555629074162
rf_config = {"n_estimators": 1500,
             "max_depth": 10,
             "max_features": 'log2',
             "min_samples_split": 0.003, 
             "random_state": 2019,
             "n_jobs": 4,
             "verbose": 1}

rf = RandomForestRegressor(**rf_config)

rf.fit(X_train, y_train)

model_results(rf)

[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   11.3s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   49.1s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:  1.9min
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:  3.6min
[Parallel(n_jobs=4)]: Done 1242 tasks      | elapsed:  5.7min
[Parallel(n_jobs=4)]: Done 1500 out of 1500 | elapsed:  7.0min finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.7s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    3.3s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    8.3s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:   14.6s
[Parallel(n_jobs=4)]: Done 1242 tasks      | elapsed:   21.8s
[Parallel(n_jobs=4)]: Done 1500 out of 1500 | elapsed:   26.9s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Par

Train In-sample metric :  198.0867908210644
Validation metric :  189.17593860992582


[Parallel(n_jobs=4)]: Done 1500 out of 1500 | elapsed:    6.5s finished


In [79]:
# Experiment: depth 12 with sqrt feature sampling and no minimum-split
# constraint.
rf_config = {"n_estimators": 700,
             "max_depth": 12,
             "max_features": 'sqrt',
             "random_state": 111,
             "n_jobs": 4,
             "verbose": 1}

rf = RandomForestRegressor(**rf_config)

rf.fit(X_train, y_train)

model_results(rf)

[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   13.8s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:  1.0min
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:  2.3min
[Parallel(n_jobs=4)]: Done 700 out of 700 | elapsed:  3.8min finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.9s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    3.9s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    9.1s
[Parallel(n_jobs=4)]: Done 700 out of 700 | elapsed:   14.2s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.2s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    1.0s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    2.3s


Train In-sample metric :  128.43748620593917
Validation metric :  143.0822366528206


[Parallel(n_jobs=4)]: Done 700 out of 700 | elapsed:    3.5s finished
