# Pickup to Delivery Overall

In [1]:
import os
import sys
import datetime
import pandas as pd
pd.options.display.float_format = '{:.2f}'.format
import numpy as np
import pickle

sys.path.insert(0, os.path.expanduser('./'))
import query_runner as qr
import utils
from estimator import BaselineModel_sum, BaselineModel_mean, LinearModel

In [2]:
base_query_path = './queries/'
dwh_config, livedb_config, parameters_config = utils.load_config(config_file='./config.ini')
datalake_connection = qr.create_connection(db='datalake')
#monolith_connection = qr.create_connection(user=livedb_config['monolith_username'], password=livedb_config['monolith_password'], db='livedb')
#dispatching_db_connection = qr.create_connection(user=livedb_config['dispatching_db_username'], password=livedb_config['dispatching_db_password'], db='dispatchingdb')

INFO:trino.auth:keyring module not found. OAuth2 token will not be stored in keyring.


In [3]:
start_date = parameters_config['start_date']
end_date = parameters_config['end_date']
country_code = parameters_config['country_code']
cities = parameters_config['cities']

print(f'Start date: {start_date} | End date: {end_date} | Countries: {country_code} | Cities: {cities}')

Start date: 2024-09-30 | End date: 2024-10-20 | Countries: ES | Cities: 'MAD', 'BCN', 'SEV', 'ALC'


In [4]:
parameters = {
    'start_date': start_date,
    'end_date': end_date,
    'country_code': country_code,
    'cities': cities
}

## Load the dataset

In [5]:
data = pd.read_parquet("data/parquet/dataframe.parquet")

## Hyperparameters

In [6]:
test_set_perc = 0.1
days_for_test = 7
k_cv = 5

## Database split

As we have partitioned the data by city and creation date, we can use this information to split the data. This will help to avoid data leakage, as we will not have data from the future in the training set.
This is much better than just sorting the data by the creation timestamp and taking 10% of the dataset as test set, as we did before.

In [7]:
# We take the last week of the dataset to test the model
begin_test_date = pd.to_datetime(end_date) - pd.Timedelta(days=days_for_test-1)
begin_test_date = begin_test_date.strftime("%Y-%m-%d")
print(f'Start date: {start_date} | Begin test date: {begin_test_date} | End date: {end_date}')

Start date: 2024-09-30 | Begin test date: 2024-10-14 | End date: 2024-10-20


In [8]:
X_train = pd.read_parquet("data/parquet/dataframe.parquet/", filters=[('creation_date', '<', begin_test_date)])
X_train.head()

Unnamed: 0,country_code,order_id,courier_id,creation_timestamp,activation_timestamp,transport,pickup_timestamp,delivery_entering_timestamp,delivery_timestamp,pickup_latitude,...,time_zone,pickup_latitude_rad,pickup_longitude_rad,delivery_latitude_rad,delivery_longitude_rad,pd_distance_haversine_m,pd_distance_haversine_m_sk,pd_distance_manhattan_m,creation_date,city_code
0,ES,100901000206,169065712,2024-09-30 19:00:49+00:00,2024-09-30 19:00:52+00:00,CAR,2024-09-30 19:20:41.337000+00:00,2024-09-30 19:30:56+00:00,2024-09-30 19:38:11+00:00,38.37,...,Europe/Madrid,0.67,-0.01,0.67,-0.01,3147.42,3147.42,4371.55,2024-09-30,ALC
1,ES,100901487351,173633632,2024-09-30 21:54:58+00:00,2024-09-30 21:54:59+00:00,MOTORBIKE,2024-09-30 21:58:40.678000+00:00,2024-09-30 22:12:29+00:00,2024-09-30 22:16:54+00:00,38.34,...,Europe/Madrid,0.67,-0.01,0.67,-0.01,1959.9,1959.9,2228.78,2024-09-30,ALC
2,ES,100901615508,2320936,2024-09-30 23:17:55+00:00,2024-09-30 23:17:55+00:00,CAR,2024-09-30 23:25:52.013000+00:00,2024-09-30 23:34:12+00:00,2024-09-30 23:35:27+00:00,38.43,...,Europe/Madrid,0.67,-0.01,0.67,-0.01,3043.08,3043.08,4304.54,2024-09-30,ALC
3,ES,100900153723,174681565,2024-09-30 12:50:30+00:00,2024-09-30 12:50:31+00:00,MOTORBIKE,2024-09-30 12:51:54.134000+00:00,2024-09-30 12:53:27+00:00,2024-09-30 12:56:56+00:00,38.39,...,Europe/Madrid,0.67,-0.01,0.67,-0.01,384.77,384.77,538.64,2024-09-30,ALC
4,ES,100900235936,142421923,2024-09-30 13:23:50+00:00,2024-09-30 13:23:51+00:00,CAR,2024-09-30 13:49:36.097000+00:00,2024-09-30 13:57:33+00:00,2024-09-30 14:01:33+00:00,38.41,...,Europe/Madrid,0.67,-0.01,0.67,-0.01,2321.1,2321.1,2443.13,2024-09-30,ALC


In [9]:
# Check that there are no nulls deriving from a wrong writing of parquet files (appending instead of overwriting)
X_train.isnull().sum().sum()

np.int64(0)

In [10]:
y_train = X_train['delivery_entering_timestamp'] - X_train['pickup_timestamp']
y_train = pd.Series(y_train, name='pickup_to_delivery')
y_train

0        0 days 00:10:14.663000
1        0 days 00:13:48.322000
2        0 days 00:08:19.987000
3        0 days 00:01:32.866000
4        0 days 00:07:56.903000
                  ...          
678746   0 days 00:03:53.637000
678747   0 days 00:03:57.223000
678748   0 days 00:06:54.631000
678749   0 days 00:23:10.249000
678750   0 days 00:05:43.235000
Name: pickup_to_delivery, Length: 678751, dtype: timedelta64[ns]

In [11]:
y_train.isnull().sum().sum()

np.int64(0)

In [12]:
X_test = pd.read_parquet("data/parquet/dataframe.parquet", filters=[('creation_date', '>=', begin_test_date)])
X_test.head()

Unnamed: 0,country_code,order_id,courier_id,creation_timestamp,activation_timestamp,transport,pickup_timestamp,delivery_entering_timestamp,delivery_timestamp,pickup_latitude,...,time_zone,pickup_latitude_rad,pickup_longitude_rad,delivery_latitude_rad,delivery_longitude_rad,pd_distance_haversine_m,pd_distance_haversine_m_sk,pd_distance_manhattan_m,creation_date,city_code
0,ES,100932167582,10191824,2024-10-14 15:51:51+00:00,2024-10-14 15:51:52+00:00,MOTORBIKE,2024-10-14 16:00:14.017000+00:00,2024-10-14 16:06:55+00:00,2024-10-14 16:08:56+00:00,38.36,...,Europe/Madrid,0.67,-0.01,0.67,-0.01,1817.53,1817.53,2562.96,2024-10-14,ALC
1,ES,100932256816,50618208,2024-10-14 16:36:30+00:00,2024-10-14 16:36:31+00:00,CAR,2024-10-14 16:45:53.061000+00:00,2024-10-14 16:53:56+00:00,2024-10-14 16:56:21+00:00,38.35,...,Europe/Madrid,0.67,-0.01,0.67,-0.01,757.29,757.29,1071.86,2024-10-14,ALC
2,ES,100932496959,170202435,2024-10-14 18:26:33+00:00,2024-10-14 18:26:34+00:00,MOTORBIKE,2024-10-14 18:35:05.220000+00:00,2024-10-14 18:41:32+00:00,2024-10-14 18:43:20+00:00,38.43,...,Europe/Madrid,0.67,-0.01,0.67,-0.01,2541.01,2541.01,3531.02,2024-10-14,ALC
3,ES,100932664161,166731686,2024-10-14 19:30:57+00:00,2024-10-14 19:30:59+00:00,CAR,2024-10-14 19:58:06.210000+00:00,2024-10-14 20:03:31+00:00,2024-10-14 20:04:48+00:00,38.4,...,Europe/Madrid,0.67,-0.01,0.67,-0.01,1432.01,1432.01,1931.87,2024-10-14,ALC
4,ES,100932854728,168741567,2024-10-14 20:35:29+00:00,2024-10-14 20:35:30+00:00,CAR,2024-10-14 20:57:11.030000+00:00,2024-10-14 21:00:16+00:00,2024-10-14 21:04:55+00:00,38.36,...,Europe/Madrid,0.67,-0.01,0.67,-0.01,477.65,477.65,544.53,2024-10-14,ALC


In [13]:
X_test.isnull().sum().sum()

np.int64(0)

In [14]:
y_test = (X_test['delivery_entering_timestamp'] - X_test['pickup_timestamp']).dt.total_seconds()
y_test = pd.Series(y_test, dtype=np.float64, name='pickup_to_delivery')
y_test

0        400.98
1        482.94
2        386.78
3        324.79
4        184.97
          ...  
349864   632.29
349865   557.10
349866   304.41
349867   522.05
349868   291.69
Name: pickup_to_delivery, Length: 349869, dtype: float64

In [15]:
y_test.isnull().sum().sum()

np.int64(0)

In [16]:
print("Train datasets shapes: ", X_train.shape, y_train.shape)
print("Test datasets shapes: ", X_test.shape, y_test.shape)

Train datasets shapes:  (678751, 23) (678751,)
Test datasets shapes:  (349869, 23) (349869,)


## Baseline Models

### BaselineModel_sum

In [17]:
start = datetime.datetime.now()
print(f"Start time: {start}")

Start time: 2025-04-16 18:44:14.713528


In [18]:
model_bl_sum = BaselineModel_sum()
model_bl_sum.fit(X_train, y_train)

INFO:root:Train datasets shapes: X: (678751, 23), y: (678751,)


<estimator.BaselineModel_sum at 0x11db81e50>

In [19]:
end = datetime.datetime.now()
print(f"End time: {end}")
model_bl_sum_time = end - start
print(f"Time elapsed: {model_bl_sum_time}")

End time: 2025-04-16 18:44:15.073964
Time elapsed: 0:00:00.360436


In [20]:
X_test_expanded = X_test.copy()
X_test_expanded['y_test_predicted'] = model_bl_sum.predict(X_test)
X_test_expanded['y_test'] = (X_test_expanded['delivery_entering_timestamp'] - X_test_expanded['pickup_timestamp']).dt.total_seconds()
X_test_expanded['diff'] = X_test_expanded['y_test_predicted'] - X_test_expanded['y_test']
X_test_expanded

Unnamed: 0,country_code,order_id,courier_id,creation_timestamp,activation_timestamp,transport,pickup_timestamp,delivery_entering_timestamp,delivery_timestamp,pickup_latitude,...,delivery_latitude_rad,delivery_longitude_rad,pd_distance_haversine_m,pd_distance_haversine_m_sk,pd_distance_manhattan_m,creation_date,city_code,y_test_predicted,y_test,diff
0,ES,100932167582,10191824,2024-10-14 15:51:51+00:00,2024-10-14 15:51:52+00:00,MOTORBIKE,2024-10-14 16:00:14.017000+00:00,2024-10-14 16:06:55+00:00,2024-10-14 16:08:56+00:00,38.36,...,0.67,-0.01,1817.53,1817.53,2562.96,2024-10-14,ALC,581.14,400.98,180.15
1,ES,100932256816,50618208,2024-10-14 16:36:30+00:00,2024-10-14 16:36:31+00:00,CAR,2024-10-14 16:45:53.061000+00:00,2024-10-14 16:53:56+00:00,2024-10-14 16:56:21+00:00,38.35,...,0.67,-0.01,757.29,757.29,1071.86,2024-10-14,ALC,213.69,482.94,-269.25
2,ES,100932496959,170202435,2024-10-14 18:26:33+00:00,2024-10-14 18:26:34+00:00,MOTORBIKE,2024-10-14 18:35:05.220000+00:00,2024-10-14 18:41:32+00:00,2024-10-14 18:43:20+00:00,38.43,...,0.67,-0.01,2541.01,2541.01,3531.02,2024-10-14,ALC,812.46,386.78,425.68
3,ES,100932664161,166731686,2024-10-14 19:30:57+00:00,2024-10-14 19:30:59+00:00,CAR,2024-10-14 19:58:06.210000+00:00,2024-10-14 20:03:31+00:00,2024-10-14 20:04:48+00:00,38.40,...,0.67,-0.01,1432.01,1432.01,1931.87,2024-10-14,ALC,404.09,324.79,79.30
4,ES,100932854728,168741567,2024-10-14 20:35:29+00:00,2024-10-14 20:35:30+00:00,CAR,2024-10-14 20:57:11.030000+00:00,2024-10-14 21:00:16+00:00,2024-10-14 21:04:55+00:00,38.36,...,0.67,-0.01,477.65,477.65,544.53,2024-10-14,ALC,134.78,184.97,-50.19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
349864,ES,100947015472,176969368,2024-10-21 00:57:33+00:00,2024-10-21 00:57:34+00:00,BICYCLE,2024-10-21 01:12:35.705000+00:00,2024-10-21 01:23:08+00:00,2024-10-21 01:32:38+00:00,37.38,...,0.65,-0.10,2098.39,2098.39,2925.83,2024-10-20,SEV,788.86,632.29,156.57
349865,ES,100944621553,159448683,2024-10-20 10:33:25+00:00,2024-10-20 10:33:26+00:00,CAR,2024-10-20 11:01:33.897000+00:00,2024-10-20 11:10:51+00:00,2024-10-20 11:12:43+00:00,37.38,...,0.65,-0.10,2080.63,2080.63,2601.94,2024-10-20,SEV,587.11,557.10,30.01
349866,ES,100945007795,145859188,2024-10-20 13:32:20+00:00,2024-10-20 13:32:21+00:00,BICYCLE,2024-10-20 13:39:11.591000+00:00,2024-10-20 13:44:16+00:00,2024-10-20 13:48:51+00:00,37.38,...,0.65,-0.10,622.01,622.01,824.28,2024-10-20,SEV,233.84,304.41,-70.57
349867,ES,100945051115,177805460,2024-10-20 13:44:47+00:00,2024-10-20 13:44:49+00:00,MOTORBIKE,2024-10-20 13:53:20.950000+00:00,2024-10-20 14:02:03+00:00,2024-10-20 14:02:46+00:00,37.35,...,0.65,-0.11,2980.41,2980.41,3963.16,2024-10-20,SEV,952.95,522.05,430.90


In [21]:
model_bl_sum.predict(X_test.iloc[0])

0   581.14
Name: pickup_to_delivery_predicted, dtype: float64

### Evaluation pipeline

In [22]:
model_bl_sum_eval = model_bl_sum.evaluate(X_test, y_test)
model_bl_sum_eval

y_hat <class 'pandas.core.series.Series'> float64
y_test <class 'pandas.core.series.Series'> float64


(np.float64(145.6280391037287), np.float64(52902.38788391615))

### BaselineModel_mean

In [23]:
start = datetime.datetime.now()
print(f"Start time: {start}")

Start time: 2025-04-16 18:44:58.930929


In [24]:
model_bl_mean = BaselineModel_mean()
model_bl_mean.fit(X_train, y_train)

INFO:root:Train datasets shapes: X: (678751, 23), y: (678751,)


<estimator.BaselineModel_mean at 0x11db82330>

In [25]:
end = datetime.datetime.now()
print(f"End time: {end}")
model_bl_mean_time = end - start
print(f"Time elapsed: {model_bl_mean_time}")

End time: 2025-04-16 18:44:59.228450
Time elapsed: 0:00:00.297521


In [26]:
X_test_expanded = X_test.copy()
X_test_expanded['y_test_predicted'] = model_bl_mean.predict(X_test)
X_test_expanded['y_test'] = (X_test_expanded['delivery_entering_timestamp'] - X_test_expanded['pickup_timestamp']).dt.total_seconds()
X_test_expanded['diff'] = X_test_expanded['y_test_predicted'] - X_test_expanded['y_test']
X_test_expanded

Unnamed: 0,country_code,order_id,courier_id,creation_timestamp,activation_timestamp,transport,pickup_timestamp,delivery_entering_timestamp,delivery_timestamp,pickup_latitude,...,delivery_latitude_rad,delivery_longitude_rad,pd_distance_haversine_m,pd_distance_haversine_m_sk,pd_distance_manhattan_m,creation_date,city_code,y_test_predicted,y_test,diff
0,ES,100932167582,10191824,2024-10-14 15:51:51+00:00,2024-10-14 15:51:52+00:00,MOTORBIKE,2024-10-14 16:00:14.017000+00:00,2024-10-14 16:06:55+00:00,2024-10-14 16:08:56+00:00,38.36,...,0.67,-0.01,1817.53,1817.53,2562.96,2024-10-14,ALC,408.65,400.98,7.67
1,ES,100932256816,50618208,2024-10-14 16:36:30+00:00,2024-10-14 16:36:31+00:00,CAR,2024-10-14 16:45:53.061000+00:00,2024-10-14 16:53:56+00:00,2024-10-14 16:56:21+00:00,38.35,...,0.67,-0.01,757.29,757.29,1071.86,2024-10-14,ALC,305.30,482.94,-177.64
2,ES,100932496959,170202435,2024-10-14 18:26:33+00:00,2024-10-14 18:26:34+00:00,MOTORBIKE,2024-10-14 18:35:05.220000+00:00,2024-10-14 18:41:32+00:00,2024-10-14 18:43:20+00:00,38.43,...,0.67,-0.01,2541.01,2541.01,3531.02,2024-10-14,ALC,571.32,386.78,184.54
3,ES,100932664161,166731686,2024-10-14 19:30:57+00:00,2024-10-14 19:30:59+00:00,CAR,2024-10-14 19:58:06.210000+00:00,2024-10-14 20:03:31+00:00,2024-10-14 20:04:48+00:00,38.40,...,0.67,-0.01,1432.01,1432.01,1931.87,2024-10-14,ALC,577.31,324.79,252.52
4,ES,100932854728,168741567,2024-10-14 20:35:29+00:00,2024-10-14 20:35:30+00:00,CAR,2024-10-14 20:57:11.030000+00:00,2024-10-14 21:00:16+00:00,2024-10-14 21:04:55+00:00,38.36,...,0.67,-0.01,477.65,477.65,544.53,2024-10-14,ALC,192.57,184.97,7.60
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
349864,ES,100947015472,176969368,2024-10-21 00:57:33+00:00,2024-10-21 00:57:34+00:00,BICYCLE,2024-10-21 01:12:35.705000+00:00,2024-10-21 01:23:08+00:00,2024-10-21 01:32:38+00:00,37.38,...,0.65,-0.10,2098.39,2098.39,2925.83,2024-10-20,SEV,689.62,632.29,57.32
349865,ES,100944621553,159448683,2024-10-20 10:33:25+00:00,2024-10-20 10:33:26+00:00,CAR,2024-10-20 11:01:33.897000+00:00,2024-10-20 11:10:51+00:00,2024-10-20 11:12:43+00:00,37.38,...,0.65,-0.10,2080.63,2080.63,2601.94,2024-10-20,SEV,838.81,557.10,281.70
349866,ES,100945007795,145859188,2024-10-20 13:32:20+00:00,2024-10-20 13:32:21+00:00,BICYCLE,2024-10-20 13:39:11.591000+00:00,2024-10-20 13:44:16+00:00,2024-10-20 13:48:51+00:00,37.38,...,0.65,-0.10,622.01,622.01,824.28,2024-10-20,SEV,204.42,304.41,-99.99
349867,ES,100945051115,177805460,2024-10-20 13:44:47+00:00,2024-10-20 13:44:49+00:00,MOTORBIKE,2024-10-20 13:53:20.950000+00:00,2024-10-20 14:02:03+00:00,2024-10-20 14:02:46+00:00,37.35,...,0.65,-0.11,2980.41,2980.41,3963.16,2024-10-20,SEV,670.11,522.05,148.06


In [27]:
model_bl_mean.predict(X_test.iloc[0])

0   408.65
Name: pickup_to_delivery_predicted, dtype: float64

### Evaluation pipeline

In [28]:
model_bl_mean_eval = model_bl_mean.evaluate(X_test, y_test)
model_bl_mean_eval

(np.float64(161.23709324791398), np.float64(64837.86641313063))

## Linear Model

In [29]:
# Train on a small subset to check if the model is working
X_train_smaller = X_train.head(1000)
y_train_smaller = y_train.head(1000)

### Linear Model encoding dummy variables

In [30]:
model_linear_smaller = LinearModel(model_type='linear', encoding='dummy')
model_linear_smaller.fit(X_train_smaller, y_train_smaller)

INFO:root:Train datasets shapes: X: (1000, 25), y: (1000,)
INFO:root:Starting to encode variables
INFO:root:Encoded dataset shape: X: (1000, 39)
INFO:root:Finished to encode variables. Starting to fit the model
INFO:root:Finished training the model


<estimator.LinearModel at 0x11daf1370>

In [31]:
X_test_expanded = X_train_smaller.copy()
X_test_expanded['y_test_predicted'] = model_linear_smaller.predict(X_test_expanded)
X_test_expanded['y_test'] = (X_test_expanded['delivery_entering_timestamp'] - X_test_expanded['pickup_timestamp']).dt.total_seconds()
X_test_expanded['diff'] = X_test_expanded['y_test_predicted'] - X_test_expanded['y_test']
X_test_expanded

Unnamed: 0,country_code,order_id,courier_id,creation_timestamp,activation_timestamp,transport,pickup_timestamp,delivery_entering_timestamp,delivery_timestamp,pickup_latitude,...,pd_distance_haversine_m,pd_distance_haversine_m_sk,pd_distance_manhattan_m,creation_date,city_code,time,velocity,y_test_predicted,y_test,diff
0,ES,100901000206,169065712,2024-09-30 19:00:49+00:00,2024-09-30 19:00:52+00:00,CAR,2024-09-30 19:20:41.337000+00:00,2024-09-30 19:30:56+00:00,2024-09-30 19:38:11+00:00,38.37,...,3147.42,3147.42,4371.55,2024-09-30,ALC,614.66,5.12,614663000000.03,614.66,614662999385.37
1,ES,100901487351,173633632,2024-09-30 21:54:58+00:00,2024-09-30 21:54:59+00:00,MOTORBIKE,2024-09-30 21:58:40.678000+00:00,2024-09-30 22:12:29+00:00,2024-09-30 22:16:54+00:00,38.34,...,1959.90,1959.90,2228.78,2024-09-30,ALC,828.32,2.37,828322000000.01,828.32,828321999171.68
2,ES,100901615508,2320936,2024-09-30 23:17:55+00:00,2024-09-30 23:17:55+00:00,CAR,2024-09-30 23:25:52.013000+00:00,2024-09-30 23:34:12+00:00,2024-09-30 23:35:27+00:00,38.43,...,3043.08,3043.08,4304.54,2024-09-30,ALC,499.99,6.09,499987000000.02,499.99,499986999500.04
3,ES,100900153723,174681565,2024-09-30 12:50:30+00:00,2024-09-30 12:50:31+00:00,MOTORBIKE,2024-09-30 12:51:54.134000+00:00,2024-09-30 12:53:27+00:00,2024-09-30 12:56:56+00:00,38.39,...,384.77,384.77,538.64,2024-09-30,ALC,92.87,4.14,92865999999.98,92.87,92865999907.12
4,ES,100900235936,142421923,2024-09-30 13:23:50+00:00,2024-09-30 13:23:51+00:00,CAR,2024-09-30 13:49:36.097000+00:00,2024-09-30 13:57:33+00:00,2024-09-30 14:01:33+00:00,38.41,...,2321.10,2321.10,2443.13,2024-09-30,ALC,476.90,4.87,476903000000.01,476.90,476902999523.11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,ES,100901315231,146195464,2024-09-30 20:49:51+00:00,2024-09-30 20:49:52+00:00,CAR,2024-09-30 20:59:02.847000+00:00,2024-09-30 21:04:33+00:00,2024-09-30 21:05:24+00:00,38.36,...,1485.18,1485.18,2054.80,2024-09-30,ALC,330.15,4.50,330153000000.00,330.15,330152999669.85
996,ES,100901448857,141248791,2024-09-30 21:38:14+00:00,2024-09-30 21:38:15+00:00,MOTORBIKE,2024-09-30 21:46:44.524000+00:00,2024-09-30 22:00:39+00:00,2024-09-30 22:01:02+00:00,38.36,...,6872.88,6872.88,9730.64,2024-09-30,ALC,834.48,8.24,834476000000.09,834.48,834475999165.62
997,ES,100901466311,4623155,2024-09-30 21:45:32+00:00,2024-09-30 21:45:33+00:00,BICYCLE,2024-09-30 21:59:36.356000+00:00,2024-09-30 22:11:08+00:00,2024-09-30 22:15:15+00:00,38.36,...,2490.76,2490.76,3220.66,2024-09-30,ALC,691.64,3.60,691644000000.01,691.64,691643999308.37
998,ES,100901492373,168086136,2024-09-30 21:57:16+00:00,2024-09-30 21:57:17+00:00,CAR,2024-09-30 22:13:17.522000+00:00,2024-09-30 22:20:54+00:00,2024-09-30 22:24:49+00:00,38.35,...,1772.50,1772.50,2455.20,2024-09-30,ALC,456.48,3.88,456478000000.00,456.48,456477999543.53


In [32]:
model_linear_smaller.predict(X_test.iloc[[0]])

array([-0.07244721])

#### Train on 3 days of data

In [33]:
# We take the last week of the dataset to test the model
days_for_train = 3
days_for_test = 1
end_train_date = pd.to_datetime(start_date) + pd.Timedelta(days=days_for_train)
end_train_date = end_train_date.strftime("%Y-%m-%d")
print(f'Start date: {start_date} | End test date: {end_train_date} | Test date: {end_date}')

Start date: 2024-09-30 | End test date: 2024-10-03 | Test date: 2024-10-20


In [34]:
X_train_3d = pd.read_parquet("data/parquet/dataframe.parquet/", filters=[('creation_date', '<', end_train_date)])
y_train_3d = X_train_3d['delivery_entering_timestamp'] - X_train_3d['pickup_timestamp']
y_train_3d = pd.Series(y_train_3d, name='pickup_to_delivery')
X_test_3d = pd.read_parquet("data/parquet/dataframe.parquet", filters=[('creation_date', '=', end_date)])
y_test_3d = (X_test_3d['delivery_entering_timestamp'] - X_test_3d['pickup_timestamp']).dt.total_seconds()
y_test_3d = pd.Series(y_test_3d, dtype=np.float64, name='pickup_to_delivery')
print("Train datasets 3d shapes: ", X_train_3d.shape, y_train_3d.shape)
print("Test datasets 3d shapes: ", X_test_3d.shape, y_test_3d.shape)

Train datasets 3d shapes:  (130457, 23) (130457,)
Test datasets 3d shapes:  (56149, 23) (56149,)


In [35]:
start = datetime.datetime.now()
print(f"Start time: {start}")

Start time: 2025-04-16 18:45:37.440809


In [36]:
model_linear_3d = LinearModel(model_type='linear', encoding='dummy')
model_linear_3d.fit(X_train_3d, y_train_3d)

INFO:root:Train datasets shapes: X: (130457, 23), y: (130457,)
INFO:root:Starting to encode variables
INFO:root:Encoded dataset shape: X: (130457, 38)
INFO:root:Finished to encode variables. Starting to fit the model
INFO:root:Finished training the model


<estimator.LinearModel at 0x11e01a9c0>

In [37]:
end = datetime.datetime.now()
print(f"End time: {end}")
model_linear_3d_time = end - start
print(f"Time elapsed: {model_linear_3d_time}")

End time: 2025-04-16 18:45:37.958464
Time elapsed: 0:00:00.517655


In [38]:
model_linear_3d_eval = model_linear_3d.evaluate(X_test_3d, y_test_3d)
model_linear_3d_eval

(np.float64(3635090825535.1387), np.float64(1.3307022853175335e+25))

#### Train on a week of data

In [39]:
# We take the last week of the dataset to test the model
days_for_train = 7
days_for_test = 3
end_train_date = pd.to_datetime(start_date) + pd.Timedelta(days=days_for_train)
end_train_date = end_train_date.strftime("%Y-%m-%d")
begin_test_date = pd.to_datetime(end_date) - pd.Timedelta(days=days_for_test-1)
begin_test_date = begin_test_date.strftime("%Y-%m-%d")
print(f'Start date: {start_date} | End test date: {end_train_date} | Begin test date: {begin_test_date} | End date: {end_date}')

Start date: 2024-09-30 | End test date: 2024-10-07 | Begin test date: 2024-10-18 | End date: 2024-10-20


In [40]:
X_train_7d = pd.read_parquet("data/parquet/dataframe.parquet/", filters=[('creation_date', '<', end_train_date)])
y_train_7d = X_train_7d['delivery_entering_timestamp'] - X_train_7d['pickup_timestamp']
y_train_7d = pd.Series(y_train_7d, name='pickup_to_delivery')
X_test_7d = pd.read_parquet("data/parquet/dataframe.parquet", filters=[('creation_date', '>=', begin_test_date)])
y_test_7d = (X_test_7d['delivery_entering_timestamp'] - X_test_7d['pickup_timestamp']).dt.total_seconds()
y_test_7d = pd.Series(y_test_7d, dtype=np.float64, name='pickup_to_delivery')
print("Train datasets 7d shapes: ", X_train_7d.shape, y_train_7d.shape)
print("Test datasets 7d shapes: ", X_test_7d.shape, y_test_7d.shape)

Train datasets 7d shapes:  (345313, 23) (345313,)
Test datasets 7d shapes:  (169275, 23) (169275,)


In [41]:
start = datetime.datetime.now()
print(f"Start time: {start}")

Start time: 2025-04-16 18:45:38.475072


In [42]:
model_linear_7d = LinearModel(model_type='linear', encoding='dummy')
model_linear_7d.fit(X_train_7d, y_train_7d)

INFO:root:Train datasets shapes: X: (345313, 23), y: (345313,)
INFO:root:Starting to encode variables
INFO:root:Encoded dataset shape: X: (345313, 38)
INFO:root:Finished to encode variables. Starting to fit the model
INFO:root:Finished training the model


<estimator.LinearModel at 0x126dbb3e0>

In [43]:
end = datetime.datetime.now()
print(f"End time: {end}")
model_linear_7d_time = end - start
print(f"Time elapsed: {model_linear_7d_time}")

End time: 2025-04-16 18:45:39.527654
Time elapsed: 0:00:01.052582


In [44]:
model_linear_7d_eval = model_linear_7d.evaluate(X_test_7d, y_test_7d)
model_linear_7d_eval

(np.float64(410102272288.90826), np.float64(2.101754208190989e+23))

#### Train on full data

In [45]:
start = datetime.datetime.now()
print(f"Start time: {start}")

Start time: 2025-04-16 18:45:39.814699


In [46]:
model_linear = LinearModel(model_type='linear', encoding='dummy')
model_linear.fit(X_train, y_train)

INFO:root:Train datasets shapes: X: (678751, 25), y: (678751,)
INFO:root:Starting to encode variables
INFO:root:Encoded dataset shape: X: (678751, 40)
INFO:root:Finished to encode variables. Starting to fit the model
INFO:root:Finished training the model


<estimator.LinearModel at 0x126dbae10>

In [47]:
end = datetime.datetime.now()
print(f"End time: {end}")
model_linear_time = end - start
print(f"Time elapsed: {model_linear_time}")

End time: 2025-04-16 18:45:42.234415
Time elapsed: 0:00:02.419716


In [48]:
model_linear_eval = model_linear.evaluate(X_test, y_test)
model_linear_eval

(np.float64(448.9651072961018), np.float64(282205.9319198323))

#### Linear Model encoding dummy variables with time difference columns

In [49]:
start = datetime.datetime.now()
print(f"Start time: {start}")

Start time: 2025-04-16 18:45:43.139217


In [50]:
model_linear_difference = LinearModel(model_type='linear', encoding='dummy+difference')
model_linear_difference.fit(X_train, y_train)

INFO:root:Train datasets shapes: X: (678751, 25), y: (678751,)
INFO:root:Starting to encode variables
INFO:root:Encoded dataset shape: X: (678751, 44)
INFO:root:Finished to encode variables. Starting to fit the model
INFO:root:Finished training the model


<estimator.LinearModel at 0x126db9d30>

In [51]:
end = datetime.datetime.now()
print(f"End time: {end}")
model_linear_difference_time = end - start
print(f"Time elapsed: {model_linear_difference_time}")

End time: 2025-04-16 18:45:45.809818
Time elapsed: 0:00:02.670601


In [52]:
model_linear_difference_eval = model_linear_difference.evaluate(X_test, y_test)
model_linear_difference_eval

(np.float64(448.96458796861975), np.float64(282206.65591367864))

### Linear Model SGD encoding dummy variables

In [53]:
model_linear_SGD_smaller = LinearModel(model_type='SGD', encoding='dummy')
model_linear_SGD_smaller.fit(X_train_smaller, y_train_smaller)

INFO:root:Train datasets shapes: X: (1000, 25), y: (1000,)
INFO:root:Starting to encode variables
INFO:root:Encoded dataset shape: X: (1000, 39)
INFO:root:Finished to encode variables. Starting to fit the model
INFO:root:Finished training the model


<estimator.LinearModel at 0x126dbba40>

In [54]:
X_test_expanded = X_train_smaller.copy()
X_test_expanded['y_test_predicted'] = model_linear_SGD_smaller.predict(X_test_expanded)
X_test_expanded['y_test'] = (X_test_expanded['delivery_entering_timestamp'] - X_test_expanded['pickup_timestamp']).dt.total_seconds()
X_test_expanded['diff'] = X_test_expanded['y_test_predicted'] - X_test_expanded['y_test']
X_test_expanded

Unnamed: 0,country_code,order_id,courier_id,creation_timestamp,activation_timestamp,transport,pickup_timestamp,delivery_entering_timestamp,delivery_timestamp,pickup_latitude,...,pd_distance_haversine_m,pd_distance_haversine_m_sk,pd_distance_manhattan_m,creation_date,city_code,time,velocity,y_test_predicted,y_test,diff
0,ES,100901000206,169065712,2024-09-30 19:00:49+00:00,2024-09-30 19:00:52+00:00,CAR,2024-09-30 19:20:41.337000+00:00,2024-09-30 19:30:56+00:00,2024-09-30 19:38:11+00:00,38.37,...,3147.42,3147.42,4371.55,2024-09-30,ALC,614.66,5.12,614698882284.64,614.66,614698881669.97
1,ES,100901487351,173633632,2024-09-30 21:54:58+00:00,2024-09-30 21:54:59+00:00,MOTORBIKE,2024-09-30 21:58:40.678000+00:00,2024-09-30 22:12:29+00:00,2024-09-30 22:16:54+00:00,38.34,...,1959.90,1959.90,2228.78,2024-09-30,ALC,828.32,2.37,828246159697.12,828.32,828246158868.80
2,ES,100901615508,2320936,2024-09-30 23:17:55+00:00,2024-09-30 23:17:55+00:00,CAR,2024-09-30 23:25:52.013000+00:00,2024-09-30 23:34:12+00:00,2024-09-30 23:35:27+00:00,38.43,...,3043.08,3043.08,4304.54,2024-09-30,ALC,499.99,6.09,500004585856.64,499.99,500004585356.65
3,ES,100900153723,174681565,2024-09-30 12:50:30+00:00,2024-09-30 12:50:31+00:00,MOTORBIKE,2024-09-30 12:51:54.134000+00:00,2024-09-30 12:53:27+00:00,2024-09-30 12:56:56+00:00,38.39,...,384.77,384.77,538.64,2024-09-30,ALC,92.87,4.14,92901290940.28,92.87,92901290847.41
4,ES,100900235936,142421923,2024-09-30 13:23:50+00:00,2024-09-30 13:23:51+00:00,CAR,2024-09-30 13:49:36.097000+00:00,2024-09-30 13:57:33+00:00,2024-09-30 14:01:33+00:00,38.41,...,2321.10,2321.10,2443.13,2024-09-30,ALC,476.90,4.87,476965780344.04,476.90,476965779867.13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,ES,100901315231,146195464,2024-09-30 20:49:51+00:00,2024-09-30 20:49:52+00:00,CAR,2024-09-30 20:59:02.847000+00:00,2024-09-30 21:04:33+00:00,2024-09-30 21:05:24+00:00,38.36,...,1485.18,1485.18,2054.80,2024-09-30,ALC,330.15,4.50,330139778257.92,330.15,330139777927.77
996,ES,100901448857,141248791,2024-09-30 21:38:14+00:00,2024-09-30 21:38:15+00:00,MOTORBIKE,2024-09-30 21:46:44.524000+00:00,2024-09-30 22:00:39+00:00,2024-09-30 22:01:02+00:00,38.36,...,6872.88,6872.88,9730.64,2024-09-30,ALC,834.48,8.24,834477335768.72,834.48,834477334934.25
997,ES,100901466311,4623155,2024-09-30 21:45:32+00:00,2024-09-30 21:45:33+00:00,BICYCLE,2024-09-30 21:59:36.356000+00:00,2024-09-30 22:11:08+00:00,2024-09-30 22:15:15+00:00,38.36,...,2490.76,2490.76,3220.66,2024-09-30,ALC,691.64,3.60,691591388213.91,691.64,691591387522.27
998,ES,100901492373,168086136,2024-09-30 21:57:16+00:00,2024-09-30 21:57:17+00:00,CAR,2024-09-30 22:13:17.522000+00:00,2024-09-30 22:20:54+00:00,2024-09-30 22:24:49+00:00,38.35,...,1772.50,1772.50,2455.20,2024-09-30,ALC,456.48,3.88,456429784348.47,456.48,456429783891.99


In [55]:
model_linear_SGD_smaller.predict(X_test.iloc[[0]])

array([-2.05865272e+10])

#### Train on 3 days of data

In [56]:
start = datetime.datetime.now()
print(f"Start time: {start}")

Start time: 2025-04-16 18:45:46.792226


In [57]:
model_linear_SGD_3d = LinearModel(model_type='sgd', encoding='dummy')
model_linear_SGD_3d.fit(X_train_3d, y_train_3d)

INFO:root:Train datasets shapes: X: (130457, 23), y: (130457,)
INFO:root:Starting to encode variables
INFO:root:Encoded dataset shape: X: (130457, 38)
INFO:root:Finished to encode variables. Starting to fit the model
INFO:root:Finished training the model


<estimator.LinearModel at 0x11e0193d0>

In [58]:
end = datetime.datetime.now()
print(f"End time: {end}")
model_linear_SGD_3d_time = end - start
print(f"Time elapsed: {model_linear_SGD_3d_time}")

End time: 2025-04-16 18:45:49.376108
Time elapsed: 0:00:02.583882


In [59]:
model_linear_SGD_3d_eval = model_linear_SGD_3d.evaluate(X_test_3d, y_test_3d)
model_linear_SGD_3d_eval

(np.float64(3397718904354.339), np.float64(1.1631829390496383e+25))

#### Train on a week of data

In [60]:
start = datetime.datetime.now()
print(f"Start time: {start}")

Start time: 2025-04-16 18:45:49.694764


In [61]:
model_linear_SGD_7d = LinearModel(model_type='SGD', encoding='dummy')
model_linear_SGD_7d.fit(X_train_7d, y_train_7d)

INFO:root:Train datasets shapes: X: (345313, 23), y: (345313,)
INFO:root:Starting to encode variables
INFO:root:Encoded dataset shape: X: (345313, 38)
INFO:root:Finished to encode variables. Starting to fit the model
INFO:root:Finished training the model


<estimator.LinearModel at 0x126dba900>

In [62]:
end = datetime.datetime.now()
print(f"End time: {end}")
model_linear_SGD_7d_time = end - start
print(f"Time elapsed: {model_linear_SGD_7d_time}")

End time: 2025-04-16 18:45:56.228983
Time elapsed: 0:00:06.534219


In [63]:
model_linear_SGD_7d_eval = model_linear_SGD_7d.evaluate(X_test_7d, y_test_7d)
model_linear_SGD_7d_eval

(np.float64(147974625486.1344), np.float64(4.577837556596903e+22))

#### Train on full data

In [64]:
start = datetime.datetime.now()
print(f"Start time: {start}")

Start time: 2025-04-16 18:45:56.696478


In [65]:
model_linear_SGD = LinearModel(model_type='sgd', encoding='dummy')
model_linear_SGD.fit(X_train, y_train)

INFO:root:Train datasets shapes: X: (678751, 25), y: (678751,)
INFO:root:Starting to encode variables
INFO:root:Encoded dataset shape: X: (678751, 40)
INFO:root:Finished to encode variables. Starting to fit the model
INFO:root:Finished training the model


<estimator.LinearModel at 0x126dba420>

In [66]:
end = datetime.datetime.now()
print(f"End time: {end}")
model_linear_SGD_time = end - start
print(f"Time elapsed: {model_linear_SGD_time}")

End time: 2025-04-16 18:46:01.343669
Time elapsed: 0:00:04.647191


In [67]:
model_linear_SGD_eval = model_linear_SGD.evaluate(X_test, y_test)
model_linear_SGD_eval

(np.float64(2781503689.460166), np.float64(8.538907542794446e+18))

### LinearModel cyclical encoding

In [68]:
model_linear_cyclical_smaller = LinearModel(model_type='linear', encoding='cyclical')
model_linear_cyclical_smaller.fit(X_train_smaller, y_train_smaller)

INFO:root:Train datasets shapes: X: (1000, 25), y: (1000,)
INFO:root:Starting to encode variables
INFO:root:Encoded dataset shape: X: (1000, 58)
INFO:root:Finished to encode variables. Starting to fit the model
INFO:root:Finished training the model


<estimator.LinearModel at 0x11d50f260>

In [69]:
X_test_expanded = X_train_smaller.copy()
X_test_expanded['y_test_predicted'] = model_linear_cyclical_smaller.predict(X_test_expanded)
X_test_expanded['y_test'] = (X_test_expanded['delivery_entering_timestamp'] - X_test_expanded['pickup_timestamp']).dt.total_seconds()
X_test_expanded['diff'] = X_test_expanded['y_test_predicted'] - X_test_expanded['y_test']
X_test_expanded

Unnamed: 0,country_code,order_id,courier_id,creation_timestamp,activation_timestamp,transport,pickup_timestamp,delivery_entering_timestamp,delivery_timestamp,pickup_latitude,...,pd_distance_haversine_m,pd_distance_haversine_m_sk,pd_distance_manhattan_m,creation_date,city_code,time,velocity,y_test_predicted,y_test,diff
0,ES,100901000206,169065712,2024-09-30 19:00:49+00:00,2024-09-30 19:00:52+00:00,CAR,2024-09-30 19:20:41.337000+00:00,2024-09-30 19:30:56+00:00,2024-09-30 19:38:11+00:00,38.37,...,3147.42,3147.42,4371.55,2024-09-30,ALC,614.66,5.12,614663000000.03,614.66,614662999385.37
1,ES,100901487351,173633632,2024-09-30 21:54:58+00:00,2024-09-30 21:54:59+00:00,MOTORBIKE,2024-09-30 21:58:40.678000+00:00,2024-09-30 22:12:29+00:00,2024-09-30 22:16:54+00:00,38.34,...,1959.90,1959.90,2228.78,2024-09-30,ALC,828.32,2.37,828322000000.01,828.32,828321999171.68
2,ES,100901615508,2320936,2024-09-30 23:17:55+00:00,2024-09-30 23:17:55+00:00,CAR,2024-09-30 23:25:52.013000+00:00,2024-09-30 23:34:12+00:00,2024-09-30 23:35:27+00:00,38.43,...,3043.08,3043.08,4304.54,2024-09-30,ALC,499.99,6.09,499987000000.03,499.99,499986999500.04
3,ES,100900153723,174681565,2024-09-30 12:50:30+00:00,2024-09-30 12:50:31+00:00,MOTORBIKE,2024-09-30 12:51:54.134000+00:00,2024-09-30 12:53:27+00:00,2024-09-30 12:56:56+00:00,38.39,...,384.77,384.77,538.64,2024-09-30,ALC,92.87,4.14,92865999999.98,92.87,92865999907.12
4,ES,100900235936,142421923,2024-09-30 13:23:50+00:00,2024-09-30 13:23:51+00:00,CAR,2024-09-30 13:49:36.097000+00:00,2024-09-30 13:57:33+00:00,2024-09-30 14:01:33+00:00,38.41,...,2321.10,2321.10,2443.13,2024-09-30,ALC,476.90,4.87,476903000000.01,476.90,476902999523.11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,ES,100901315231,146195464,2024-09-30 20:49:51+00:00,2024-09-30 20:49:52+00:00,CAR,2024-09-30 20:59:02.847000+00:00,2024-09-30 21:04:33+00:00,2024-09-30 21:05:24+00:00,38.36,...,1485.18,1485.18,2054.80,2024-09-30,ALC,330.15,4.50,330153000000.00,330.15,330152999669.85
996,ES,100901448857,141248791,2024-09-30 21:38:14+00:00,2024-09-30 21:38:15+00:00,MOTORBIKE,2024-09-30 21:46:44.524000+00:00,2024-09-30 22:00:39+00:00,2024-09-30 22:01:02+00:00,38.36,...,6872.88,6872.88,9730.64,2024-09-30,ALC,834.48,8.24,834476000000.09,834.48,834475999165.61
997,ES,100901466311,4623155,2024-09-30 21:45:32+00:00,2024-09-30 21:45:33+00:00,BICYCLE,2024-09-30 21:59:36.356000+00:00,2024-09-30 22:11:08+00:00,2024-09-30 22:15:15+00:00,38.36,...,2490.76,2490.76,3220.66,2024-09-30,ALC,691.64,3.60,691644000000.02,691.64,691643999308.37
998,ES,100901492373,168086136,2024-09-30 21:57:16+00:00,2024-09-30 21:57:17+00:00,CAR,2024-09-30 22:13:17.522000+00:00,2024-09-30 22:20:54+00:00,2024-09-30 22:24:49+00:00,38.35,...,1772.50,1772.50,2455.20,2024-09-30,ALC,456.48,3.88,456478000000.00,456.48,456477999543.53


In [70]:
model_linear_cyclical_smaller.predict(X_test.iloc[[0]])

array([-0.08057401])

#### Train on full data

In [71]:
start = datetime.datetime.now()
print(f"Start time: {start}")

Start time: 2025-04-16 18:46:02.626618


In [72]:
model_linear_cyclical = LinearModel(model_type='linear', encoding='cyclical')
model_linear_cyclical.fit(X_train, y_train)

INFO:root:Train datasets shapes: X: (678751, 25), y: (678751,)
INFO:root:Starting to encode variables
INFO:root:Encoded dataset shape: X: (678751, 59)
INFO:root:Finished to encode variables. Starting to fit the model
INFO:root:Finished training the model


<estimator.LinearModel at 0x126d5cdd0>

In [73]:
end = datetime.datetime.now()
print(f"End time: {end}")
model_linear_cyclical_time = end - start
print(f"Time elapsed: {model_linear_cyclical_time}")

End time: 2025-04-16 18:46:10.656137
Time elapsed: 0:00:08.029519


In [74]:
model_linear_cyclical_eval = model_linear_cyclical.evaluate(X_test, y_test)
model_linear_cyclical_eval

(np.float64(448.9647149693681), np.float64(282202.8674738092))

#### LinearModel cyclical encoding with time difference columns

In [75]:
start = datetime.datetime.now()
print(f"Start time: {start}")

Start time: 2025-04-16 18:46:13.266871


In [76]:
model_linear_cyclical_difference = LinearModel(model_type='linear', encoding='cyclical+difference')
model_linear_cyclical_difference.fit(X_train, y_train)

INFO:root:Train datasets shapes: X: (678751, 25), y: (678751,)
INFO:root:Starting to encode variables
INFO:root:Encoded dataset shape: X: (678751, 63)
INFO:root:Finished to encode variables. Starting to fit the model
INFO:root:Finished training the model


<estimator.LinearModel at 0x11db83f50>

In [77]:
end = datetime.datetime.now()
print(f"End time: {end}")
model_linear_cyclical_difference_time = end - start
print(f"Time elapsed: {model_linear_cyclical_difference_time}")

End time: 2025-04-16 18:46:20.874684
Time elapsed: 0:00:07.607813


In [79]:
model_linear_cyclical_difference_eval = model_linear_cyclical_difference.evaluate(X_test, y_test)
model_linear_cyclical_difference_eval

(np.float64(448.9656771115914), np.float64(282207.14166018413))

### LinearModel

Compare performance of the model with and without standardization

## Models comparison

In [80]:
models = ['model_bl_sum' , 'model_bl_mean', 'model_linear_3d', 'model_linear_7d', 'model_linear', 'model_linear_difference', 'model_linear_SGD_3d', 'model_linear_SGD_7d', 'model_linear_SGD', 'model_linear_cyclical', 'model_linear_cyclical_difference']

data = []
for model in models:
    time_value = eval(model + '_time')
    print(f"Time to fit {model}: {time_value}")
    eval_value = eval(model + '_eval')
    print(f"Evaluation of {model}: {eval_value}")
    data.append([model, time_value / datetime.timedelta(milliseconds=1), eval_value[0], eval_value[1]])

models_eval = pd.DataFrame(data, columns=['Model', 'Training time [ms]', 'MAE', 'MSE'])
models_eval

Time to fit model_bl_sum: 0:00:00.360436
Evaluation of model_bl_sum: (np.float64(145.6280391037287), np.float64(52902.38788391615))
Time to fit model_bl_mean: 0:00:00.297521
Evaluation of model_bl_mean: (np.float64(161.23709324791398), np.float64(64837.86641313063))
Time to fit model_linear_3d: 0:00:00.517655
Evaluation of model_linear_3d: (np.float64(3635090825535.1387), np.float64(1.3307022853175335e+25))
Time to fit model_linear_7d: 0:00:01.052582
Evaluation of model_linear_7d: (np.float64(410102272288.90826), np.float64(2.101754208190989e+23))
Time to fit model_linear: 0:00:02.419716
Evaluation of model_linear: (np.float64(448.9651072961018), np.float64(282205.9319198323))
Time to fit model_linear_difference: 0:00:02.670601
Evaluation of model_linear_difference: (np.float64(448.96458796861975), np.float64(282206.65591367864))
Time to fit model_linear_SGD_3d: 0:00:02.583882
Evaluation of model_linear_SGD_3d: (np.float64(3397718904354.339), np.float64(1.1631829390496383e+25))
Time to

Unnamed: 0,Model,Training time [ms],MAE,MSE
0,model_bl_sum,360.44,145.63,52902.39
1,model_bl_mean,297.52,161.24,64837.87
2,model_linear_3d,517.65,3635090825535.14,1.3307022853175335e+25
3,model_linear_7d,1052.58,410102272288.91,2.1017542081909888e+23
4,model_linear,2419.72,448.97,282205.93
5,model_linear_difference,2670.6,448.96,282206.66
6,model_linear_SGD_3d,2583.88,3397718904354.34,1.1631829390496383e+25
7,model_linear_SGD_7d,6534.22,147974625486.13,4.577837556596903e+22
8,model_linear_SGD,4647.19,2781503689.46,8.538907542794445e+18
9,model_linear_cyclical,8029.52,448.96,282202.87
