In [20]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import Dataset, random_split, DataLoader

In [21]:
# Read the raw delivery records; the file is tab-delimited despite its .csv extension.
delivery_df = pd.read_csv(
    "/content/delivery_raw.csv",
    delimiter="\t",
)

delivery_df

Unnamed: 0,market_id,created_at,actual_delivery_time,store_id,store_primary_category,order_protocol,total_items,subtotal,num_distinct_items,min_item_price,max_item_price,total_onshift,total_busy,total_outstanding_orders,estimated_order_place_duration,estimated_store_to_consumer_driving_duration
0,1.0,2015-02-06 22:24:17,2015-02-06 23:27:16,1845,american,1.0,4,3441,4,557,1239,33.0,14.0,21.0,446,861.0
1,2.0,2015-02-10 21:49:25,2015-02-10 22:56:29,5477,mexican,2.0,1,1900,1,1400,1400,1.0,2.0,2.0,446,690.0
2,3.0,2015-01-22 20:39:28,2015-01-22 21:09:09,5477,,1.0,1,1900,1,1900,1900,1.0,0.0,0.0,446,690.0
3,3.0,2015-02-03 21:21:45,2015-02-03 22:13:00,5477,,1.0,6,6900,5,600,1800,1.0,1.0,2.0,446,289.0
4,3.0,2015-02-15 02:40:36,2015-02-15 03:20:26,5477,,1.0,3,3900,3,1100,1600,6.0,6.0,9.0,446,650.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
197423,1.0,2015-02-17 00:19:41,2015-02-17 01:24:48,2956,fast,4.0,3,1389,3,345,649,17.0,17.0,23.0,251,331.0
197424,1.0,2015-02-13 00:01:59,2015-02-13 00:58:22,2956,fast,4.0,6,3010,4,405,825,12.0,11.0,14.0,251,915.0
197425,1.0,2015-01-24 04:46:08,2015-01-24 05:36:16,2956,fast,4.0,5,1836,3,300,399,39.0,41.0,40.0,251,795.0
197426,1.0,2015-02-01 18:18:15,2015-02-01 19:23:22,3630,sandwich,1.0,1,1175,1,535,535,7.0,7.0,12.0,446,384.0


In [22]:
# Show the dtype pandas inferred for every column of the freshly loaded frame.
delivery_df.dtypes

market_id                                       float64
created_at                                       object
actual_delivery_time                             object
store_id                                          int64
store_primary_category                           object
order_protocol                                  float64
total_items                                       int64
subtotal                                          int64
num_distinct_items                                int64
min_item_price                                    int64
max_item_price                                    int64
total_onshift                                   float64
total_busy                                      float64
total_outstanding_orders                        float64
estimated_order_place_duration                    int64
estimated_store_to_consumer_driving_duration    float64
dtype: object

In [23]:
# Discard every row that has at least one missing value, then renumber the
# surviving rows from 0 so the index is contiguous again.
delivery_df = delivery_df.dropna().reset_index(drop=True)

In [24]:
# Parse both timestamp columns and derive the regression target:
# seconds elapsed between order creation and actual delivery.
# Keyword arguments of assign() are evaluated in order, so the duration
# lambda already sees the parsed datetime columns.
delivery_df = delivery_df.assign(
    created_at=lambda frame: pd.to_datetime(frame['created_at']),
    actual_delivery_time=lambda frame: pd.to_datetime(frame['actual_delivery_time']),
    delivery_duration=lambda frame: (
        frame['actual_delivery_time'] - frame['created_at']
    ).dt.total_seconds(),
)

delivery_df[['created_at', 'delivery_duration','estimated_store_to_consumer_driving_duration']]

Unnamed: 0,created_at,delivery_duration,estimated_store_to_consumer_driving_duration
0,2015-02-06 22:24:17,3779.0,861.0
1,2015-02-10 21:49:25,4024.0,690.0
2,2015-02-16 00:11:35,1586.0,289.0
3,2015-02-12 03:36:46,2273.0,795.0
4,2015-01-27 02:12:36,2988.0,205.0
...,...,...,...
175772,2015-02-17 00:19:41,3907.0,331.0
175773,2015-02-13 00:01:59,3383.0,915.0
175774,2015-01-24 04:46:08,3008.0,795.0
175775,2015-02-01 18:18:15,3907.0,384.0


In [25]:
# Summary statistics of the cleaned frame, including the derived delivery_duration column.
delivery_df.describe()

Unnamed: 0,market_id,store_id,order_protocol,total_items,subtotal,num_distinct_items,min_item_price,max_item_price,total_onshift,total_busy,total_outstanding_orders,estimated_order_place_duration,estimated_store_to_consumer_driving_duration,delivery_duration
count,175777.0,175777.0,175777.0,175777.0,175777.0,175777.0,175777.0,175777.0,175777.0,175777.0,175777.0,175777.0,175777.0,175777.0
mean,2.743726,3556.299561,2.911752,3.204976,2697.111147,2.67506,684.965433,1160.158616,44.918664,41.861381,58.230115,307.818105,546.07724,2866.422672
std,1.330963,2055.959006,1.513128,2.674055,1828.554893,1.625681,519.882924,560.828571,34.544724,32.168505,52.731043,89.796587,218.717798,1660.741001
min,1.0,1.0,1.0,1.0,0.0,1.0,-86.0,0.0,-4.0,-5.0,-6.0,0.0,0.0,101.0
25%,2.0,1703.0,1.0,2.0,1412.0,1.0,299.0,799.0,17.0,15.0,17.0,251.0,384.0,2105.0
50%,2.0,3620.0,3.0,3.0,2224.0,2.0,595.0,1095.0,37.0,35.0,41.0,251.0,544.0,2662.0
75%,4.0,5341.0,4.0,4.0,3410.0,3.0,942.0,1395.0,66.0,63.0,85.0,446.0,703.0,3383.0
max,6.0,6987.0,7.0,411.0,26800.0,20.0,14700.0,14700.0,171.0,154.0,285.0,2715.0,2088.0,373879.0


In [26]:
# Pairwise correlation between the numeric columns only.
# The frame still holds non-numeric columns (e.g. store_primary_category);
# without numeric_only=True pandas 1.5 emits a FutureWarning and
# pandas >= 2.0 raises instead of silently dropping them.
delivery_df.corr(numeric_only=True)

  delivery_df.corr()


Unnamed: 0,market_id,store_id,order_protocol,total_items,subtotal,num_distinct_items,min_item_price,max_item_price,total_onshift,total_busy,total_outstanding_orders,estimated_order_place_duration,estimated_store_to_consumer_driving_duration,delivery_duration
market_id,1.0,0.044918,-0.01334,0.003567,-0.000724,0.015506,-0.010939,-0.00726,0.074289,0.065351,0.068223,-0.052356,0.019141,-0.037958
store_id,0.044918,1.0,0.013309,-0.009154,-0.012625,-0.013197,-0.007054,-0.016991,-0.023585,-0.024182,-0.021119,0.02238,-0.001163,-0.005618
order_protocol,-0.01334,0.013309,1.0,0.007305,-0.051889,-0.023943,-0.043845,-0.090518,0.147408,0.152001,0.136881,-0.682451,-0.009994,-0.047027
total_items,0.003567,-0.009154,0.007305,1.0,0.554951,0.758339,-0.389471,-0.053749,0.032087,0.029084,0.034818,-0.02586,0.006589,0.077331
subtotal,-0.000724,-0.012625,-0.051889,0.554951,1.0,0.680842,0.038778,0.509787,0.131239,0.12615,0.130481,0.03357,0.038156,0.143549
num_distinct_items,0.015506,-0.013197,-0.023943,0.758339,0.680842,1.0,-0.446503,0.047113,0.065793,0.060508,0.06773,0.000734,0.024535,0.105881
min_item_price,-0.010939,-0.007054,-0.043845,-0.389471,0.038778,-0.446503,1.0,0.541522,0.042655,0.044311,0.041478,0.054131,0.004464,0.009013
max_item_price,-0.00726,-0.016991,-0.090518,-0.053749,0.509787,0.047113,0.541522,1.0,0.133786,0.131835,0.131364,0.085226,0.029366,0.089278
total_onshift,0.074289,-0.023585,0.147408,0.032087,0.131239,0.065793,0.042655,0.133786,1.0,0.943725,0.936121,-0.188318,0.045269,0.047321
total_busy,0.065351,-0.024182,0.152001,0.029084,0.12615,0.060508,0.044311,0.131835,0.943725,1.0,0.932826,-0.194395,0.043948,0.060864


In [27]:
from sklearn.preprocessing import OneHotEncoder

print(delivery_df['store_primary_category'])
encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
# Build the one-hot frame on delivery_df's own index: pd.concat(axis=1) aligns
# rows by index label, so a default RangeIndex here would silently produce
# misaligned / NaN-padded rows whenever delivery_df is not indexed 0..n-1.
encoded_df = pd.DataFrame(
    encoder.fit_transform(delivery_df[['store_primary_category']]),
    index=delivery_df.index,
)

print(encoded_df.shape)
print(delivery_df.shape)
# Append the one-hot columns (labelled 0..k-1) to the right of the existing columns.
delivery_df = pd.concat([delivery_df, encoded_df], axis = 1)

0         american
1          mexican
2           indian
3          italian
4          italian
            ...   
175772        fast
175773        fast
175774        fast
175775    sandwich
175776    sandwich
Name: store_primary_category, Length: 175777, dtype: object
(175777, 73)
(175777, 17)


In [28]:
# Count missing values per column after the one-hot columns were appended.
delivery_df.isna().sum()

market_id                 0
created_at                0
actual_delivery_time      0
store_id                  0
store_primary_category    0
                         ..
68                        0
69                        0
70                        0
71                        0
72                        0
Length: 90, dtype: int64

In [29]:
# Columns excluded from the feature matrix by default: the target itself,
# the two raw timestamps, the un-encoded category string, and the two
# estimated_* duration columns.
removed_list = ['delivery_duration',
                'created_at',
                'actual_delivery_time',
                'store_primary_category',
                'estimated_store_to_consumer_driving_duration',
                'estimated_order_place_duration',
]


class delivery_dataset(Dataset) :
    """Map-style dataset over a delivery DataFrame.

    Features are every column of `df` that is not excluded, ordered as
    returned by `Index.difference`; the label is the `delivery_duration`
    column shaped (n_rows, 1).

    Args:
        df: frame containing a `delivery_duration` column plus feature columns.
        exclude: iterable of column labels to leave out of the features.
            Defaults to the module-level `removed_list`.

    Raises:
        ValueError / TypeError: if a remaining feature column cannot be
            converted to float32 (e.g. a string column was not excluded).
    """

    def __init__(self, df, exclude = None) :
        super().__init__()
        if exclude is None :
            exclude = removed_list
        # The label must never leak into the features, whatever the caller
        # passed in `exclude`; dict.fromkeys de-duplicates while keeping order.
        excluded = list(dict.fromkeys([*exclude, 'delivery_duration']))
        feature_cols = df.columns.difference(excluded)
        # Materialise as float32 once: it matches the model's parameter dtype
        # and makes a stray non-numeric column fail here rather than inside
        # the DataLoader's collate step.
        self.data = df[feature_cols].to_numpy(dtype = np.float32)
        self.label = df['delivery_duration'].to_numpy(dtype = np.float32).reshape(-1, 1)

    def __getitem__(self, idx) :
        """Return the (features, label) pair stored at position `idx`."""
        return self.data[idx], self.label[idx]

    def __len__(self) :
        """Number of rows in the dataset."""
        return len(self.label)


In [30]:
# Wrap the whole frame and record the feature width, which is later used as
# the model's input_dim.
tot_dset = delivery_dataset(delivery_df)
data_len = tot_dset[0][0].shape[0]
print(data_len, len(tot_dset), sep='\n')
print(tot_dset[5])

84
175777
(array([0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 1.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 1.000e+00, 1.825e+03, 9.250e+02, 3.000e+00, 1.000e+00,
       2.841e+03, 4.475e+03, 1.000e+00, 

In [31]:
# Seed the splits so train/val/test membership is identical on every run;
# without an explicit generator random_split draws from the global RNG and
# the reported metrics are not reproducible.
split_generator = torch.Generator().manual_seed(42)

# Hold out 10% for testing, then 10% of the remainder for validation
# (roughly 81% / 9% / 10% overall).
train_dset, test_dset = random_split(tot_dset, [0.9, 0.1], generator = split_generator)
train_dset, val_dset = random_split(train_dset, [0.9, 0.1], generator = split_generator)

In [32]:
# Reshuffle the training samples every epoch so mini-batches are not replayed
# in the same order; evaluation loaders keep a fixed order.
train_dataloader = DataLoader(train_dset, batch_size = 64, shuffle = True)
val_dataloader = DataLoader(val_dset, batch_size = 64)
test_dataloader = DataLoader(test_dset, batch_size = 64)

In [33]:
class mlp_model(nn.Module) :
    """Fully connected network: an input projection, `hidden_depth`
    equal-width hidden layers and a linear output head.

    ReLU is applied after the input projection and after every hidden layer;
    the output layer is left un-activated.
    """

    def __init__(self, input_dim, emb_dim, output_dim, hidden_depth = 1):
        super().__init__()
        # Layers are created in the order input -> hidden -> output so that
        # parameter initialisation consumes the RNG in that sequence.
        self.input_layer = nn.Linear(input_dim, emb_dim)
        hidden_layers = []
        for _ in range(hidden_depth) :
            hidden_layers.append(nn.Linear(emb_dim, emb_dim))
        self.hidden = nn.ModuleList(hidden_layers)
        self.output_layer = nn.Linear(emb_dim, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Run `x` through all layers and return the output-layer result."""
        # Every layer except the head is followed by the same activation.
        activated_stack = (self.input_layer, *self.hidden)
        for layer in activated_stack :
            x = self.relu(layer(x))
        return self.output_layer(x)


In [34]:
class MSELoss(nn.Module) :
    """Summed squared error between prediction and target.

    Despite the name nothing is averaged here: the wrapped criterion uses
    reduction='sum', so callers divide by the sample count themselves.
    """

    def __init__(self) :
        super().__init__()
        self.mse = nn.MSELoss(reduction = 'sum')

    def forward(self, yhat, y) :
        """Return the scalar sum of (yhat - y)^2 over all elements."""
        squared_error_total = self.mse(yhat, y)
        return squared_error_total
    
class Weighted_RMSELoss(nn.Module) :
    """Asymmetric RMSE that re-weights under-predicted elements.

    Each squared error is multiplied by `weight` where the prediction is
    below the target (yhat < y) and by 1 elsewhere. The loss is the square
    root of the weighted mean: sqrt(sum(w * err^2) / sum(w)).

    Args:
        weight: multiplier applied to under-predicted elements.
        eps: lower bound applied to the weighted mean before the square root.
    """

    def __init__(self, weight = 2., eps = 1e-12) :
        super().__init__()
        self.mse = nn.MSELoss(reduction='none')
        self.weight = weight
        self.eps = eps

    def forward(self, yhat, y) :
        """Return the scalar weighted RMSE of `yhat` against `y`."""
        squared_error = self.mse(yhat, y)
        # Build the weights as tensors in the dtype/device of the errors so an
        # integer `weight` does not produce an integer mask.
        weights = torch.where(
            yhat < y,
            torch.full_like(squared_error, self.weight),
            torch.ones_like(squared_error),
        )
        # Out-of-place product: the autograd intermediate is left untouched.
        weighted_mean = (squared_error * weights).sum() / weights.sum()
        # d/dx sqrt(x) is infinite at x == 0, which turns into NaN gradients on
        # a perfectly predicted batch; clamping keeps the gradient finite.
        return torch.sqrt(weighted_mean.clamp_min(self.eps))


In [35]:
from torch import optim

# Fall back to the CPU when no CUDA device is available; a hard-coded
# torch.device('cuda') makes model.to(device) fail on CPU-only machines.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = mlp_model(input_dim = data_len, emb_dim = 1024, output_dim = 1, hidden_depth = 2)
model.to(device)
optimizer = optim.Adam(model.parameters(), lr = 2e-3)
# Training optimises the asymmetric loss (under-prediction weighted 4x);
# evaluation accumulates a plain summed squared error that the loops below
# turn into an RMSE.
train_criterion = Weighted_RMSELoss(weight = 4)
test_criterion = MSELoss()

In [36]:
# Print the dtype of the input layer's weights; batches fed to the model must use the same dtype.
print(model.input_layer.weight.dtype)

torch.float32


In [37]:
from tqdm import tqdm

epoch = 20

# Per epoch: one optimisation pass over the training loader, then one
# evaluation pass over the validation loader.
# NOTE: train_loss is the mean of per-batch weighted RMSE values, whereas
# val_loss is the unweighted RMSE over the whole validation set, so the two
# numbers are not directly comparable.
for i in range(epoch) :
    tot_train_size = 0
    tot_val_size = 0

    train_loss = 0
    val_loss = 0

    train_under_prediction = 0
    val_under_prediction = 0

    # Put the model in training mode explicitly so the loop stays correct if
    # mode-dependent layers (dropout, batch norm) are ever added.
    model.train()
    for data, y in tqdm(train_dataloader) :
        optimizer.zero_grad()

        # Single transfer + cast instead of building a CPU FloatTensor first.
        data = data.to(device, dtype = torch.float32)
        y = y.to(device, dtype = torch.float32)
        yhat = model(data)
        loss = train_criterion(yhat, y)

        loss.backward()
        optimizer.step()

        tot_train_size += len(y)
        train_loss += loss.item()
        # detach() replaces the legacy `.data` access; the count is bookkeeping only.
        train_under_prediction += torch.sum( yhat.detach() < y ).item()

    train_loss /= len(train_dataloader)
    train_under_prediction /= tot_train_size

    model.eval()
    with torch.no_grad() :
        for data, y in tqdm(val_dataloader) :

            data = data.to(device, dtype = torch.float32)
            y = y.to(device, dtype = torch.float32)
            yhat = model(data)
            # test_criterion returns a summed squared error for the batch.
            loss = test_criterion(yhat, y)

            tot_val_size += len(y)
            val_loss += loss.item()
            val_under_prediction += torch.sum( yhat < y ).item()

    # Summed squared error -> mean -> root gives the validation RMSE.
    val_loss /= tot_val_size
    val_loss = np.sqrt(val_loss)
    val_under_prediction /= tot_val_size

    print('[Epoch {:03d}] : train_under_prediction[{:0.04f}] train_loss [{:0.04f}] val_under_prediction[{:0.04f}] val_loss[{:0.04f}]'.format(
        i+1,
        train_under_prediction,
        train_loss,
        val_under_prediction,
        val_loss
    ))

100%|██████████| 2225/2225 [00:07<00:00, 285.06it/s]
100%|██████████| 248/248 [00:00<00:00, 744.40it/s]


[Epoch 001] : train_under_prediction[0.3267] train_loss [1499.6121] val_under_prediction[0.3956] val_loss[1289.7539]


100%|██████████| 2225/2225 [00:07<00:00, 303.62it/s]
100%|██████████| 248/248 [00:00<00:00, 722.91it/s]


[Epoch 002] : train_under_prediction[0.2906] train_loss [1390.9336] val_under_prediction[0.3456] val_loss[1288.5754]


100%|██████████| 2225/2225 [00:07<00:00, 286.85it/s]
100%|██████████| 248/248 [00:00<00:00, 753.70it/s]


[Epoch 003] : train_under_prediction[0.2845] train_loss [1361.1873] val_under_prediction[0.3344] val_loss[1278.7717]


100%|██████████| 2225/2225 [00:07<00:00, 287.46it/s]
100%|██████████| 248/248 [00:00<00:00, 739.44it/s]


[Epoch 004] : train_under_prediction[0.2811] train_loss [1346.3284] val_under_prediction[0.3578] val_loss[1248.5419]


100%|██████████| 2225/2225 [00:07<00:00, 308.14it/s]
100%|██████████| 248/248 [00:00<00:00, 732.63it/s]


[Epoch 005] : train_under_prediction[0.2769] train_loss [1333.0309] val_under_prediction[0.3287] val_loss[1258.7947]


100%|██████████| 2225/2225 [00:07<00:00, 287.44it/s]
100%|██████████| 248/248 [00:00<00:00, 718.51it/s]


[Epoch 006] : train_under_prediction[0.2744] train_loss [1327.0171] val_under_prediction[0.3525] val_loss[1240.6342]


100%|██████████| 2225/2225 [00:07<00:00, 294.25it/s]
100%|██████████| 248/248 [00:00<00:00, 535.36it/s]


[Epoch 007] : train_under_prediction[0.2718] train_loss [1322.1163] val_under_prediction[0.3294] val_loss[1246.3927]


100%|██████████| 2225/2225 [00:07<00:00, 305.50it/s]
100%|██████████| 248/248 [00:00<00:00, 736.03it/s]


[Epoch 008] : train_under_prediction[0.2696] train_loss [1314.4516] val_under_prediction[0.3624] val_loss[1224.4169]


100%|██████████| 2225/2225 [00:07<00:00, 288.09it/s]
100%|██████████| 248/248 [00:00<00:00, 707.31it/s]


[Epoch 009] : train_under_prediction[0.2682] train_loss [1309.0691] val_under_prediction[0.3350] val_loss[1233.8225]


100%|██████████| 2225/2225 [00:07<00:00, 298.36it/s]
100%|██████████| 248/248 [00:00<00:00, 513.10it/s]


[Epoch 010] : train_under_prediction[0.2666] train_loss [1305.9683] val_under_prediction[0.3071] val_loss[1243.5145]


100%|██████████| 2225/2225 [00:07<00:00, 296.94it/s]
100%|██████████| 248/248 [00:00<00:00, 680.93it/s]


[Epoch 011] : train_under_prediction[0.2657] train_loss [1299.6860] val_under_prediction[0.2550] val_loss[1298.2226]


100%|██████████| 2225/2225 [00:07<00:00, 283.47it/s]
100%|██████████| 248/248 [00:00<00:00, 738.50it/s]


[Epoch 012] : train_under_prediction[0.2644] train_loss [1297.3581] val_under_prediction[0.2658] val_loss[1276.7962]


100%|██████████| 2225/2225 [00:07<00:00, 303.70it/s]
100%|██████████| 248/248 [00:00<00:00, 563.77it/s]


[Epoch 013] : train_under_prediction[0.2629] train_loss [1296.1533] val_under_prediction[0.2826] val_loss[1250.3494]


100%|██████████| 2225/2225 [00:07<00:00, 289.04it/s]
100%|██████████| 248/248 [00:00<00:00, 711.15it/s]


[Epoch 014] : train_under_prediction[0.2617] train_loss [1290.0157] val_under_prediction[0.2875] val_loss[1238.4075]


100%|██████████| 2225/2225 [00:07<00:00, 287.46it/s]
100%|██████████| 248/248 [00:00<00:00, 720.10it/s]


[Epoch 015] : train_under_prediction[0.2609] train_loss [1285.7865] val_under_prediction[0.2673] val_loss[1256.8473]


100%|██████████| 2225/2225 [00:07<00:00, 306.64it/s]
100%|██████████| 248/248 [00:00<00:00, 732.52it/s]


[Epoch 016] : train_under_prediction[0.2601] train_loss [1281.1231] val_under_prediction[0.2657] val_loss[1256.4310]


100%|██████████| 2225/2225 [00:07<00:00, 289.41it/s]
100%|██████████| 248/248 [00:00<00:00, 718.04it/s]


[Epoch 017] : train_under_prediction[0.2583] train_loss [1277.9261] val_under_prediction[0.2337] val_loss[1295.4618]


100%|██████████| 2225/2225 [00:07<00:00, 288.19it/s]
100%|██████████| 248/248 [00:00<00:00, 716.37it/s]


[Epoch 018] : train_under_prediction[0.2565] train_loss [1273.3633] val_under_prediction[0.2578] val_loss[1256.4389]


100%|██████████| 2225/2225 [00:07<00:00, 306.10it/s]
100%|██████████| 248/248 [00:00<00:00, 747.38it/s]


[Epoch 019] : train_under_prediction[0.2553] train_loss [1269.8647] val_under_prediction[0.2130] val_loss[1327.7854]


100%|██████████| 2225/2225 [00:07<00:00, 287.12it/s]
100%|██████████| 248/248 [00:00<00:00, 730.51it/s]

[Epoch 020] : train_under_prediction[0.2542] train_loss [1265.6761] val_under_prediction[0.1853] val_loss[1378.8094]





In [38]:
# Final evaluation on the held-out test split: unweighted RMSE plus the
# fraction of samples whose prediction is below the true duration.
test_loss = 0
test_under_prediction = 0
test_size = 0

# Evaluation mode explicitly, so the result does not depend on whatever mode
# the model was left in by an earlier cell.
model.eval()
with torch.no_grad() :
    for data, y in tqdm(test_dataloader) :
        # Single transfer + cast instead of building a CPU FloatTensor first.
        data = data.to(device, dtype = torch.float32)
        y = y.to(device, dtype = torch.float32)
        yhat = model(data)

        # test_criterion returns a summed squared error for the batch.
        loss = test_criterion(yhat, y)

        test_size += len(y)
        test_loss += loss.item()
        test_under_prediction += torch.sum( yhat < y ).item()


# Summed squared error -> mean -> root gives the test RMSE.
test_loss /= test_size
test_loss = np.sqrt(test_loss)
test_under_prediction /= test_size
print('Test RMSE Loss [{:0.04f}] Test_under_prediction [{:0.04f}]'.format(
    test_loss,
    test_under_prediction
))


100%|██████████| 275/275 [00:00<00:00, 746.81it/s]

Test RMSE Loss [1250.8350] Test_under_prediction [0.1824]



