# **Food Delivery App Recommendation System: Model Evaluation**

In [3]:
# Import libraries
import pandas as pd
import numpy as np
import re

In [None]:
# Load full_data DataFrame
full_data = pd.read_csv('~/code/Alanoudis/food-delivery-rec/data/updated_data/full_data.txt')
full_data.head()

Unnamed: 0,customer_id,customer_geohash,order_id,vendor_id,day_of_week,order_time,order_day,chain_id,vendor_geohash,cuisine_origin,avg_vendor_rating,num_products,total_order_value,products_ordered
0,008ab40ac0,w21z7,9390,8ace9ccb,2,01:52:03,3 days,aece2f12,w21z7,chinese,3.5,1,0.4,Spicy中辣
1,008ce71183,w21zb,7057,f0d84faa,5,18:11:48,34 days,fc3b6153,w21zc,chinese,3.5,2,12.8,"Kway Teow Goreng with Petai & Prawns 虾仁臭豆炒河粉, ..."
2,008ce71183,w21zb,7058,a23e4559,2,19:22:26,31 days,788f82f6,w21zb,american,3.8,4,14.8,"Tuna D'Licious, Spicy Chicken Pizza Baguette, ..."
3,008ce71183,w21zb,7059,a23e4559,3,18:54:16,18 days,788f82f6,w21zb,american,3.6,4,11.6,"Tuna D'Licious, Chocolate Eclair, Tuna D'Licio..."
4,00ba08bab4,w21zt,347,78ce75cb,3,20:39:37,39 days,24975bf7,w21zt,american,4.5,1,3.6,McGriddles Feast


# **1. Feature Engineering: day_of_week, order_time, and order_day**

## 1.1) 1️⃣🕐 **day_of_week**

Type: numeric (0–6) or categorical
Meaning: weekday the order was placed on

✅ Recommended feature engineering:

Add categorical and binary features that capture weekly patterns.

In [7]:
# Ensure day_of_week is integer in range [0–6]
full_data['day_of_week'] = pd.to_numeric(full_data['day_of_week'], errors='coerce').astype('Int64')
full_data = full_data[full_data['day_of_week'].between(0, 6, inclusive='both')]

# Drop any existing dow_* columns if re-running
dow_cols = [c for c in full_data.columns if re.fullmatch(r'dow_[0-6]', c)]
if dow_cols:
    full_data = full_data.drop(columns=dow_cols)

# One-hot encode day_of_week
day_dummies = pd.get_dummies(full_data['day_of_week'].astype(int), prefix='dow')
day_dummies = day_dummies.reindex(columns=[f'dow_{i}' for i in range(7)], fill_value=0)

# Weekend flag
full_data['is_weekend'] = full_data['day_of_week'].isin([5, 6]).astype(int)

# Merge and ensure no duplicate column names
full_data = pd.concat([full_data, day_dummies], axis=1)
full_data = full_data.loc[:, ~full_data.columns.duplicated()]

full_data.head()


Unnamed: 0,customer_id,customer_geohash,order_id,vendor_id,day_of_week,order_time,order_day,chain_id,vendor_geohash,cuisine_origin,...,meal_of_day,order_hour_sin,order_hour_cos,dow_0,dow_1,dow_2,dow_3,dow_4,dow_5,dow_6
0,008ab40ac0,w21z7,9390,8ace9ccb,2,1900-01-01 01:52:03,3 days,aece2f12,w21z7,chinese,...,late_night,0.258819,0.9659258,False,False,True,False,False,False,False
1,008ce71183,w21zb,7057,f0d84faa,5,1900-01-01 18:11:48,34 days,fc3b6153,w21zc,chinese,...,dinner,-1.0,-1.83697e-16,False,False,False,False,False,True,False
2,008ce71183,w21zb,7058,a23e4559,2,1900-01-01 19:22:26,31 days,788f82f6,w21zb,american,...,dinner,-0.965926,0.258819,False,False,True,False,False,False,False
3,008ce71183,w21zb,7059,a23e4559,3,1900-01-01 18:54:16,18 days,788f82f6,w21zb,american,...,dinner,-1.0,-1.83697e-16,False,False,False,True,False,False,False
4,00ba08bab4,w21zt,347,78ce75cb,3,1900-01-01 20:39:37,39 days,24975bf7,w21zt,american,...,dinner,-0.866025,0.5,False,False,False,True,False,False,False


✅ Useful for:
- Capturing weekly habits (e.g., “orders more on Fridays”)
- Analyzing order frequency by weekday

## 1.2) 2️⃣⏰ **order_time**

Type: string like '18:11:48'
Meaning: time of order during the day

✅ Recommended feature engineering:

Extract hour, meal period, and optionally a sin/cos time encoding (cyclical representation).

In [6]:
# convert to datetime.time
full_data['order_time'] = pd.to_datetime(full_data['order_time'], format='%H:%M:%S', errors='coerce')

# extract hour
full_data['order_hour'] = full_data['order_time'].dt.hour

# categorize into meal periods
def meal_of_day(h):
    if 5 <= h < 11:
        return 'breakfast'
    elif 11 <= h < 16:
        return 'lunch'
    elif 16 <= h < 22:
        return 'dinner'
    else:
        return 'late_night'

full_data['meal_of_day'] = full_data['order_hour'].apply(meal_of_day)

# optional: cyclical encoding for time-based models
import numpy as np
full_data['order_hour_sin'] = np.sin(2 * np.pi * full_data['order_hour'] / 24)
full_data['order_hour_cos'] = np.cos(2 * np.pi * full_data['order_hour'] / 24)


✅ Useful for:
- Understanding time-of-day preferences
- Clustering orders by meal time
- Feature input for time-aware models

## 1.3) 3️⃣📅 **order_day**
Type: string like "85 days"
Meaning: how many days from a reference start (maybe relative or absolute)

✅ Recommended feature engineering:

Convert it to numeric or datetime — it’s essential for sorting chronologically.

In [10]:
# Convert everything to string, strip ' days', then back to int
full_data['order_day'] = (
    full_data['order_day']
    .astype(str)
    .str.replace(' days', '', regex=False)
    .str.replace(' day', '', regex=False)
    .astype(float)   # some datasets include floats
    .astype(int)
)


In [13]:
# Test output
print(full_data['order_day'].head())
print(full_data['order_day'].dtype)

0     3
1    34
2    31
3    18
4    39
Name: order_day, dtype: int64
int64


# 🧪 A. Model Evaluation Plan (using train/test split)

## 1️⃣ Test and train data

### 🧩 Step 1. Understand what you’re predicting

You’re doing a food delivery recommendation model — that means:

“Given a user’s past orders, predict what vendors they’ll order from next.”

So, the split should mimic a real-world timeline:

Train: past user behavior (earlier orders)

Test: future unseen orders

### ⚙️ Step 2. Choose the right split strategy
#### **🎯 Option A — Temporal Split (recommended)**

Best for behavioral data where orders have a time sequence.

Steps:

Convert order_day or actual order timestamp to datetime.

Sort all orders by date per user.

Split each user’s orders chronologically (e.g., 80% train, 20% test).

This ensures:

The model learns from older orders.

You test it on realistic “future” behavior.

In [14]:


# clean or convert order_day if needed
full_data['order_day'] = pd.to_timedelta(full_data['order_day'].str.replace(' days','')).dt.days

def temporal_split(df, test_ratio=0.2):
    train_list, test_list = [], []
    for _, group in df.groupby('customer_id'):
        group = group.sort_values('order_day')
        n_test = max(1, int(len(group) * test_ratio))
        test = group.tail(n_test)
        train = group.drop(test.index)
        train_list.append(train)
        test_list.append(test)
    return pd.concat(train_list), pd.concat(test_list)

train_df, test_df = temporal_split(full_data, test_ratio=0.2)


AttributeError: Can only use .str accessor with string values!