In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [6]:
# Path to the raw delivery-route CSV, then load it into a DataFrame.
csv_path = "/content/Food_Delivery_Route_Efficiency_Dataset.csv"
df = pd.read_csv(csv_path)

In [7]:
# Preview the first five rows to check the columns and sample values.
df.head(n=5)

Unnamed: 0,order_id,distance_km,delivery_time_min,traffic_level,route_length_km,delivery_mode,weather,order_time,restaurant_zone,customer_zone
0,1,7.97,63.8,High,9.75,Bicycle,Clear,2025-01-01 15:29,South,North
1,2,0.9,7.6,High,1.28,Car,Cloudy,2025-01-03 00:47,West,North
2,3,11.12,78.0,Medium,16.65,Bike,Rainy,2025-01-04 17:32,South,Central
3,4,4.9,24.8,Low,5.25,Scooter,Rainy,2025-01-01 14:12,Central,Central
4,5,10.04,56.0,High,11.34,Car,Rainy,2025-01-02 16:50,West,North


In [8]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,order_id,distance_km,delivery_time_min,route_length_km
count,200.0,200.0,200.0,200.0
mean,100.5,6.62475,44.7445,8.15585
std,57.879185,3.371918,25.083925,4.303207
min,1.0,0.53,2.4,0.58
25%,50.75,3.6875,23.725,4.42
50%,100.5,6.845,44.65,8.52
75%,150.25,9.7975,63.9,11.8
max,200.0,12.0,108.8,17.29


In [9]:
# Print each column's dtype and non-null count, plus the frame's memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   order_id           200 non-null    int64  
 1   distance_km        200 non-null    float64
 2   delivery_time_min  200 non-null    float64
 3   traffic_level      200 non-null    object 
 4   route_length_km    200 non-null    float64
 5   delivery_mode      200 non-null    object 
 6   weather            200 non-null    object 
 7   order_time         200 non-null    object 
 8   restaurant_zone    200 non-null    object 
 9   customer_zone      200 non-null    object 
dtypes: float64(3), int64(1), object(6)
memory usage: 15.8+ KB


**feature engineering**

In [10]:
# Encode the traffic_level column as an ordinal integer feature.
# The order here is: Low < Medium < High.

traffic_mapping = {
    'Low': 0,
    'Medium': 1,
    'High': 2
}

traffic_level_encoded = df['traffic_level'].map(traffic_mapping)

# Series.map yields NaN for any label that is not a key of the mapping.
# Fail here with a clear message rather than passing NaN on to the model.
unmapped_mask = traffic_level_encoded.isna()
if unmapped_mask.any():
    unmapped_labels = df.loc[unmapped_mask, 'traffic_level'].unique().tolist()
    raise ValueError(f"Unmapped traffic_level values: {unmapped_labels}")

df['traffic_level_encoded'] = traffic_level_encoded

# Drop the original column
df = df.drop('traffic_level', axis=1)
df

Unnamed: 0,order_id,distance_km,delivery_time_min,route_length_km,delivery_mode,weather,order_time,restaurant_zone,customer_zone,traffic_level_encoded
0,1,7.97,63.8,9.75,Bicycle,Clear,2025-01-01 15:29,South,North,2
1,2,0.90,7.6,1.28,Car,Cloudy,2025-01-03 00:47,West,North,2
2,3,11.12,78.0,16.65,Bike,Rainy,2025-01-04 17:32,South,Central,1
3,4,4.90,24.8,5.25,Scooter,Rainy,2025-01-01 14:12,Central,Central,0
4,5,10.04,56.0,11.34,Car,Rainy,2025-01-02 16:50,West,North,2
...,...,...,...,...,...,...,...,...,...,...
195,196,9.02,38.4,9.57,Bike,Cloudy,2025-01-03 09:22,Central,South,0
196,197,6.73,59.2,10.04,Bike,Cloudy,2025-01-03 03:32,South,South,0
197,198,6.98,51.4,7.83,Scooter,Cloudy,2025-01-01 08:03,Central,West,2
198,199,3.80,29.2,5.34,Bicycle,Rainy,2025-01-04 07:34,South,North,1


In [11]:
# One-hot encode delivery_mode.
# Passing `columns=` makes get_dummies remove the source column and append one
# boolean indicator per category (delivery_mode_<category>) at the end of the
# frame. Every category level is kept (no level is dropped).
df = pd.get_dummies(df, columns=['delivery_mode'], prefix='delivery_mode')
df

Unnamed: 0,order_id,distance_km,delivery_time_min,route_length_km,weather,order_time,restaurant_zone,customer_zone,traffic_level_encoded,delivery_mode_Bicycle,delivery_mode_Bike,delivery_mode_Car,delivery_mode_Scooter
0,1,7.97,63.8,9.75,Clear,2025-01-01 15:29,South,North,2,True,False,False,False
1,2,0.90,7.6,1.28,Cloudy,2025-01-03 00:47,West,North,2,False,False,True,False
2,3,11.12,78.0,16.65,Rainy,2025-01-04 17:32,South,Central,1,False,True,False,False
3,4,4.90,24.8,5.25,Rainy,2025-01-01 14:12,Central,Central,0,False,False,False,True
4,5,10.04,56.0,11.34,Rainy,2025-01-02 16:50,West,North,2,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,196,9.02,38.4,9.57,Cloudy,2025-01-03 09:22,Central,South,0,False,True,False,False
196,197,6.73,59.2,10.04,Cloudy,2025-01-03 03:32,South,South,0,False,True,False,False
197,198,6.98,51.4,7.83,Cloudy,2025-01-01 08:03,Central,West,2,False,False,False,True
198,199,3.80,29.2,5.34,Rainy,2025-01-04 07:34,South,North,1,True,False,False,False


In [12]:
# List the distinct weather categories in order of first appearance.
pd.unique(df['weather'])

array(['Clear', 'Cloudy', 'Rainy', 'Windy'], dtype=object)

In [13]:
# One-hot encode weather.
# Passing `columns=` makes get_dummies remove the source column and append one
# boolean indicator per category (weather_<category>) at the end of the frame.
# Every category level is kept (no level is dropped).
df = pd.get_dummies(df, columns=['weather'], prefix='weather')
df

Unnamed: 0,order_id,distance_km,delivery_time_min,route_length_km,order_time,restaurant_zone,customer_zone,traffic_level_encoded,delivery_mode_Bicycle,delivery_mode_Bike,delivery_mode_Car,delivery_mode_Scooter,weather_Clear,weather_Cloudy,weather_Rainy,weather_Windy
0,1,7.97,63.8,9.75,2025-01-01 15:29,South,North,2,True,False,False,False,True,False,False,False
1,2,0.90,7.6,1.28,2025-01-03 00:47,West,North,2,False,False,True,False,False,True,False,False
2,3,11.12,78.0,16.65,2025-01-04 17:32,South,Central,1,False,True,False,False,False,False,True,False
3,4,4.90,24.8,5.25,2025-01-01 14:12,Central,Central,0,False,False,False,True,False,False,True,False
4,5,10.04,56.0,11.34,2025-01-02 16:50,West,North,2,False,False,True,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,196,9.02,38.4,9.57,2025-01-03 09:22,Central,South,0,False,True,False,False,False,True,False,False
196,197,6.73,59.2,10.04,2025-01-03 03:32,South,South,0,False,True,False,False,False,True,False,False
197,198,6.98,51.4,7.83,2025-01-01 08:03,Central,West,2,False,False,False,True,False,True,False,False
198,199,3.80,29.2,5.34,2025-01-04 07:34,South,North,1,True,False,False,False,False,False,True,False


In [14]:
# Convert the order_time column from string to datetime
df['order_time'] = pd.to_datetime(df['order_time'])

In [15]:
# Split the order timestamp into separate numeric features.
order_ts = df['order_time'].dt
df = df.assign(
    order_month=order_ts.month,    # Month
    order_day=order_ts.day,        # Day (day of the month)
    order_hour=order_ts.hour,      # Hour
    order_minute=order_ts.minute,  # Minute
)
df

Unnamed: 0,order_id,distance_km,delivery_time_min,route_length_km,order_time,restaurant_zone,customer_zone,traffic_level_encoded,delivery_mode_Bicycle,delivery_mode_Bike,delivery_mode_Car,delivery_mode_Scooter,weather_Clear,weather_Cloudy,weather_Rainy,weather_Windy,order_month,order_day,order_hour,order_minute
0,1,7.97,63.8,9.75,2025-01-01 15:29:00,South,North,2,True,False,False,False,True,False,False,False,1,1,15,29
1,2,0.90,7.6,1.28,2025-01-03 00:47:00,West,North,2,False,False,True,False,False,True,False,False,1,3,0,47
2,3,11.12,78.0,16.65,2025-01-04 17:32:00,South,Central,1,False,True,False,False,False,False,True,False,1,4,17,32
3,4,4.90,24.8,5.25,2025-01-01 14:12:00,Central,Central,0,False,False,False,True,False,False,True,False,1,1,14,12
4,5,10.04,56.0,11.34,2025-01-02 16:50:00,West,North,2,False,False,True,False,False,False,True,False,1,2,16,50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,196,9.02,38.4,9.57,2025-01-03 09:22:00,Central,South,0,False,True,False,False,False,True,False,False,1,3,9,22
196,197,6.73,59.2,10.04,2025-01-03 03:32:00,South,South,0,False,True,False,False,False,True,False,False,1,3,3,32
197,198,6.98,51.4,7.83,2025-01-01 08:03:00,Central,West,2,False,False,False,True,False,True,False,False,1,1,8,3
198,199,3.80,29.2,5.34,2025-01-04 07:34:00,South,North,1,True,False,False,False,False,False,True,False,1,4,7,34


In [16]:
# Remove the columns that are not used as model features.
unused_columns = ['order_id', 'order_time', 'customer_zone', 'restaurant_zone']
df = df.drop(columns=unused_columns)
df

Unnamed: 0,distance_km,delivery_time_min,route_length_km,traffic_level_encoded,delivery_mode_Bicycle,delivery_mode_Bike,delivery_mode_Car,delivery_mode_Scooter,weather_Clear,weather_Cloudy,weather_Rainy,weather_Windy,order_month,order_day,order_hour,order_minute
0,7.97,63.8,9.75,2,True,False,False,False,True,False,False,False,1,1,15,29
1,0.90,7.6,1.28,2,False,False,True,False,False,True,False,False,1,3,0,47
2,11.12,78.0,16.65,1,False,True,False,False,False,False,True,False,1,4,17,32
3,4.90,24.8,5.25,0,False,False,False,True,False,False,True,False,1,1,14,12
4,10.04,56.0,11.34,2,False,False,True,False,False,False,True,False,1,2,16,50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,9.02,38.4,9.57,0,False,True,False,False,False,True,False,False,1,3,9,22
196,6.73,59.2,10.04,0,False,True,False,False,False,True,False,False,1,3,3,32
197,6.98,51.4,7.83,2,False,False,False,True,False,True,False,False,1,1,8,3
198,3.80,29.2,5.34,1,True,False,False,False,False,False,True,False,1,4,7,34


**feature engineering finished**

**LINEAR REGRESSION MODEL**

In [17]:
## Separate the dependent variable (target) from the independent features

target_column = 'delivery_time_min'
y = df[target_column]
X = df.drop(columns=[target_column])


In [18]:
# Train/test split: hold out 25% of the rows for evaluation.
# A fixed random_state keeps the split reproducible across runs.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=15,
)

In [19]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so no test-set information leaks in.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [20]:
from sklearn.linear_model import LinearRegression

# Linear regression estimator with default settings.
regression = LinearRegression()

In [21]:
# Fit the linear model on the training split.
regression.fit(X=X_train, y=y_train)

In [22]:
## Predict delivery times for the held-out test split
y_pred = regression.predict(X=X_test)

In [23]:
## Performance metrics on the test split
from sklearn.metrics import mean_absolute_error,mean_squared_error

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # square root of MSE, so it is in the same unit as the target

# Report in the order MSE, MAE, RMSE.
for label, value in (("mse: ", mse), ("mae: ", mae), ("rmse: ", rmse)):
    print(label, value)

mse:  72.75819971709167
mae:  6.412957709641237
rmse:  8.529841717001064


In [24]:
from sklearn.metrics import r2_score

score = r2_score(y_test, y_pred)
print("r2 score: ", score)

# Adjusted R-squared: 1 - (1 - R^2) * (n - 1) / (n - p - 1),
# with n = number of test rows and p = number of feature columns.
n_samples = len(y_test)
n_features = X_test.shape[1]
print(1 - (1 - score) * (n_samples - 1) / (n_samples - n_features - 1))

r2 score:  0.8919800217049342
0.8443241489276992


**SUMMARY**

On the test set, the model’s predictions are off by about 6.4 minutes on average (MAE), with an RMSE of about 8.5 minutes, which is an acceptable range for most delivery applications.

R² indicates how much of the variability in the target variable the model explains.

0 → the model explains nothing

1 → perfect explanation

In this case, the model explains about 89% of the variability in delivery times on the test set (about 84% after adjusting for the number of features).