# Delivery Time Predection

In [12]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
import joblib
import datetime

### Dataset

In [13]:
# 1. Load dataset
df = pd.read_csv("../Dataset/clean_amazon_delivery.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,Order_ID,Agent_Age,Agent_Rating,Store_Latitude,Store_Longitude,Drop_Latitude,Drop_Longitude,Order_Date,Order_Time,...,Vehicle,Area,Delivery_Time,Category,Order_of_week_day,Order_Month,Prep_Time,Pickup_Hour,PM_AM,Distance
0,0,ialx566343618,37,4.9,0.396976,1.324573,0.397325,1.324923,2022-03-19,2022-03-19 11:30:00,...,motorcycle,Urban,120,Clothing,5,3,15,11,AM,3.025149
1,1,akqg208421122,34,4.5,0.225375,1.355828,0.227644,1.358097,2022-03-25,2022-03-25 19:45:00,...,scooter,Metropolitian,165,Electronics,4,3,5,19,PM,20.18353
2,2,njpu434582536,23,4.4,0.225396,1.355744,0.225571,1.355918,2022-03-19,2022-03-19 08:30:00,...,motorcycle,Urban,130,Sports,5,3,15,8,AM,1.552758
3,3,rjto796129700,38,4.7,0.19205,1.343493,0.192923,1.344366,2022-04-05,2022-04-05 18:00:00,...,motorcycle,Metropolitian,105,Cosmetics,1,4,10,18,PM,7.790401
4,4,zguw716275638,32,4.6,0.226418,1.400626,0.227116,1.401325,2022-03-26,2022-03-26 13:30:00,...,scooter,Metropolitian,150,Toys,5,3,15,13,PM,6.210138


In [14]:
df.columns

Index(['Unnamed: 0', 'Order_ID', 'Agent_Age', 'Agent_Rating', 'Store_Latitude',
       'Store_Longitude', 'Drop_Latitude', 'Drop_Longitude', 'Order_Date',
       'Order_Time', 'Pickup_Time', 'Weather', 'Traffic', 'Vehicle', 'Area',
       'Delivery_Time', 'Category', 'Order_of_week_day', 'Order_Month',
       'Prep_Time', 'Pickup_Hour', 'PM_AM', 'Distance'],
      dtype='object')

In [None]:
# 2. Define the features (X) and target (y)
X = df.drop(columns=["Delivery_Time", "Order_ID", "Store_Latitude", "Store_Longitude", "Drop_Latitude", "Drop_Longitude"])
y = df["Delivery_Time"]

In [None]:
# 3. Encoding categorical data
df = pd.get_dummies(df, columns=["Weather", "Traffic", "Vehicle", "Area", "Category", "Pickup_AMPM"], drop_first=True)

In [None]:
# 4. Split dataset into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Model

In [None]:
# 5. Build model
model = LinearRegression()

# Fit model
model.fit(X_train, y_train)

In [None]:
# 6. Evaluation
y_pred = model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"MAE={mae:.2f}\nR²={r2:.3f}")

RMSE=1091.24
R²=0.608




In [None]:
# 7. Save model
joblib.dump(model, "model.pkl")

['f1_model.pkl']

### Prediction

In [None]:
# Sample input
sample = {
    "Agent_Age": 30,
    "Agent_Rating": 4.6,
    "Distance": 7.8,              
    "pickup_hour": 14,            
    "Order_of_week_day": 2,       
    "Order_Month": 3,             
    "Prep_Time": 15,              
    "Weather": "Sunny",
    "Traffic": "Medium",
    "Vehicle": "motorcycle",
    "Area": "Urban",
    "Category": "Electronics",
    "Pickup_AMPM": "PM"
}


# Predict
prediction = model.predict(sample)[0]
print("Predicted delivery time:", prediction)

Predicted race time: 1:15:28
