In [1]:
import pandas as pd
import json

In [None]:
# Read the raw export as text. The encoding is pinned to UTF-8 (the standard
# encoding for JSON) so the result does not depend on the platform's default
# locale encoding, which is what open() falls back to when none is given.
with open("India-Food-Delivery-Time-Prediction.txt", "r", encoding="utf-8") as file:
    raw_text = file.read()

# Parse the text as JSON
data = json.loads(raw_text)

# Convert the parsed JSON structure to a DataFrame
df = pd.DataFrame(data)

In [6]:

# Persist the parsed records as a CSV file (the row index is not written),
# then display the frame as the cell output.
df.to_csv(path_or_buf="food_delivery_data.csv", index=False)
df

Unnamed: 0,ID,Delivery_person_ID,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Order_Date,Time_Orderd,Time_Order_picked,Weatherconditions,Road_traffic_density,Vehicle_condition,Type_of_order,Type_of_vehicle,multiple_deliveries,Festival,City,Time_taken(min)
0,0x4607,INDORES13DEL02,37,4.9,22.745049,75.892471,22.765049,75.912471,19-03-2022,11:33:33,11:45:29,conditions Sunny,High,2,Snack,motorcycle,0,No,Urban,(min) 24
1,0xb379,BANGRES18DEL02,34,4.5,12.913041,77.683237,13.043041,77.813237,25-03-2022,19:45:37,19:51:49,conditions Stormy,Jam,2,Snack,scooter,1,No,Metropolitian,(min) 33
2,0x5d6d,BANGRES19DEL01,23,4.4,12.914264,77.678400,12.924264,77.688400,19-03-2022,8:32:58,8:48:47,conditions Sandstorms,Low,0,Drinks,motorcycle,1,No,Urban,(min) 26
3,0x7a6a,COIMBRES13DEL02,38,4.7,11.003669,76.976494,11.053669,77.026494,05-04-2022,18:03:58,18:12:52,conditions Sunny,Medium,0,Buffet,motorcycle,1,No,Metropolitian,(min) 21
4,0x70a2,CHENRES12DEL01,32,4.6,12.972793,80.249982,13.012793,80.289982,26-03-2022,13:34:16,13:45:36,conditions Cloudy,High,1,Snack,scooter,1,No,Metropolitian,(min) 30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41948,0x1178,RANCHIRES16DEL01,35,4.2,23.371292,85.327872,23.481292,85.437872,08-03-2022,21:47:47,21:59:27,conditions Windy,Jam,2,Drinks,motorcycle,1,No,Metropolitian,(min) 33
41949,0x7c09,JAPRES04DEL01,30,4.8,26.902328,75.794257,26.912328,75.804257,24-03-2022,11:36:22,11:48:07,conditions Windy,High,1,Meal,motorcycle,0,No,Metropolitian,(min) 32
41950,0x4f8d,CHENRES08DEL03,30,4.9,13.022394,80.242439,13.052394,80.272439,11-03-2022,23:50:56,0:08:55,conditions Cloudy,Low,1,Drinks,scooter,0,No,Metropolitian,(min) 16
41951,0x5eee,COIMBRES11DEL01,20,4.7,11.001753,76.986241,11.041753,77.026241,07-03-2022,13:40:13,13:42:00,conditions Cloudy,High,0,Snack,motorcycle,1,No,Metropolitian,(min) 26


In [4]:
# Count the missing values in each column.
df.isna().sum()

ID                                0
Delivery_person_ID                0
Delivery_person_Age               0
Delivery_person_Ratings           0
Restaurant_latitude               0
Restaurant_longitude              0
Delivery_location_latitude        0
Delivery_location_longitude       0
Order_Date                        0
Time_Orderd                    1600
Time_Order_picked                 0
Weatherconditions                 0
Road_traffic_density              0
Vehicle_condition                 0
Type_of_order                     0
Type_of_vehicle                   0
multiple_deliveries               0
Festival                          0
City                              0
Time_taken(min)                   0
dtype: int64

#### As you can see, the `Time_Orderd` column has many null values (1,600). Dropping all of those rows could hurt the accuracy of the model, so we fill them strategically instead.
We use `Time_Order_picked` and the median preparation time, estimated from the rows where both times are known, to impute these missing values.

In [7]:
# Both time columns arrive as strings; parse them into datetime values so that
# differences between them can be computed.
# errors='coerce' turns any value that does not match the format (blank or
# malformed) into NaT (a missing datetime) instead of raising.
for time_col in ('Time_Order_picked', 'Time_Orderd'):
    df[time_col] = pd.to_datetime(df[time_col], format='%H:%M:%S', errors='coerce')

In [18]:
# Preparation time in minutes: pickup time minus order time.
# .dt.total_seconds() converts the timedelta to seconds; dividing by 60 gives minutes.
prep_time = (df['Time_Order_picked'] - df['Time_Orderd']).dt.total_seconds() / 60
# Both columns were parsed from a time of day only (no calendar date), so an
# order placed shortly before midnight and picked up shortly after it
# (e.g. 23:50:56 -> 0:08:55) produces a large negative difference.
# Wrap the result into [0, 1440) minutes so such rows yield the true duration.
# Missing values (NaN) are unaffected by the modulo.
prep_time = prep_time % (24 * 60)
# Median (middle value) of all computed preparation times; NaN rows are skipped.
median_prep_time = prep_time.median()

In [19]:
# Rows whose order time is missing but whose pickup time is known.
filtered_rows = df['Time_Order_picked'].notna() & df['Time_Orderd'].isna()
# Median preparation time (minutes) expressed as a timedelta.
median_offset = pd.to_timedelta(median_prep_time, unit='m')
# For the selected rows only, estimate the order time as pickup time minus the
# median preparation time; every other row keeps its existing value.
df['Time_Orderd'] = df['Time_Orderd'].mask(filtered_rows, df['Time_Order_picked'] - median_offset)

In [20]:
# Re-count the missing values per column after the imputation step.
df.isna().sum()

ID                             0
Delivery_person_ID             0
Delivery_person_Age            0
Delivery_person_Ratings        0
Restaurant_latitude            0
Restaurant_longitude           0
Delivery_location_latitude     0
Delivery_location_longitude    0
Order_Date                     0
Time_Orderd                    0
Time_Order_picked              0
Weatherconditions              0
Road_traffic_density           0
Vehicle_condition              0
Type_of_order                  0
Type_of_vehicle                0
multiple_deliveries            0
Festival                       0
City                           0
Time_taken(min)                0
dtype: int64