In [49]:
import pandas as pd
import json

In [50]:
with open("India-Food-Delivery-Time-Prediction.txt", "r") as file:
    raw_text = file.read()

# Parse the text as JSON
data = json.loads(raw_text)

# Convert to DataFrame
df = pd.DataFrame(data)

In [51]:

df.to_csv("food_delivery_data.csv", index=False)
df

Unnamed: 0,ID,Delivery_person_ID,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Order_Date,Time_Orderd,Time_Order_picked,Weatherconditions,Road_traffic_density,Vehicle_condition,Type_of_order,Type_of_vehicle,multiple_deliveries,Festival,City,Time_taken(min)
0,0x4607,INDORES13DEL02,37,4.9,22.745049,75.892471,22.765049,75.912471,19-03-2022,11:33:33,11:45:29,conditions Sunny,High,2,Snack,motorcycle,0,No,Urban,(min) 24
1,0xb379,BANGRES18DEL02,34,4.5,12.913041,77.683237,13.043041,77.813237,25-03-2022,19:45:37,19:51:49,conditions Stormy,Jam,2,Snack,scooter,1,No,Metropolitian,(min) 33
2,0x5d6d,BANGRES19DEL01,23,4.4,12.914264,77.678400,12.924264,77.688400,19-03-2022,8:32:58,8:48:47,conditions Sandstorms,Low,0,Drinks,motorcycle,1,No,Urban,(min) 26
3,0x7a6a,COIMBRES13DEL02,38,4.7,11.003669,76.976494,11.053669,77.026494,05-04-2022,18:03:58,18:12:52,conditions Sunny,Medium,0,Buffet,motorcycle,1,No,Metropolitian,(min) 21
4,0x70a2,CHENRES12DEL01,32,4.6,12.972793,80.249982,13.012793,80.289982,26-03-2022,13:34:16,13:45:36,conditions Cloudy,High,1,Snack,scooter,1,No,Metropolitian,(min) 30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41948,0x1178,RANCHIRES16DEL01,35,4.2,23.371292,85.327872,23.481292,85.437872,08-03-2022,21:47:47,21:59:27,conditions Windy,Jam,2,Drinks,motorcycle,1,No,Metropolitian,(min) 33
41949,0x7c09,JAPRES04DEL01,30,4.8,26.902328,75.794257,26.912328,75.804257,24-03-2022,11:36:22,11:48:07,conditions Windy,High,1,Meal,motorcycle,0,No,Metropolitian,(min) 32
41950,0x4f8d,CHENRES08DEL03,30,4.9,13.022394,80.242439,13.052394,80.272439,11-03-2022,23:50:56,0:08:55,conditions Cloudy,Low,1,Drinks,scooter,0,No,Metropolitian,(min) 16
41951,0x5eee,COIMBRES11DEL01,20,4.7,11.001753,76.986241,11.041753,77.026241,07-03-2022,13:40:13,13:42:00,conditions Cloudy,High,0,Snack,motorcycle,1,No,Metropolitian,(min) 26


In [52]:
df.isnull().sum()

ID                                0
Delivery_person_ID                0
Delivery_person_Age               0
Delivery_person_Ratings           0
Restaurant_latitude               0
Restaurant_longitude              0
Delivery_location_latitude        0
Delivery_location_longitude       0
Order_Date                        0
Time_Orderd                    1600
Time_Order_picked                 0
Weatherconditions                 0
Road_traffic_density              0
Vehicle_condition                 0
Type_of_order                     0
Type_of_vehicle                   0
multiple_deliveries               0
Festival                          0
City                              0
Time_taken(min)                   0
dtype: int64

#### Since you can see the time_ordered column has many null values we cannot drop all the rows as it may affect the accuracy of the model so we should fill it strategically.
We’re using Time_Order_picked and an estimated median preparation time to impute/fill these missing values.

In [53]:
# Time columns are currently stored as strings. To perform time calculations (like differences), we convert them to proper datetime objects.
# errors='coerce' means: If a value can’t be converted (e.g., blank or malformed), it becomes NaT (Not a Time = missing datetime).
df['Time_Order_picked'] = pd.to_datetime(df['Time_Order_picked'], format='%H:%M:%S', errors='coerce')
df['Time_Orderd'] = pd.to_datetime(df['Time_Orderd'], format='%H:%M:%S', errors='coerce')

In [54]:
# .dt.total_seconds() converts the timedelta to seconds, and we divide by 60 to get minutes.
prep_time = (df['Time_Order_picked'] - df['Time_Orderd']).dt.total_seconds() / 60
# We take the median (middle value) of all calculated prep times
median_prep_time = prep_time.median()

In [55]:
filtered_rows = df['Time_Orderd'].isna() & df['Time_Order_picked'].notna()
df.loc[filtered_rows, 'Time_Orderd'] = df.loc[filtered_rows, 'Time_Order_picked'] - pd.to_timedelta(median_prep_time, unit='m')

In [56]:
df.isnull().sum()

ID                             0
Delivery_person_ID             0
Delivery_person_Age            0
Delivery_person_Ratings        0
Restaurant_latitude            0
Restaurant_longitude           0
Delivery_location_latitude     0
Delivery_location_longitude    0
Order_Date                     0
Time_Orderd                    0
Time_Order_picked              0
Weatherconditions              0
Road_traffic_density           0
Vehicle_condition              0
Type_of_order                  0
Type_of_vehicle                0
multiple_deliveries            0
Festival                       0
City                           0
Time_taken(min)                0
dtype: int64

In [57]:
# Find every occurrence of the word conditions and replace it with nothing ('').
# regex=False ensures that the string 'conditions' is treated literally, not as a regular expression.
# Removes any leading or trailing whitespace from each value in the column.
df['Weatherconditions'] = df['Weatherconditions'].str.replace('conditions','',regex=False).str.strip()

In [58]:
df.describe()

Unnamed: 0,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Time_Orderd,Time_Order_picked,Vehicle_condition
count,41953.0,41953.0,41953.0,41953.0,41953,41953,41953.0
mean,18.911397,76.923408,18.975064,76.987074,1900-01-01 17:54:55.850880768,1900-01-01 17:39:51.540199680,1.022716
min,9.957144,72.768726,9.967144,72.778726,1899-12-31 23:51:20.000000002,1900-01-01 00:00:02,0.0
25%,12.986047,73.89852,13.065996,73.940327,1900-01-01 15:20:57,1900-01-01 14:37:04,0.0
50%,19.065838,76.618203,19.123249,76.663622,1900-01-01 19:15:58,1900-01-01 19:12:09,1.0
75%,22.751234,78.368855,22.82004,78.405467,1900-01-01 21:37:03,1900-01-01 21:37:15,2.0
max,30.914057,88.433452,31.054057,88.563452,1900-01-01 23:59:57,1900-01-01 23:59:57,3.0
std,5.467829,3.50291,5.469616,3.503073,,,0.838893


In [59]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41953 entries, 0 to 41952
Data columns (total 20 columns):
 #   Column                       Non-Null Count  Dtype         
---  ------                       --------------  -----         
 0   ID                           41953 non-null  object        
 1   Delivery_person_ID           41953 non-null  object        
 2   Delivery_person_Age          41953 non-null  object        
 3   Delivery_person_Ratings      41953 non-null  object        
 4   Restaurant_latitude          41953 non-null  float64       
 5   Restaurant_longitude         41953 non-null  float64       
 6   Delivery_location_latitude   41953 non-null  float64       
 7   Delivery_location_longitude  41953 non-null  float64       
 8   Order_Date                   41953 non-null  object        
 9   Time_Orderd                  41953 non-null  datetime64[ns]
 10  Time_Order_picked            41953 non-null  datetime64[ns]
 11  Weatherconditions            41953 non-nu

In [60]:
#doing the same for time taken column and converting it into numeric type
df['Time_taken(min)'] = df['Time_taken(min)'].str.replace('(min)', '', regex=False).str.strip()
df['Time_taken(min)'] = pd.to_numeric(df['Time_taken(min)'], errors='coerce')

In [61]:
#changing the data type of columns to numeric
df['Delivery_person_Age'] = pd.to_numeric(df['Delivery_person_Age'], errors='coerce')
df['Delivery_person_Ratings'] = pd.to_numeric(df['Delivery_person_Ratings'], errors='coerce')
df['multiple_deliveries'] = pd.to_numeric(df['multiple_deliveries'], errors='coerce')

In [62]:
#droping duplicates if any
df.drop_duplicates(inplace=True)

In [63]:
df

Unnamed: 0,ID,Delivery_person_ID,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Order_Date,Time_Orderd,Time_Order_picked,Weatherconditions,Road_traffic_density,Vehicle_condition,Type_of_order,Type_of_vehicle,multiple_deliveries,Festival,City,Time_taken(min)
0,0x4607,INDORES13DEL02,37.0,4.9,22.745049,75.892471,22.765049,75.912471,19-03-2022,1900-01-01 11:33:33,1900-01-01 11:45:29,Sunny,High,2,Snack,motorcycle,0.0,No,Urban,24
1,0xb379,BANGRES18DEL02,34.0,4.5,12.913041,77.683237,13.043041,77.813237,25-03-2022,1900-01-01 19:45:37,1900-01-01 19:51:49,Stormy,Jam,2,Snack,scooter,1.0,No,Metropolitian,33
2,0x5d6d,BANGRES19DEL01,23.0,4.4,12.914264,77.678400,12.924264,77.688400,19-03-2022,1900-01-01 08:32:58,1900-01-01 08:48:47,Sandstorms,Low,0,Drinks,motorcycle,1.0,No,Urban,26
3,0x7a6a,COIMBRES13DEL02,38.0,4.7,11.003669,76.976494,11.053669,77.026494,05-04-2022,1900-01-01 18:03:58,1900-01-01 18:12:52,Sunny,Medium,0,Buffet,motorcycle,1.0,No,Metropolitian,21
4,0x70a2,CHENRES12DEL01,32.0,4.6,12.972793,80.249982,13.012793,80.289982,26-03-2022,1900-01-01 13:34:16,1900-01-01 13:45:36,Cloudy,High,1,Snack,scooter,1.0,No,Metropolitian,30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41948,0x1178,RANCHIRES16DEL01,35.0,4.2,23.371292,85.327872,23.481292,85.437872,08-03-2022,1900-01-01 21:47:47,1900-01-01 21:59:27,Windy,Jam,2,Drinks,motorcycle,1.0,No,Metropolitian,33
41949,0x7c09,JAPRES04DEL01,30.0,4.8,26.902328,75.794257,26.912328,75.804257,24-03-2022,1900-01-01 11:36:22,1900-01-01 11:48:07,Windy,High,1,Meal,motorcycle,0.0,No,Metropolitian,32
41950,0x4f8d,CHENRES08DEL03,30.0,4.9,13.022394,80.242439,13.052394,80.272439,11-03-2022,1900-01-01 23:50:56,1900-01-01 00:08:55,Cloudy,Low,1,Drinks,scooter,0.0,No,Metropolitian,16
41951,0x5eee,COIMBRES11DEL01,20.0,4.7,11.001753,76.986241,11.041753,77.026241,07-03-2022,1900-01-01 13:40:13,1900-01-01 13:42:00,Cloudy,High,0,Snack,motorcycle,1.0,No,Metropolitian,26


In [64]:
# Strip and lower-case categories
df['City'] = df['City'].str.strip().str.lower()
df['Road_traffic_density'] = df['Road_traffic_density'].str.strip().str.lower()
df['Weatherconditions'] = df['Weatherconditions'].str.strip().str.lower()
df['Type_of_order'] = df['Type_of_order'].str.strip().str.lower()
df['Festival'] = df['Festival'].str.strip().str.lower()

In [65]:
df[['Delivery_person_Age', 'Time_taken(min)']].describe()

Unnamed: 0,Delivery_person_Age,Time_taken(min)
count,40234.0,41953.0
mean,29.56333,26.311539
std,5.812361,9.380753
min,15.0,10.0
25%,25.0,19.0
50%,30.0,26.0
75%,35.0,32.0
max,50.0,54.0


In [66]:
df.isna().sum().sort_values(ascending=False)

Delivery_person_Ratings        1763
Delivery_person_Age            1719
multiple_deliveries             905
Delivery_person_ID                0
ID                                0
Restaurant_latitude               0
Delivery_location_latitude        0
Restaurant_longitude              0
Order_Date                        0
Time_Orderd                       0
Time_Order_picked                 0
Delivery_location_longitude       0
Weatherconditions                 0
Road_traffic_density              0
Type_of_order                     0
Vehicle_condition                 0
Type_of_vehicle                   0
Festival                          0
City                              0
Time_taken(min)                   0
dtype: int64

In [67]:
df['City'].value_counts(), df['Road_traffic_density'].value_counts(), df['Weatherconditions'].value_counts(), df['Type_of_order'].value_counts(), df['Festival'].value_counts()

(City
 metropolitian    31411
 urban             9279
 nan               1114
 semi-urban         149
 Name: count, dtype: int64,
 Road_traffic_density
 low       14200
 jam       13043
 medium    10084
 high       4071
 nan         555
 Name: count, dtype: int64,
 Weatherconditions
 fog           7012
 stormy        6974
 cloudy        6932
 sandstorms    6906
 windy         6832
 sunny         6728
 nan            569
 Name: count, dtype: int64,
 Type_of_order
 snack     10616
 meal      10524
 drinks    10445
 buffet    10368
 Name: count, dtype: int64,
 Festival
 no     40916
 yes      822
 nan      215
 Name: count, dtype: int64)

In [68]:
df['Type_of_vehicle'].value_counts()

Type_of_vehicle
motorcycle           24396
scooter              14029
electric_scooter      3468
bicycle                 60
Name: count, dtype: int64

### Came across many fake null values so decided to convert it so that i can deal with them

In [69]:
import numpy as np
df.replace(['NaN', 'nan', 'null', 'Null', 'N/A', '', ' ', 'na', 'NA'], np.nan, inplace=True)

In [70]:
df.isna().sum().sort_values(ascending=False)

Delivery_person_Ratings        1763
Delivery_person_Age            1719
City                           1114
multiple_deliveries             905
Weatherconditions               569
Road_traffic_density            555
Festival                        215
Delivery_person_ID                0
ID                                0
Restaurant_latitude               0
Delivery_location_latitude        0
Restaurant_longitude              0
Time_Order_picked                 0
Time_Orderd                       0
Order_Date                        0
Delivery_location_longitude       0
Type_of_vehicle                   0
Type_of_order                     0
Vehicle_condition                 0
Time_taken(min)                   0
dtype: int64

Filling delivery person ratings by median citywise

In [71]:
# df['Delivery_person_Ratings'] = df.groupby('City')['Delivery_person_Ratings'].transform(
#     lambda x: x.fillna(x.median())
# )
city_median = df.groupby('City')['Delivery_person_Ratings'].transform('median')
df['Delivery_person_Ratings'] = df['Delivery_person_Ratings'].fillna(city_median)
df['Delivery_person_Ratings'] = df['Delivery_person_Ratings'].fillna(df['Delivery_person_Ratings'].median())

In [72]:
df['Delivery_person_Ratings'].isna().sum()

np.int64(0)

In [73]:
#ensuring there are no outliers
df = df[(df['Delivery_person_Ratings'] >= 1.0) & (df['Delivery_person_Ratings'] <= 5.0)]

In [74]:
df['Delivery_person_Ratings'].value_counts().sort_index()

Delivery_person_Ratings
1.0      35
2.5      18
2.6      20
2.7      21
2.8      17
2.9      18
3.0       6
3.1      28
3.2      26
3.3      23
3.4      32
3.5     236
3.6     194
3.7     203
3.8     218
3.9     176
4.0     998
4.1    1323
4.2    1331
4.3    1301
4.4    1246
4.5    3067
4.6    6398
4.7    8280
4.8    6558
4.9    6468
5.0    3666
Name: count, dtype: int64

Filling city column with the most frequent city

In [75]:
# Fill missing values in 'City' with the most frequent city (mode)
city_mode = df['City'].mode()[0]
df['City'] = df['City'].fillna(city_mode)
df['City'].isna().sum()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['City'] = df['City'].fillna(city_mode)


np.int64(0)

Filling delivery_person_age by the median value of the column grouped by city

In [76]:
city_median = df.groupby('City')['Delivery_person_Age'].transform('median')

In [77]:

df.loc[:, 'Delivery_person_Age'] = df['Delivery_person_Age'].fillna(city_median)
df.loc[:, 'Delivery_person_Age'] = df['Delivery_person_Age'].fillna(df['Delivery_person_Age'].median())

In [78]:
df = df[df['Delivery_person_Age'] >= 21]

Filling multiple deliveries by most frequent value i.e mode

In [79]:
df['multiple_deliveries'].value_counts()

multiple_deliveries
1.0    24718
0.0    12219
2.0     1793
3.0      319
Name: count, dtype: int64

In [80]:
df.loc[:,'multiple_deliveries'] = df['multiple_deliveries'].fillna(df['multiple_deliveries'].mode()[0]).astype(int)
df['multiple_deliveries'].isna().sum()

np.int64(0)

Filling Weather conditions by the mode value grouped by city type

In [81]:
df['Weatherconditions'].value_counts()

Weatherconditions
fog           6649
stormy        6642
sandstorms    6616
cloudy        6611
windy         6514
sunny         6388
Name: count, dtype: int64

In [82]:
df.loc[:, 'Weatherconditions'] = df.groupby('City')['Weatherconditions'].transform(
    lambda x: x.fillna(x.mode()[0] if not x.mode().empty else 'unknown')
)

Filling Traffic density by the city wise mode value and festival by no 

In [83]:
df.loc[:,'Road_traffic_density'] = df.groupby('City')['Road_traffic_density'].transform( 
    lambda x: x.fillna(x.mode()[0] if not x.mode().empty else 'unknown')
)
df['Festival'].value_counts(dropna=False)
df.loc[:,'Festival'] = df['Festival'].fillna('no')

In [84]:
#Checking for any remaining missing values
df.isna().sum().sort_values(ascending=False)

ID                             0
Delivery_person_ID             0
Delivery_person_Age            0
Delivery_person_Ratings        0
Restaurant_latitude            0
Restaurant_longitude           0
Delivery_location_latitude     0
Delivery_location_longitude    0
Order_Date                     0
Time_Orderd                    0
Time_Order_picked              0
Weatherconditions              0
Road_traffic_density           0
Vehicle_condition              0
Type_of_order                  0
Type_of_vehicle                0
multiple_deliveries            0
Festival                       0
City                           0
Time_taken(min)                0
dtype: int64

In [85]:
df

Unnamed: 0,ID,Delivery_person_ID,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Order_Date,Time_Orderd,Time_Order_picked,Weatherconditions,Road_traffic_density,Vehicle_condition,Type_of_order,Type_of_vehicle,multiple_deliveries,Festival,City,Time_taken(min)
0,0x4607,INDORES13DEL02,37.0,4.9,22.745049,75.892471,22.765049,75.912471,19-03-2022,1900-01-01 11:33:33,1900-01-01 11:45:29,sunny,high,2,snack,motorcycle,0.0,no,urban,24
1,0xb379,BANGRES18DEL02,34.0,4.5,12.913041,77.683237,13.043041,77.813237,25-03-2022,1900-01-01 19:45:37,1900-01-01 19:51:49,stormy,jam,2,snack,scooter,1.0,no,metropolitian,33
2,0x5d6d,BANGRES19DEL01,23.0,4.4,12.914264,77.678400,12.924264,77.688400,19-03-2022,1900-01-01 08:32:58,1900-01-01 08:48:47,sandstorms,low,0,drinks,motorcycle,1.0,no,urban,26
3,0x7a6a,COIMBRES13DEL02,38.0,4.7,11.003669,76.976494,11.053669,77.026494,05-04-2022,1900-01-01 18:03:58,1900-01-01 18:12:52,sunny,medium,0,buffet,motorcycle,1.0,no,metropolitian,21
4,0x70a2,CHENRES12DEL01,32.0,4.6,12.972793,80.249982,13.012793,80.289982,26-03-2022,1900-01-01 13:34:16,1900-01-01 13:45:36,cloudy,high,1,snack,scooter,1.0,no,metropolitian,30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41947,0x4e21,BANGRES16DEL03,28.0,4.9,13.029198,77.570997,13.059198,77.600997,30-03-2022,1900-01-01 21:57:44,1900-01-01 22:03:10,sandstorms,jam,1,meal,scooter,1.0,no,metropolitian,29
41948,0x1178,RANCHIRES16DEL01,35.0,4.2,23.371292,85.327872,23.481292,85.437872,08-03-2022,1900-01-01 21:47:47,1900-01-01 21:59:27,windy,jam,2,drinks,motorcycle,1.0,no,metropolitian,33
41949,0x7c09,JAPRES04DEL01,30.0,4.8,26.902328,75.794257,26.912328,75.804257,24-03-2022,1900-01-01 11:36:22,1900-01-01 11:48:07,windy,high,1,meal,motorcycle,0.0,no,metropolitian,32
41950,0x4f8d,CHENRES08DEL03,30.0,4.9,13.022394,80.242439,13.052394,80.272439,11-03-2022,1900-01-01 23:50:56,1900-01-01 00:08:55,cloudy,low,1,drinks,scooter,0.0,no,metropolitian,16


In [86]:
# Convert Delivery_person_Age and multiple_deliveries to int
df[ 'Delivery_person_Age'] = df['Delivery_person_Age'].astype(int)
df['multiple_deliveries'] = df['multiple_deliveries'].astype(int)

In [87]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 39908 entries, 0 to 41952
Data columns (total 20 columns):
 #   Column                       Non-Null Count  Dtype         
---  ------                       --------------  -----         
 0   ID                           39908 non-null  object        
 1   Delivery_person_ID           39908 non-null  object        
 2   Delivery_person_Age          39908 non-null  int64         
 3   Delivery_person_Ratings      39908 non-null  float64       
 4   Restaurant_latitude          39908 non-null  float64       
 5   Restaurant_longitude         39908 non-null  float64       
 6   Delivery_location_latitude   39908 non-null  float64       
 7   Delivery_location_longitude  39908 non-null  float64       
 8   Order_Date                   39908 non-null  object        
 9   Time_Orderd                  39908 non-null  datetime64[ns]
 10  Time_Order_picked            39908 non-null  datetime64[ns]
 11  Weatherconditions            39908 non-null  o

In [88]:
df

Unnamed: 0,ID,Delivery_person_ID,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Order_Date,Time_Orderd,Time_Order_picked,Weatherconditions,Road_traffic_density,Vehicle_condition,Type_of_order,Type_of_vehicle,multiple_deliveries,Festival,City,Time_taken(min)
0,0x4607,INDORES13DEL02,37,4.9,22.745049,75.892471,22.765049,75.912471,19-03-2022,1900-01-01 11:33:33,1900-01-01 11:45:29,sunny,high,2,snack,motorcycle,0,no,urban,24
1,0xb379,BANGRES18DEL02,34,4.5,12.913041,77.683237,13.043041,77.813237,25-03-2022,1900-01-01 19:45:37,1900-01-01 19:51:49,stormy,jam,2,snack,scooter,1,no,metropolitian,33
2,0x5d6d,BANGRES19DEL01,23,4.4,12.914264,77.678400,12.924264,77.688400,19-03-2022,1900-01-01 08:32:58,1900-01-01 08:48:47,sandstorms,low,0,drinks,motorcycle,1,no,urban,26
3,0x7a6a,COIMBRES13DEL02,38,4.7,11.003669,76.976494,11.053669,77.026494,05-04-2022,1900-01-01 18:03:58,1900-01-01 18:12:52,sunny,medium,0,buffet,motorcycle,1,no,metropolitian,21
4,0x70a2,CHENRES12DEL01,32,4.6,12.972793,80.249982,13.012793,80.289982,26-03-2022,1900-01-01 13:34:16,1900-01-01 13:45:36,cloudy,high,1,snack,scooter,1,no,metropolitian,30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41947,0x4e21,BANGRES16DEL03,28,4.9,13.029198,77.570997,13.059198,77.600997,30-03-2022,1900-01-01 21:57:44,1900-01-01 22:03:10,sandstorms,jam,1,meal,scooter,1,no,metropolitian,29
41948,0x1178,RANCHIRES16DEL01,35,4.2,23.371292,85.327872,23.481292,85.437872,08-03-2022,1900-01-01 21:47:47,1900-01-01 21:59:27,windy,jam,2,drinks,motorcycle,1,no,metropolitian,33
41949,0x7c09,JAPRES04DEL01,30,4.8,26.902328,75.794257,26.912328,75.804257,24-03-2022,1900-01-01 11:36:22,1900-01-01 11:48:07,windy,high,1,meal,motorcycle,0,no,metropolitian,32
41950,0x4f8d,CHENRES08DEL03,30,4.9,13.022394,80.242439,13.052394,80.272439,11-03-2022,1900-01-01 23:50:56,1900-01-01 00:08:55,cloudy,low,1,drinks,scooter,0,no,metropolitian,16


In [90]:
df.describe()

Unnamed: 0,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Time_Orderd,Time_Order_picked,Vehicle_condition,multiple_deliveries,Time_taken(min)
count,39908.0,39908.0,39908.0,39908.0,39908.0,39908.0,39908,39908,39908.0,39908.0,39908.0
mean,30.024757,4.634502,18.916897,76.921955,18.980579,76.985637,1900-01-01 17:55:14.630650368,1900-01-01 17:39:55.180114176,1.019044,0.754736,26.482284
min,21.0,2.5,9.957144,72.768726,9.967144,72.778726,1899-12-31 23:51:20.000000002,1900-01-01 00:00:02,0.0,0.0,10.0
25%,26.0,4.5,12.986047,73.897902,13.066279,73.940327,1900-01-01 15:21:15.500000,1900-01-01 14:37:21.750000128,0.0,0.0,19.0
50%,30.0,4.7,19.065838,76.618203,19.124049,76.662278,1900-01-01 19:16:32,1900-01-01 19:12:40.500000,1.0,1.0,26.0
75%,35.0,4.8,22.751234,78.368855,22.820048,78.405467,1900-01-01 21:37:21.500000,1900-01-01 21:37:37.249999872,2.0,1.0,33.0
max,39.0,5.0,30.914057,88.433452,31.054057,88.563452,1900-01-01 23:59:57,1900-01-01 23:59:57,3.0,3.0,54.0
std,5.357983,0.308651,5.466447,3.503813,5.468178,3.504007,,,0.83637,0.568273,9.386411


In [91]:
df['Order_Date'] = pd.to_datetime(df['Order_Date'], format='%d-%m-%Y', errors='coerce')

In [92]:
df['Time_Orderd'] = pd.to_datetime(df['Time_Orderd'], errors='coerce').dt.time
df['Time_Order_picked'] = pd.to_datetime(df['Time_Order_picked'], errors='coerce').dt.time

In [93]:
df.to_csv("cleaned_dataset.csv", index=False)