## Importing Libraries

In [21]:
import pandas as pd
import numpy as np

## Importing Dataset

In [22]:
# Read the raw delivery records; the path is relative to the notebook's working directory.
data = pd.read_csv(filepath_or_buffer="delivery_time.csv")
# Show the first five rows as a quick sanity check of the parsed columns.
data.head(n=5)

Unnamed: 0,ID,Delivery_person_ID,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Order_Date,Time_Orderd,Time_Order_picked,Weatherconditions,Road_traffic_density,Vehicle_condition,Type_of_order,Type_of_vehicle,multiple_deliveries,Festival,City,Time_taken(min)
0,0x4607,INDORES13DEL02,37,4.9,22.745049,75.892471,22.765049,75.912471,19-03-2022,11:30:00,11:45:00,conditions Sunny,High,2,Snack,motorcycle,0,No,Urban,(min) 24
1,0xb379,BANGRES18DEL02,34,4.5,12.913041,77.683237,13.043041,77.813237,25-03-2022,19:45:00,19:50:00,conditions Stormy,Jam,2,Snack,scooter,1,No,Metropolitian,(min) 33
2,0x5d6d,BANGRES19DEL01,23,4.4,12.914264,77.6784,12.924264,77.6884,19-03-2022,08:30:00,08:45:00,conditions Sandstorms,Low,0,Drinks,motorcycle,1,No,Urban,(min) 26
3,0x7a6a,COIMBRES13DEL02,38,4.7,11.003669,76.976494,11.053669,77.026494,05-04-2022,18:00:00,18:10:00,conditions Sunny,Medium,0,Buffet,motorcycle,1,No,Metropolitian,(min) 21
4,0x70a2,CHENRES12DEL01,32,4.6,12.972793,80.249982,13.012793,80.289982,26-03-2022,13:30:00,13:45:00,conditions Cloudy,High,1,Snack,scooter,1,No,Metropolitian,(min) 30


## Data Cleaning

In [23]:
# Turn the target column from text such as "(min) 24" into an integer number of minutes.
# The digits are extracted after casting to str (instead of calling .str.replace on the raw
# column) so the cell stays re-runnable: on a second run the column is already int64 and
# would have no .str accessor.
data['Time_taken(min)'] = (
    data['Time_taken(min)']
    .astype(str)
    .str.extract(r'(\d+)', expand=False)  # first run of digits in each value
    .astype(np.int64)
)
data.head()

Unnamed: 0,ID,Delivery_person_ID,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Order_Date,Time_Orderd,Time_Order_picked,Weatherconditions,Road_traffic_density,Vehicle_condition,Type_of_order,Type_of_vehicle,multiple_deliveries,Festival,City,Time_taken(min)
0,0x4607,INDORES13DEL02,37,4.9,22.745049,75.892471,22.765049,75.912471,19-03-2022,11:30:00,11:45:00,conditions Sunny,High,2,Snack,motorcycle,0,No,Urban,24
1,0xb379,BANGRES18DEL02,34,4.5,12.913041,77.683237,13.043041,77.813237,25-03-2022,19:45:00,19:50:00,conditions Stormy,Jam,2,Snack,scooter,1,No,Metropolitian,33
2,0x5d6d,BANGRES19DEL01,23,4.4,12.914264,77.6784,12.924264,77.6884,19-03-2022,08:30:00,08:45:00,conditions Sandstorms,Low,0,Drinks,motorcycle,1,No,Urban,26
3,0x7a6a,COIMBRES13DEL02,38,4.7,11.003669,76.976494,11.053669,77.026494,05-04-2022,18:00:00,18:10:00,conditions Sunny,Medium,0,Buffet,motorcycle,1,No,Metropolitian,21
4,0x70a2,CHENRES12DEL01,32,4.6,12.972793,80.249982,13.012793,80.289982,26-03-2022,13:30:00,13:45:00,conditions Cloudy,High,1,Snack,scooter,1,No,Metropolitian,30


In [24]:
# Coerce ages to numbers (unparseable entries become NaN), then impute the column mean.
# The result is assigned back to data[...] rather than calling fillna(inplace=True) on the
# attribute-accessed column: that chained in-place form is deprecated and, under pandas
# Copy-on-Write, modifies a temporary object and leaves `data` unchanged.
age = pd.to_numeric(data['Delivery_person_Age'], errors='coerce')
data['Delivery_person_Age'] = age.fillna(age.mean())

In [25]:
# Coerce ratings to numbers (unparseable entries become NaN), then impute the column mean.
# Explicit assignment replaces the chained fillna(inplace=True), which is deprecated and has
# no effect on `data` once pandas Copy-on-Write is active.
ratings = pd.to_numeric(data['Delivery_person_Ratings'], errors='coerce')
data['Delivery_person_Ratings'] = ratings.fillna(ratings.mean())

In [26]:
# Drop the literal "conditions " prefix, then replace the 'NaN' placeholder with the most
# frequent real category.
# - The mode is computed with the placeholder excluded, so 'NaN' can never be chosen as
#   its own replacement.
# - The result is assigned back explicitly; the chained replace(inplace=True) form is
#   deprecated and does not update `data` under pandas Copy-on-Write.
weather = data['Weatherconditions'].str.replace('conditions ', '', regex=False)
most_common_weather = weather[weather != 'NaN'].value_counts().index[0]
data['Weatherconditions'] = weather.replace('NaN', most_common_weather)

In [52]:
# Trim surrounding whitespace, then replace the 'NaN' placeholder with the most frequent
# real category (mode computed with the placeholder excluded).
# Explicit assignment replaces the chained replace(inplace=True), which is deprecated and
# does not update `data` under pandas Copy-on-Write.
traffic = data['Road_traffic_density'].str.strip()
most_common_traffic = traffic[traffic != 'NaN'].value_counts().index[0]
data['Road_traffic_density'] = traffic.replace('NaN', most_common_traffic)

In [53]:
# Normalise the order-type labels by trimming leading/trailing whitespace.
order_types = data['Type_of_order'].str.strip()
data['Type_of_order'] = order_types
# Display the distinct categories that remain after trimming.
order_types.unique()

array(['Snack', 'Drinks', 'Buffet', 'Meal'], dtype=object)

In [54]:
# Normalise the vehicle-type labels by trimming leading/trailing whitespace.
vehicle_types = data['Type_of_vehicle'].str.strip()
data['Type_of_vehicle'] = vehicle_types
# Display the distinct categories that remain after trimming.
vehicle_types.unique()

array(['motorcycle', 'scooter', 'electric_scooter', 'bicycle'],
      dtype=object)

In [55]:
# Convert the delivery count to numeric and treat anything unparseable or missing as 0.
# errors='coerce' already turns 'NaN' placeholders (with or without padding) into real NaN,
# so a separate string replace is unnecessary. The result is assigned back explicitly
# because chained replace/fillna(inplace=True) calls are deprecated and do not update
# `data` under pandas Copy-on-Write.
data['multiple_deliveries'] = pd.to_numeric(data['multiple_deliveries'], errors='coerce').fillna(0)
data['multiple_deliveries'].unique()

array([0., 1., 3., 2.])

In [56]:
# Trim whitespace first so the placeholder is matched regardless of padding, then replace
# 'NaN' with the most frequent real value (mode computed with the placeholder excluded).
# Explicit assignment replaces the chained replace(inplace=True), which is deprecated and
# does not update `data` under pandas Copy-on-Write.
festival = data['Festival'].str.strip()
most_common_festival = festival[festival != 'NaN'].value_counts().index[0]
data['Festival'] = festival.replace('NaN', most_common_festival)
data['Festival'].unique()

array(['No', 'Yes'], dtype=object)

In [57]:
# Trim whitespace, then replace the 'NaN' placeholder with the most frequent real city
# type (mode computed with the placeholder excluded).
# Explicit assignment replaces the chained replace(inplace=True), which is deprecated and
# does not update `data` under pandas Copy-on-Write.
city = data['City'].str.strip()
most_common_city = city[city != 'NaN'].value_counts().index[0]
data['City'] = city.replace('NaN', most_common_city)
data['City'].unique()

array(['Urban', 'Metropolitian', 'Semi-Urban'], dtype=object)

## Feature Engineering

In [58]:
from math import radians,sin,cos,sqrt,atan2

In [59]:
def haversine(lat1,lon1,lat2,lon2):
    """Great-circle distance between two points using the haversine formula.

    Parameters
    ----------
    lat1, lon1 : float or array-like
        Latitude and longitude of the first point, in decimal degrees.
    lat2, lon2 : float or array-like
        Latitude and longitude of the second point, in decimal degrees.

    Returns
    -------
    float or array-like
        Distance scaled by a sphere radius of 6371.0 (Earth's mean radius in
        kilometres). Scalar inputs give a scalar; NumPy arrays or pandas Series
        are processed element-wise with normal broadcasting.
    """
    R = 6371.0

    # NumPy ufuncs accept scalars and arrays alike, so whole columns can be passed at once.
    lat1, lon1, lat2, lon2 = (np.radians(v) for v in (lat1, lon1, lat2, lon2))

    dlat = lat2 - lat1
    dlon = lon2 - lon1

    a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make sqrt(1 - a) invalid; clamp before taking square roots.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

    return R * c

In [60]:
# Create the Distance feature as an all-NaN float column; it is filled in by the next cell.
data['Distance'] = float('nan')

In [61]:
# Restaurant-to-drop-off distance for every order.
# Iterating over the four coordinate columns in lockstep and assigning the whole column once
# avoids one .loc lookup and one .loc write per row, and no longer assumes that the
# DataFrame has a default 0..n-1 index.
# NOTE(review): the recorded describe() output shows negative restaurant coordinates and a
# maximum Distance of about 19692 — possibly sign errors in the raw data; confirm before
# using this feature for modelling.
data['Distance'] = [
    haversine(rest_lat, rest_lon, drop_lat, drop_lon)
    for rest_lat, rest_lon, drop_lat, drop_lon in zip(
        data['Restaurant_latitude'],
        data['Restaurant_longitude'],
        data['Delivery_location_latitude'],
        data['Delivery_location_longitude'],
    )
]

In [62]:
# Summary statistics for the numeric columns (quartiles spelled out; these are the defaults).
# NOTE(review): the recorded output shows a Delivery_person_Ratings maximum of 6.0 and a
# Distance maximum of about 19692 — confirm whether these are data errors.
data.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Vehicle_condition,multiple_deliveries,Time_taken(min),Distance
count,45593.0,45593.0,45593.0,45593.0,45593.0,45593.0,45593.0,45593.0,45593.0,45593.0
mean,29.567137,4.63378,17.017729,70.231332,17.465186,70.845702,1.023359,0.728445,26.294607,99.303911
std,5.695692,0.327638,8.185109,22.883647,7.335122,21.118812,0.839065,0.576543,9.383806,1099.731281
min,15.0,1.0,-30.905562,-88.366217,0.01,0.01,0.0,0.0,10.0,1.465067
25%,25.0,4.6,12.933284,73.17,12.988453,73.28,0.0,0.0,19.0,4.663493
50%,29.567137,4.7,18.546947,75.898497,18.633934,76.002574,1.0,1.0,26.0,9.264281
75%,34.0,4.8,22.728163,78.044095,22.785049,78.107044,2.0,1.0,32.0,13.763977
max,50.0,6.0,30.914057,88.433452,31.054057,88.563452,3.0,3.0,54.0,19692.674606


In [63]:
# Count / unique / top / freq for the object-typed (text) columns.
# NOTE(review): in the recorded output the most frequent Time_Orderd value is rendered as
# blank with freq 1731, which suggests a missing-value placeholder in that column was not
# cleaned above — confirm.
data.describe(include=[object])

Unnamed: 0,ID,Delivery_person_ID,Order_Date,Time_Orderd,Time_Order_picked,Weatherconditions,Road_traffic_density,Type_of_order,Type_of_vehicle,Festival,City
count,45593,45593,45593,45593.0,45593,45593,45593,45593,45593,45593,45593
unique,45593,1320,44,177.0,193,6,4,4,4,2,3
top,0x4607,PUNERES01DEL01,15-03-2022,,21:30:00,Fog,Low,Snack,motorcycle,No,Metropolitian
freq,1,67,1192,1731.0,496,8270,16078,11533,26435,44697,35293


In [64]:
# Persist the cleaned frame next to the notebook; the index is omitted from the file.
data.to_csv(path_or_buf="Clean Data.csv", index=False)