In [None]:
import pandas as pd
import numpy as np
import plotly.express as px

In [None]:
# read the delivery dataset from disk and preview its first five rows
data = pd.read_csv("DeliveryTime.csv")
print(data.head(n=5))

In [None]:
# column insights: summary of every column (name, non-null count, dtype)
# together with the frame's memory usage
data.info()

In [None]:
# count the missing values in every column (0 everywhere means no nulls)
data.isna().sum()

## Calculate distance between 2 locations

In [None]:
# use the haversine formula to calculate the great-circle distance
# between 2 locations given as latitude/longitude in degrees

# Earth's mean radius in kilometres; distances are returned in this unit
R=6371

def deg_to_rad(degrees):
    """Convert an angle from degrees to radians.

    Works element-wise on scalars, NumPy arrays and pandas Series.
    """
    return degrees*(np.pi/180)

def calculate(lat1,lon1,lat2,lon2):
    """Return the haversine distance between two points.

    Parameters
    ----------
    lat1, lon1 : latitude and longitude of the first point, in degrees.
    lat2, lon2 : latitude and longitude of the second point, in degrees.
        All four may be scalars or equally shaped array-likes (NumPy
        arrays, pandas Series); the computation is element-wise.

    Returns
    -------
    The great-circle distance, in the same unit as ``R``.
    """
    d_lat=deg_to_rad(lat2-lat1)
    d_lon=deg_to_rad(lon2-lon1)
    a=np.sin(d_lat/2)**2 + np.cos(deg_to_rad(lat1)) * np.cos(deg_to_rad(lat2)) * np.sin(d_lon/2)**2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1-a) NaN; clamp it.
    a=np.clip(a,0.0,1.0)
    c=2*np.arctan2(np.sqrt(a),np.sqrt(1-a))
    return R*c

# distance between the restaurant and the delivery location of every order.
# calculate() is composed only of NumPy ufuncs and arithmetic, so it accepts
# whole columns at once; this replaces a slow Python-level row loop and no
# longer relies on the frame having a default 0..n-1 index.
data['distance'] = calculate(data['Restaurant_latitude'],
                             data['Restaurant_longitude'],
                             data['Delivery_location_latitude'],
                             data['Delivery_location_longitude'])

In [None]:
# preview the first five rows of 'data' again, now including 'distance'
print(data.head(n=5))

## Data exploration

In [None]:
# relationship between distance travelled and time taken to deliver the food;
# marker size encodes the delivery time and an OLS trendline is overlaid
figure = px.scatter(
    data,
    x='distance',
    y='Time_taken(min)',
    size='Time_taken(min)',
    trendline='ols',
    title='Relationship between distance and time taken',
)
figure.show()

There is a consistent relationship between the time taken and the distance travelled to deliver the food. It means that most delivery partners deliver food within 25-30 minutes, regardless of distance.

In [None]:
# delivery time versus the age of the delivery partner;
# marker size encodes the delivery time, colour encodes the distance
figure = px.scatter(
    data,
    x='Delivery_person_Age',
    y='Time_taken(min)',
    size='Time_taken(min)',
    color='distance',
    trendline='ols',
    title='Relationship between time taken to delivery food & Age of delivery partner',
)
figure.show()

There is a linear relationship between the time taken to deliver the food and the age of the delivery partner. It means younger delivery partners take less time to deliver food compared to older partners.

In [None]:
# delivery time versus the ratings of the delivery partner;
# marker size encodes the delivery time, colour encodes the distance
figure = px.scatter(
    data,
    x='Delivery_person_Ratings',
    y='Time_taken(min)',
    size='Time_taken(min)',
    color='distance',
    trendline='ols',
    title='Relationship between time taken to delivery food & Ratings of delivery partner',
)
figure.show()

There is an inverse linear relationship between the time taken to deliver the food and the ratings of the delivery partner. It means delivery partners with higher ratings take less time to deliver the food compared to partners with low ratings.

In [None]:
# does the delivery time depend on the vehicle used by the delivery partner
# or on the type of food ordered? one box per (vehicle, order type) pair
fig = px.box(
    data_frame=data,
    x="Type_of_vehicle",
    y="Time_taken(min)",
    color="Type_of_order",
)
fig.show()

There is not much difference between the time taken by delivery partners depending on the vehicle they are driving and the type of food they are delivering.

## Food Delivery time prediction model

Machine learning model - LSTM neural network model for food delivery time prediction

In [None]:
# build the feature matrix (age, ratings, distance) and the target column
# (delivery time), then hold out 10% of the rows as a test set
from sklearn.model_selection import train_test_split

x = np.array(data.loc[:, ['Delivery_person_Age', 'Delivery_person_Ratings', 'distance']])
y = np.array(data.loc[:, ['Time_taken(min)']])
xtrain, xtest, ytrain, ytest = train_test_split(
    x, y, random_state=42, test_size=0.10
)


In [None]:
# LSTM network: two stacked LSTM layers (128 then 64 units) followed by
# Dense(25) and a single-unit output; the input shape is
# (number of feature columns, 1)
from keras.layers import Dense, LSTM
from keras.models import Sequential

model = Sequential([
    LSTM(128, return_sequences=True, input_shape=(xtrain.shape[1], 1)),
    LSTM(64, return_sequences=False),
    Dense(25),
    Dense(1),
])
model.summary()

In [None]:
# compile with the Adam optimizer and mean-squared-error loss, then fit on
# the training split one sample per batch for nine epochs
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x=xtrain, y=ytrain, epochs=9, batch_size=1)

In [None]:
# let us test the performance of our model by giving inputs
# to predict the food delivery time
print('Food Delivery Time Prediction')
a=int(input('Age of Delivery Partner:'))
b=float(input('Ratings of Previous Deliveries:'))
# distance is a continuous quantity (the 'distance' training column holds
# float haversine distances); int() would raise ValueError on e.g. "7.5"
c=float(input('Total Distance:'))

# single-row feature matrix in the same column order used for training:
# age, ratings, distance
features=np.array([[a,b,c]])
print('Predicted Delivery time in minutes is:', model.predict(features))