In [48]:
import pandas as pd
import numpy as np

# Load the dataset
file_path = r"C:\Users\kunal\Downloads\Food_Delivery_Time_Prediction.csv"
df = pd.read_csv(file_path)

# Inspect dataset
print(df.info())
print(df.head())

# Handle missing values
for col in df.select_dtypes(include=np.number).columns:
    df[col].fillna(df[col].median(), inplace=True)
for col in df.select_dtypes(include='object').columns:
    df[col].fillna(df[col].mode()[0], inplace=True)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 15 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Order_ID                    200 non-null    object 
 1   Customer_Location           200 non-null    object 
 2   Restaurant_Location         200 non-null    object 
 3   Distance                    200 non-null    float64
 4   Weather_Conditions          200 non-null    object 
 5   Traffic_Conditions          200 non-null    object 
 6   Delivery_Person_Experience  200 non-null    int64  
 7   Order_Priority              200 non-null    object 
 8   Order_Time                  200 non-null    object 
 9   Vehicle_Type                200 non-null    object 
 10  Restaurant_Rating           200 non-null    float64
 11  Customer_Rating             200 non-null    float64
 12  Delivery_Time               200 non-null    float64
 13  Order_Cost                  200 non

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mode()[0], inplace=True)


In [None]:
from sklearn.preprocessing import OneHotEncoder, LabelEncoder

# One-Hot Encoding for nominal categories
df = pd.get_dummies(df, columns=['weather', 'traffic', 'vehicle_type'])

# Label encode target variable
le = LabelEncoder()
df['delivery_status'] = le.fit_transform(df['delivery_status'])  # 0 = Fast, 1 = Delayed


In [None]:
def haversine(lat1, lon1, lat2, lon2):
    R = 6371  # Earth radius in km
    lat1_rad, lon1_rad, lat2_rad, lon2_rad = np.radians([lat1, lon1, lat2, lon2])
    dlat = lat2_rad - lat1_rad
    dlon = lon2_rad - lon1_rad
    a = np.sin(dlat/2) ** 2 + np.cos(lat1_rad) * np.cos(lat2_rad) * np.sin(dlon/2) ** 2
    c = 2 * np.arcsin(np.sqrt(a))
    return R * c

df['geo_distance'] = haversine(df['restaurant_lat'], df['restaurant_lon'], df['customer_lat'], df['customer_lon'])


In [None]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
df[['distance', 'delivery_time', 'geo_distance']] = scaler.fit_transform(df[['distance', 'delivery_time', 'geo_distance']])


In [None]:
# Example: Rush hour binary feature based on order_time if available
df['order_hour'] = pd.to_datetime(df['order_time']).dt.hour
df['rush_hour'] = df['order_hour'].apply(lambda x: 1 if 7 <= x <= 9 or 17 <= x <= 20 else 0)


In [None]:
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Select features for clustering
features = ['geo_distance', 'distance', 'rush_hour'] + [col for col in df.columns if col.startswith('weather_') or col.startswith('traffic_') or col.startswith('vehicle_type_')]
X = df[features]

# Elbow method to find optimal k
inertia = []
for k in range(1, 11):
    kmeans = KMeans(n_clusters=k, random_state=42)
    kmeans.fit(X)
    inertia.append(kmeans.inertia_)

plt.plot(range(1, 11), inertia, marker='o')
plt.title('Elbow Method for Optimal K')
plt.xlabel('Number of clusters')
plt.ylabel('Inertia')
plt.show()

# Fit KMeans with optimal clusters, e.g., k=3
kmeans = KMeans(n_clusters=3, random_state=42)
df['cluster_kmeans'] = kmeans.fit_predict(X)


In [None]:
from scipy.cluster.hierarchy import dendrogram, linkage
import matplotlib.pyplot as plt

linked = linkage(X.sample(500, random_state=42), method='ward')  # sample if dataset is large

plt.figure(figsize=(10, 7))
dendrogram(linked, orientation='top', distance_sort='descending', show_leaf_counts=True)
plt.title('Hierarchical Clustering Dendrogram')
plt.show()

from sklearn.cluster import AgglomerativeClustering
agg = AgglomerativeClustering(n_clusters=3)
df['cluster_hierarchical'] = agg.fit_predict(X)


In [None]:
from sklearn.model_selection import train_test_split

# Features and target
y = df['delivery_status']
X_nn = df[features]

X_train, X_test, y_train, y_test = train_test_split(X_nn, y, test_size=0.2, random_state=42)


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential([
    Dense(32, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
history = model.fit(X_train, y_train, epochs=30, batch_size=32, validation_split=0.2)


In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix

y_pred_prob = model.predict(X_test).flatten()
y_pred = (y_pred_prob > 0.5).astype(int)

print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

# Plot training & validation accuracy values
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()
