In [4]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from geopy.distance import geodesic

# Read the delivery records from the CSV file and preview the first rows.
train_csv_path = "/train.csv"
data = pd.read_csv(train_csv_path)
data.head()

Unnamed: 0,ID,Delivery_person_ID,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Order_Date,Time_Orderd,Time_Order_picked,Weatherconditions,Road_traffic_density,Vehicle_condition,Type_of_order,Type_of_vehicle,multiple_deliveries,Festival,City,Time_taken(min)
0,0x4607,INDORES13DEL02,37,4.9,22.745049,75.892471,22.765049,75.912471,19-03-2022,11:30:00,11:45:00,conditions Sunny,High,2,Snack,motorcycle,0,No,Urban,(min) 24
1,0xb379,BANGRES18DEL02,34,4.5,12.913041,77.683237,13.043041,77.813237,25-03-2022,19:45:00,19:50:00,conditions Stormy,Jam,2,Snack,scooter,1,No,Metropolitian,(min) 33
2,0x5d6d,BANGRES19DEL01,23,4.4,12.914264,77.6784,12.924264,77.6884,19-03-2022,08:30:00,08:45:00,conditions Sandstorms,Low,0,Drinks,motorcycle,1,No,Urban,(min) 26
3,0x7a6a,COIMBRES13DEL02,38,4.7,11.003669,76.976494,11.053669,77.026494,05-04-2022,18:00:00,18:10:00,conditions Sunny,Medium,0,Buffet,motorcycle,1,No,Metropolitian,(min) 21
4,0x70a2,CHENRES12DEL01,32,4.6,12.972793,80.249982,13.012793,80.289982,26-03-2022,13:30:00,13:45:00,conditions Cloudy,High,1,Snack,scooter,1,No,Metropolitian,(min) 30


In [5]:
def calculate_distance(row):
    """Return the geodesic distance in kilometres between restaurant and drop-off.

    Parameters
    ----------
    row : mapping-like
        Must provide 'Restaurant_latitude', 'Restaurant_longitude',
        'Delivery_location_latitude' and 'Delivery_location_longitude'.

    Returns
    -------
    The ``.km`` attribute of the ``geodesic`` result computed for the two
    (latitude, longitude) pairs.
    """
    restaurant_point = (row['Restaurant_latitude'], row['Restaurant_longitude'])
    dropoff_point = (
        row['Delivery_location_latitude'],
        row['Delivery_location_longitude'],
    )
    separation = geodesic(restaurant_point, dropoff_point)
    return separation.km

# Add a 'Distance_km' column by calling calculate_distance on every row (axis=1).
data['Distance_km'] = data.apply(calculate_distance, axis=1)

In [6]:
import plotly.graph_objects as go

# Single scatter layer holding every delivery drop-off coordinate.
delivery_trace = go.Scattergeo(
    lon=data['Delivery_location_longitude'],
    lat=data['Delivery_location_latitude'],
    mode='markers',
    marker={'color': 'blue', 'size': 6, 'opacity': 0.7},
    name='Delivery Locations',
    hovertemplate='Lat: %{lat:.4f}<br>Lon: %{lon:.4f}<extra>Delivery</extra>',
)

# Base map: 'asia' scope with both axes narrowed to focus on India.
geo_settings = {
    'scope': 'asia',
    'showland': True,
    'landcolor': 'rgb(229, 229, 229)',
    'showcountries': True,
    'countrycolor': 'rgb(200, 200, 200)',
    'showlakes': False,
    'lonaxis': {'range': [68, 98]},
    'lataxis': {'range': [6, 38]},
}

fig = go.Figure(data=[delivery_trace])
fig.update_layout(
    title='📦 Mapping Our Reach — Delivery Locations Across India',
    geo=geo_settings,
    margin={'l': 0, 'r': 0, 't': 60, 'b': 0},
    showlegend=False,
)

fig.show()

In [7]:
from sklearn.cluster import KMeans

# Cluster the delivery coordinates into k groups with a fixed random seed.
k = 3
X = data[['Delivery_location_latitude', 'Delivery_location_longitude']]
kmeans = KMeans(n_clusters=k, random_state=42).fit(X)

# Store each row's cluster id on the frame. The fitted centres follow the
# column order of X, i.e. column 0 is latitude and column 1 is longitude.
data['Cluster'] = kmeans.labels_
centroids = kmeans.cluster_centers_

# Hover text shared by every per-cluster trace.
point_hover = '<b>Cluster:</b> %{text}<br>Lat: %{lat:.4f}<br>Lon: %{lon:.4f}<extra></extra>'

# One scatter layer per cluster id, in ascending id order.
cluster_traces = []
for cluster_label in sorted(data['Cluster'].unique()):
    cluster_data = data.loc[data['Cluster'] == cluster_label]
    cluster_traces.append(go.Scattergeo(
        lon=cluster_data['Delivery_location_longitude'],
        lat=cluster_data['Delivery_location_latitude'],
        mode='markers',
        name=f'Cluster {cluster_label}',
        marker={'size': 6, 'opacity': 0.7},
        hovertemplate=point_hover,
        text=[str(cluster_label)] * len(cluster_data),
    ))

# Centroid rows are (latitude, longitude); they are drawn last, after the points.
centroid_lat = centroids[:, 0]
centroid_lon = centroids[:, 1]
centroid_trace = go.Scattergeo(
    lon=centroid_lon,
    lat=centroid_lat,
    mode='markers',
    name='Centroids',
    marker={
        'size': 15,
        'symbol': 'x',
        'color': 'red',
        'line': {'width': 2, 'color': 'black'},
    },
    hovertemplate='<b>Centroid</b><br>Lat: %{lat:.4f}<br>Lon: %{lon:.4f}<extra></extra>',
)

fig = go.Figure(data=[*cluster_traces, centroid_trace])

# Same map framing as the delivery-location figure: 'asia' scope, axes narrowed.
fig.update_layout(
    title=f'📍 Geo-Spatial Clustering of Delivery Locations (k = {k})',
    geo={
        'scope': 'asia',
        'showland': True,
        'landcolor': "rgb(229, 229, 229)",
        'showcountries': True,
        'countrycolor': "rgb(204, 204, 204)",
        'lonaxis': {'range': [68, 98]},
        'lataxis': {'range': [6, 38]},
    },
    legend_title='Clusters',
    margin={'l': 0, 'r': 0, 't': 60, 'b': 0},
)

fig.show()

In [8]:
# Zone names for the clusters that are kept. Cluster 1 has no entry here, so
# its rows and its centroid are dropped below.
# NOTE(review): KMeans cluster ids are arbitrary labels. Confirm against the
# cluster map that ids 0 and 2 still match these zones whenever the model is re-fit.
cluster_labels = {
    0: "Central Delivery Zone",
    2: "Southern Delivery Zone"
}

# Single source of truth for which clusters survive the filter. With k = 3 this
# selects the same rows as `Cluster != 1`.
kept_cluster_ids = list(cluster_labels)

# .copy() makes filtered_data an independent frame, so adding a column to it
# does not raise SettingWithCopyWarning or depend on view-vs-copy behaviour.
filtered_data = data[data['Cluster'].isin(kept_cluster_ids)].copy()
filtered_centroids = centroids[kept_cluster_ids]  # centroid rows 0 and 2

# Attach the human-readable zone name for each remaining row.
filtered_data['Optimized_Zone'] = filtered_data['Cluster'].map(cluster_labels)