## Last Mile Delivery Batching 

### Importing Required Librariers

In [68]:
import pandas as pd
from datetime import datetime
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN
import numpy as np

### Preprocessing Data
##### This step involves deleting unnecessary attributes and renaming them
##### Dataset Link: https://huggingface.co/datasets/Cainiao-AI/LaDe

In [89]:
def preprocessData(data: pd.DataFrame):
    
    riders = pd.unique(data['courier_id'])
    data['pickupLoc'] = list(zip(data['accept_gps_lat'], data['accept_gps_lng']))
    data['deliveryLoc'] = list(zip(data['delivery_gps_lat'], data['delivery_gps_lng']))

    # Removing unnecessary attributes
    data.drop(columns = ['region_id', 'city', 'courier_id', 'lng', 'lat', 
                            'aoi_id', 'aoi_type', 'accept_gps_time', 'accept_gps_lng', 
                            'accept_gps_lat', 'delivery_time', 'delivery_gps_lng',
                            'delivery_gps_lat', 'delivery_gps_time', 'ds'], inplace = True)
    
    # Renaming attributes
    data.rename(columns = {'order_id': 'orderId', 'accept_time': 'pickupTime'}, inplace = True)

    return data, riders

data = pd.read_csv('delivery_cq.csv')
data = list(data.groupby('region_id'))[1][1]
data, riders = preprocessData(data)
data

Unnamed: 0,orderId,pickupTime,pickupLoc,deliveryLoc
4730,956995,07-13 09:01:00,"(29.60407, 106.66987)","(29.62018, 106.64418)"
4731,1412922,07-10 13:19:00,"(29.60411, 106.66996)","(29.63526, 106.66632)"
4732,653883,10-15 08:44:00,"(29.60415, 106.66992)","(29.61859, 106.64815)"
4733,3144155,06-30 13:23:00,"(29.60401, 106.66988)","(29.62031, 106.64429)"
4734,177770,10-12 08:39:00,"(29.60414, 106.66989)","(29.62032, 106.64411)"
...,...,...,...,...
11316,1366479,05-30 14:01:00,"(29.60413, 106.66996)","(29.6196, 106.64947)"
11317,2957007,08-05 17:37:00,"(29.60406, 106.66981)","(29.56781, 106.67811)"
11318,3821510,07-31 12:20:00,"(29.6041, 106.66991)","(29.28387, 106.90821)"
11319,2298664,07-12 13:58:00,"(29.60419, 106.66984)","(29.65655, 106.66603)"


### Grouping Data by Date

In [102]:
def splitDataByDate(data: pd.DataFrame):
    n = len(data)
    dataByDate = {date.split()[0]: [] for date in pd.unique(data['pickupTime'])}
    for rowNo in range(n):
        order = data.iloc[rowNo]
        date, time = order['pickupTime'].split()
        dataByDate[date].append(order)
        
    dataByDate = {date: pd.DataFrame(dataByDate[date]) for date in dataByDate}
    for date in dataByDate:
        dataByDate[date]['pickupTime'] = dataByDate[date]['pickupTime'].apply(lambda x: x.split()[1])
        
    return dataByDate

dataByDate = splitDataByDate(data)
dataByDate['05-01']

Unnamed: 0,orderId,pickupTime,pickupLoc,deliveryLoc
5232,3456425,09:15:00,"(29.60415, 106.66984)","(29.61631, 106.64965)"
5266,4025144,13:20:00,"(29.60418, 106.66993)","(29.6163, 106.65174)"
5336,1672377,09:12:00,"(29.60417, 106.66997)","(29.62087, 106.6412)"
5757,4118235,08:53:00,"(29.60403, 106.66985)","(29.6112, 106.66472)"
5812,2843855,13:19:00,"(29.60407, 106.66997)","(29.61812, 106.67197)"
5911,4172738,13:19:00,"(29.60406, 106.66989)","(29.65983, 106.66487)"
6459,2218393,08:53:00,"(29.60409, 106.66997)","(29.62443, 106.61578)"
6496,1077151,13:20:00,"(29.60412, 106.66998)","(29.62612, 106.6193)"
6524,869731,08:53:00,"(29.60404, 106.66996)","(29.62288, 106.61559)"
6525,4196017,13:20:00,"(29.60407, 106.66996)","(29.62587, 106.62053)"


### K-Means Clustering

In [91]:
def kMeansClustering(data, numClusters):

    scaler = StandardScaler()
    scaledData = scaler.fit_transform(data)

    kmeans = KMeans(n_clusters = numClusters, random_state = 42)
    kmeans.fit(scaledData)
    clusters = kmeans.labels_

    return clusters

### Clustering by Timestamps

In [92]:
def preprocessTimestamps(timestamps):
    features = []
    for timestamp in timestamps:
        dt = datetime.strptime(timestamp, '%H:%M:%S')
        hour = dt.hour
        minute = dt.minute
        features.append([hour, minute])
    return features

timestamps = preprocessTimestamps(dataByDate['05-01']['pickupTime'])
pickupTimeClusters = kMeansClustering(timestamps, len(riders))

# Pickup Time Clusters
print('Pickup Time Clusters', pickupTimeClusters)

Pickup Time Clusters [3 7 3 1 7 7 1 7 1 7 5 4 2 5 0 2 4 5 5 0 6 4 6 1 4 7 4 7 5 3 5 6 6 6 7 7 3
 5 5 7 7 7 0 7 0 2 4 5 7 4 4 2 3]


### Clustering by Pickup Location Co-ordinates

In [93]:
pickupLocs = list(dataByDate['05-01']['pickupLoc'])
pickupLocClusters = kMeansClustering(pickupLocs, len(riders))

# Pickup Location Clusters
print('Pickup Location Clusters:', pickupLocClusters)

Pickup Location Clusters: [3 2 2 0 4 1 7 7 4 4 7 5 4 6 6 5 6 6 2 0 4 3 5 6 5 3 0 0 7 6 3 4 3 5 6 1 7
 0 1 1 0 7 4 2 7 5 6 3 0 6 1 1 2]


### Clustering by Delivery Location Co-ordinates

In [94]:
deliveryLocs = list(dataByDate['05-01']['deliveryLoc'])
deliveryLocClusters = kMeansClustering(deliveryLocs, len(riders))

# Delivery Location Clusters
print('Delivery Location Clusters:', deliveryLocClusters)

Delivery Location Clusters: [7 7 7 7 7 5 1 1 1 1 1 1 1 2 0 0 3 1 1 0 3 3 3 3 1 4 6 2 2 7 6 1 1 1 1 2 7
 1 1 1 2 3 0 2 0 0 3 2 3 1 1 2 7]


### Merging the clusters
##### If a group of orders belong to the same cluster in all three attribute classifications, they are merged

In [95]:
# Combining cluster labels
combinedLabels = np.vstack((pickupTimeClusters, pickupLocClusters, deliveryLocClusters)).T
clusters = {}

for i, row in enumerate(combinedLabels):
    key = tuple(row)
    if key in clusters:
        clusters[key].append(i)
    else:
        clusters[key] = [i]

# Group items that belong to the same cluster across different attributes
finalClusters = {}
for clusterLabel, data_points in clusters.items():

    # Convert the cluster label to a unique identifier for the final clusters
    uniqueClusterLabel = tuple(sorted(set(clusterLabel)))
    if uniqueClusterLabel in finalClusters:
        finalClusters[uniqueClusterLabel].extend(data_points)
    else:
        finalClusters[uniqueClusterLabel] = data_points

finalClusters

{(3, 7): [0, 36, 41],
 (2, 7): [1, 43],
 (2, 3, 7): [2, 52],
 (0, 1, 7): [3],
 (4, 7): [4],
 (1, 5, 7): [5, 10],
 (1, 7): [6, 7, 39],
 (1, 4): [8, 50],
 (1, 4, 7): [9],
 (1, 4, 5): [11, 24],
 (1, 2, 4): [12],
 (2, 5, 6): [13],
 (0, 6): [14],
 (0, 2, 5): [15, 45],
 (3, 4, 6): [16, 46, 20],
 (1, 5, 6): [17, 33],
 (1, 2, 5): [18],
 (0,): [19],
 (3, 4): [21],
 (3, 5, 6): [22, 30],
 (1, 3, 6): [23, 32],
 (3, 4, 7): [25],
 (0, 4, 6): [26],
 (0, 2, 7): [27, 40],
 (2, 5, 7): [28],
 (3, 6, 7): [29],
 (1, 4, 6): [31, 49],
 (1, 6, 7): [34],
 (1, 2, 7): [35],
 (0, 1, 5): [37],
 (1, 5): [38],
 (0, 4): [42],
 (0, 7): [44],
 (2, 3, 5): [47],
 (0, 3, 7): [48],
 (1, 2): [51]}

### Assigning order batches to riders

In [96]:
maxRiderCount = len(riders)
currentRiderNo = 0
riderOrderMap = {riderNo: [] for riderNo in range(maxRiderCount)}
while finalClusters != {}:
    riderOrderMap[currentRiderNo].append(finalClusters.popitem()[1])
    currentRiderNo = (currentRiderNo + 1) % maxRiderCount

for rider in riderOrderMap:
    print(f'Rider {rider}')
    for batchNo, batch in enumerate(riderOrderMap[rider]):
        print(f'  Batch {batchNo}: {batch}')
    print()

Rider 0
  Batch 0: [51]
  Batch 1: [34]
  Batch 2: [22, 30]
  Batch 3: [13]
  Batch 4: [3]

Rider 1
  Batch 0: [48]
  Batch 1: [31, 49]
  Batch 2: [21]
  Batch 3: [12]
  Batch 4: [2, 52]

Rider 2
  Batch 0: [47]
  Batch 1: [29]
  Batch 2: [19]
  Batch 3: [11, 24]
  Batch 4: [1, 43]

Rider 3
  Batch 0: [44]
  Batch 1: [28]
  Batch 2: [18]
  Batch 3: [9]
  Batch 4: [0, 36, 41]

Rider 4
  Batch 0: [42]
  Batch 1: [27, 40]
  Batch 2: [17, 33]
  Batch 3: [8, 50]

Rider 5
  Batch 0: [38]
  Batch 1: [26]
  Batch 2: [16, 46, 20]
  Batch 3: [6, 7, 39]

Rider 6
  Batch 0: [37]
  Batch 1: [25]
  Batch 2: [15, 45]
  Batch 3: [5, 10]

Rider 7
  Batch 0: [35]
  Batch 1: [23, 32]
  Batch 2: [14]
  Batch 3: [4]

