## Notes


#### Parameters to be defined by a client when uploading orders for route planning
- n_clusters - number of vehicles available to be assigned to each route/driver.
- max_weight_stops - max. delivery stops or max. total weight (measured in kg/tons/volume etc) per vehicle.


#### KMeansConstrained (Algorithm/Model) Parameters
- n_clusters - number of clusters/vehicles available to be assigned to each route/driver, as well as the number of central points to generate for each cluster.
-  init = 'k-means++' - This is a technique that speeds up convergence of the algorithm (i.e quickly assigning orders to correct clusters). 
- max_iter - Maximum number of iterations of the algorithm for a single run, basically how many times the algorithm runs to cluster all orders before it provides the best final clustered data. eg max_iter = 300, for 1000 orders means 1 iteration will cluster all 1000 orders, same as the 2nd iteration until 300 iterations. The final iteration will provide the best possible clusters for these orders.
- size_max - maximum number of orders/stops/weight allowed per cluster
- n_jobs = -1 - for parallel computing, also helps speed up convergence of the algorithm

#### Expected Results

- After all orders/addresses have been successfully assigned to their clusters, a new field 'cluster' gets created specifying which cluster an address was allocated to and these can be viewed on the resulting clustered raw data and the map with different colors marking each cluster/route.
- The created cluster field ranges from 0 to n_clusters-1. Meaning if there were 50 clusters defined, it will range from 0 to 49. This can be used as an indicator of a unique route (say route_id or cluster_id, e.g route_id/cluster_id 2 having 10 orders allocated to it).

In [63]:
# >>>>>>>>>>>>>>>>>>>>>> Required libraries >>>>>>>>>>>>>>>>>>>>>>>>

# pip install k-means-constrained   - (actual machine learning algorithm used)
# pip install pandas               - (processing data: such as reading the csv file)
# pip install numpy
# pip install folium              - (for map visual)
# pip install scikit-learn        (machine learning library)

In [11]:
import pandas as pd 
import numpy as np
import folium
from k_means_constrained import KMeansConstrained
from sklearn.preprocessing import StandardScaler
import time
import warnings
warnings.filterwarnings('ignore')

def cluster_data(file, n_clusters, max_weight_stops):
    start_time = time.time()
    # Read the CSV file into a Pandas dataframe
    data = pd.read_csv(file)
    
    # check if weight constraint column exists in the csv file 
    # for clients using weight as a constraint then this field needs to be present in the csv file
    if 'Weight' in data.columns:
        data = data.loc[data.index.repeat(data['Weight'])].reset_index(drop=True)
        data['Weight'] = 1
    else:
        data['Weight'] = 1
    
    # Error handling message if data cannot be clustered based on provided clusters and constraints
    if len(data) > n_clusters * max_weight_stops:
            raise ValueError(f"""
                Clustering of the data is impossible with the defined combination of number of clusters ({n_clusters}) 
                and max stops/weight constraints ({max_weight_stops}).
                Either increase the number of clusters or the max stops/weight constraints.
                The total weight or number of orders in your data ({len(data)}) should be less than or equal to the 
                multiplication of number of clusters and max stops/weight ({n_clusters*max_weight_stops}).
                """)

    print(len(data))
    
    # Scale the data before clustering
    scaler = StandardScaler()
    data[['longitude', 'latitude']] = scaler.fit_transform(data[['longitude', 'latitude']])
    
    # >>>>>>>>>>>>>>>> BEGINNING OF THE ACTUAL MACHINE LEARNING CLUSTERING ALGORITHM >>>>>>>>>>>>>>
    # Apply the KMeansConstrained algorithm to the data 
    km_cons = KMeansConstrained(n_clusters = n_clusters,
                                init = 'k-means++',
                                size_max = max_weight_stops,
                                random_state = 42,
                                max_iter = 1,
                                n_jobs=-1)
    y_predicted = km_cons.fit_predict(data[['longitude', 'latitude']])
    
    # >>>>>>>>>>>>>>>>>>>>>>>> END OF ACTUAL CLUSTERING ALGORITHM >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
    # Add the cluster labels to the data - these are the clusters that orders are assigned to
    data['cluster'] = y_predicted
    
    # Transform the data back to its original form
    data[['longitude', 'latitude']] = scaler.inverse_transform(data[['longitude', 'latitude']])

    # Group data back to its original state
    data = data.groupby([col for col in data.columns if col != 'Weight']).agg({'Weight': 'sum'}).reset_index()

    # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> CREATE A MAP VISUAL >>>>>>>>>>>>>>>>>>>>>>>>>>>
    # Create a list of colors for the clusters
    colors = ['red', 'blue', 'green', 'purple', 'orange', 'darkred', 'lightred', 'beige', 
              'darkblue', 'darkgreen', 'cadetblue', 'darkpurple', 'pink', 'lightblue', 
              'lightgreen', 'gray', 'black', 'lightgray']
    colors *= n_clusters // len(colors) + 1
    
    # Create a folium map object centered on the central point of delivery locations
    map_obj = folium.Map(location=[data['latitude'].mean(), data['longitude'].mean()], zoom_start=12)
    
    # Add cluster markers and lines to the map
    for i in range(1, n_clusters):
        cluster_data = data[data['cluster'] == i]
        color = colors[i-1]
        cluster_points = []
        for _, row in cluster_data.iterrows():
            # Add marker for each point
            folium.Marker(location=[row['latitude'], row['longitude']],
                          icon=folium.Icon(color=color),
                          popup=f"Cluster {i}").add_to(map_obj)
            # Add point to list of cluster points
            cluster_points.append([row['latitude'], row['longitude']])
        # Add polyline connecting the cluster points
        folium.PolyLine(locations=cluster_points,
                        color=color).add_to(map_obj)
        
    end_time = time.time()
    time_taken = end_time - start_time
    print(f"Time taken to run the algorithm: {time_taken:.2f} seconds")
    # Return the original data with the cluster labels and the folium map visual
    return data, map_obj, km_cons

In [26]:
# call the function and display clustered raw data with the map visual 
data, map_obj, km_cons = cluster_data('C:/Users/leemn/Downloads/test_data_no_weight2.csv', 50, 28)
print(data) 
map_obj

1379
Time taken to run the algorithm: 4.17 seconds
      waybillid  driverid   Delivery Timestamp   latitude  longitude  \
0      94389655     51071  2/27/2023  15:09:55 -34.041221  18.617111   
1      94740920    275861  2/27/2023  18:13:29 -33.949555  19.823844   
2      94750406     96706  2/27/2023  18:20:59 -34.023055  18.631935   
3      94753415    275873  2/27/2023   9:06:09 -33.845447  18.543877   
4      95907038    276875  2/27/2023  12:35:57 -33.824684  18.533127   
...         ...       ...        ...       ...        ...        ...   
1374   96887956    275861  2/27/2023  16:51:49 -33.480085  19.627500   
1375   96888102    275861  2/27/2023  16:51:02 -33.480113  19.627487   
1376   96890113     51083  2/27/2023  17:43:17 -34.077916  18.877596   
1377   96890130    270926  2/27/2023   8:26:16 -34.145388  18.996853   
1378   96892100    275873  2/27/2023   8:30:00 -33.840365  18.552172   

     Origin_Hub  Origin_Lat  Origin_Long  cluster  Weight  
0           CTD  -33.862

In [31]:
data = data[data['waybillid']!=96836779]
data = data[data['waybillid']!=96804265]

In [13]:
data = data[data['order_no']!='A1ECO01']

In [32]:
# Define the new address with its latitude and longitude values
new_address1 = {'waybillid': 96836779, 'driverid':277324, 'Delivery':'2023-02-27','Timestamp':'17:12:42.912',
             'latitude':-34.1401067, 'longitude': 18.3242567,'Origin_Hub':'CTD',
             'Origin_Lat':-33.862592,'Origin_Long':18.521128, 'Weight':1}

In [None]:
# Define the new address with its latitude and longitude values
new_address1 = {'order_no': 'A1ECO01', 'latitude':-26.073674, 'longitude': 28.187738, 'Weight':15}

In [33]:
n_clusters = 50
max_weight_stops = 28

In [77]:
# CPT DC
src_lat = -33.8625925
src_long = 18.5211281

src_long2 = 28.220773
src_lat2 = -26.239191

# Create a dictionary to store the coordinates of each cluster
import folium
from collections import defaultdict

cluster_coords = defaultdict(list)

colormap = {
    0:'green',
    1: 'blue',      2: 'green',     3: 'orange',
    4: 'red',       5: 'purple',    6: 'darkgreen',
    7: 'pink',      8: 'lightgreen',9: 'darkblue',
    10: 'blue',    11: 'gray',     12: 'cadetblue',
    13: 'lightblue',14: 'red',     15: 'lightgray',
    16: 'cadetblue', 17: 'darkred', 18: 'lightgray',
    
    19: 'blue',      20: 'green',     21: 'orange',
    22: 'orange',       23: 'purple',    24: 'darkgreen',
    25: 'pink',      26: 'lightgreen',27: 'darkblue',
    28: 'purple',     29: 'gray',      30: 'purple',
    31: 'lightblue',32: 'green',      33: 'lightgray',
    34: 'cadetblue',35: 'darkred',    36: 'lightgray',
    
    37: 'black',     38: 'gray',      39: 'beige',
    40: 'lightblue', 41: 'darkpurple',42: 'black',
    43: 'cadetblue',44: 'darkred',    45: 'lightgray',
    
    46: 'darkpurple',47: 'black', 48: 'cadetblue',49: 'darkred',  
    50: 'lightgray',
    
      
    51: 'black',     52: 'gray',      53: 'beige',
    54: 'lightblue', 55: 'darkpurple',56: 'black',
    57: 'cadetblue',58: 'darkred',    59: 'lightgray',
    
    60: 'darkpurple',
    'default': 'gray'
}


# Define the list of desired cluster numbers
cluster_numbers = [23,49,8,47,16]

# Iterate through the data and group the coordinates by cluster
for index, row in data[data['cluster'].isin(cluster_numbers)].iterrows():
    # Convert the cluster number to an integer
    cluster = int(row['cluster'])
    # Append the coordinates to the corresponding cluster list in the dictionary
    cluster_coords[cluster].append([row['latitude'], row['longitude']])

map1 = folium.Map(location=[src_lat, src_long], zoom_start=12)

# Iterate through the clusters and add lines connecting the points
for cluster, coords in cluster_coords.items():
    if cluster in cluster_numbers:
        folium.PolyLine(coords, color=colormap[cluster], weight=2.5, opacity=1).add_to(map1)

# Iterate through the data and add markers to the map
for index, row in data[data['cluster'].isin(cluster_numbers)].iterrows():
    cluster = int(row['cluster'])
    if cluster in cluster_numbers:
        color = colormap.get(cluster, colormap['default'])
        icon = folium.Icon(color=color)
        icon.color = color
        folium.Marker(location=[row['latitude'], row['longitude']], icon=icon).add_to(map1)

map1


## Add new stops

In [34]:
import math

def assign_new_address_to_cluster(new_address, data,  km_cons, max_weight_stops):
    # Calculate the distances between the new address and the centroids of each cluster
    def haversine(lat1, lon1, lat2, lon2):
        R = 6371  # Radius of the earth in km
        dLat = math.radians(lat2 - lat1)
        dLon = math.radians(lon2 - lon1)
        a = math.sin(dLat / 2) * math.sin(dLat / 2) + math.cos(math.radians(lat1)) \
            * math.cos(math.radians(lat2)) * math.sin(dLon / 2) * math.sin(dLon / 2)
        c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
        d = R * c  # Distance in km
        return d

    # Sort the clusters by available space
    clusters = []
    for i in range(len(data['cluster'].unique())):
        if 'Weight' in data.columns:
            cluster_weight = sum(data.loc[data['cluster'] == i, 'Weight'])
            if cluster_weight < max_weight_stops:
                clusters.append((i, cluster_weight))
        else:
            cluster_size = sum(data['cluster'] == i)
            if cluster_size < max_weight_stops:
                clusters.append((i, cluster_size))
    clusters = sorted(clusters, key=lambda x: x[1], reverse=True)

    # Calculate the distances between the new address and the centroids of each cluster with available space
    distances = []
    for i in range(len(data['cluster'].unique())):
        centroid = km_cons.cluster_centers_[i]
        dist = haversine(new_address['latitude'], new_address['longitude'], centroid[1], centroid[0])
        distances.append((i, dist))

    # Sort the distances in ascending order
    distances = sorted(distances, key=lambda x: x[1])

    # Loop through the sorted distances and check if the corresponding cluster has room to take more orders
    allocated = False
    for i in range(len(distances)):
        cluster = distances[i][0]
        if 'Weight' in data.columns:
            cluster_weight = sum(data.loc[data['cluster'] == cluster, 'Weight'])
            if cluster_weight < max_weight_stops:
                # Add the new address to the cluster and update the cluster assignment in the data dataframe
                data = data.append(new_address, ignore_index=True)
                data.at[len(data) - 1, 'cluster'] = cluster
                allocated = True
                break
        else:
            cluster_size = sum(data['cluster'] == cluster)
            if cluster_size < max_weight_stops:
                # Add thenew address to the cluster and update the cluster assignment in the data dataframe
                data = data.append(new_address, ignore_index=True)
                data.at[len(data)-1, 'cluster'] = cluster
                print("New order address assigned to cluster:", cluster)
            else:
                # If the cluster is already at maximum capacity, print a message indicating so
                print("The closest cluster is at maximum capacity, new order cannot be assigned to this cluster.")
                
    # If no cluster with room for more orders is found, print a message indicating so
    if not allocated:
        print("No cluster with available capacity found.")
    else:
        # If an available cluster is found, print the cluster number
        print("New order address assigned to cluster:", cluster)
        # Return the updated data dataframe
    return data

In [35]:
assign_new_address_to_cluster(new_address1, data, km_cons, max_weight_stops)

New order address assigned to cluster: 49


Unnamed: 0,waybillid,driverid,Delivery,Timestamp,latitude,longitude,Origin_Hub,Origin_Lat,Origin_Long,cluster,Weight
0,94389655,51071,2/27/2023,15:09:55,-34.041221,18.617111,CTD,-33.862592,18.521128,1.0,1
1,94740920,275861,2/27/2023,18:13:29,-33.949555,19.823844,CTD,-33.862592,18.521128,9.0,1
2,94750406,96706,2/27/2023,18:20:59,-34.023055,18.631935,CTD,-33.862592,18.521128,37.0,1
3,94753415,275873,2/27/2023,9:06:09,-33.845447,18.543877,CTD,-33.862592,18.521128,3.0,1
4,95907038,276875,2/27/2023,12:35:57,-33.824684,18.533127,CTD,-33.862592,18.521128,3.0,1
...,...,...,...,...,...,...,...,...,...,...,...
1373,96888102,275861,2/27/2023,16:51:02,-33.480113,19.627487,CTD,-33.862592,18.521128,12.0,1
1374,96890113,51083,2/27/2023,17:43:17,-34.077916,18.877596,CTD,-33.862592,18.521128,2.0,1
1375,96890130,270926,2/27/2023,8:26:16,-34.145388,18.996853,CTD,-33.862592,18.521128,19.0,1
1376,96892100,275873,2/27/2023,8:30:00,-33.840365,18.552172,CTD,-33.862592,18.521128,3.0,1


In [18]:
data.groupby('cluster')['Weight'].sum()

cluster
0    142
1    187
2    187
3    130
4    187
5    187
6    187
Name: Weight, dtype: int64

In [25]:
src_lat = -26.0612827405177475
src_long = 28.02797773344386

src_long2 = 28.220773
src_lat2 = -26.239191

# Create a dictionary to store the coordinates of each cluster
import folium
from collections import defaultdict

cluster_coords = defaultdict(list)

colormap = {
    0:'green',
    1: 'blue',      2: 'green',     3: 'orange',
    4: 'red',       5: 'purple',    6: 'darkgreen',
    7: 'pink',      8: 'lightgreen',9: 'darkblue',
    10: 'blue',    11: 'gray',     12: 'cadetblue',
    13: 'lightblue',14: 'red',     15: 'lightgray',
    16: 'cadetblue', 17: 'darkred', 18: 'lightgray',
    
    19: 'blue',      20: 'green',     21: 'orange',
    22: 'orange',       23: 'purple',    24: 'darkgreen',
    25: 'pink',      26: 'lightgreen',27: 'darkblue',
    28: 'purple',     29: 'gray',      30: 'purple',
    31: 'lightblue',32: 'green',      33: 'lightgray',
    34: 'cadetblue',35: 'darkred',    36: 'lightgray',
    
    37: 'black',     38: 'gray',      39: 'beige',
    40: 'lightblue', 41: 'darkpurple',42: 'black',
    43: 'cadetblue',44: 'darkred',    45: 'lightgray',
    
    46: 'darkpurple',47: 'black', 48: 'cadetblue',49: 'darkred',  
    50: 'lightgray',
    
      
    51: 'black',     52: 'gray',      53: 'beige',
    54: 'lightblue', 55: 'darkpurple',56: 'black',
    57: 'cadetblue',58: 'darkred',    59: 'lightgray',
    
    60: 'darkpurple',
    'default': 'gray'
}


# Define the list of desired cluster numbers
cluster_numbers = [0,3]

# Iterate through the data and group the coordinates by cluster
for index, row in data[data['cluster'].isin(cluster_numbers)].iterrows():
    # Convert the cluster number to an integer
    cluster = int(row['cluster'])
    # Append the coordinates to the corresponding cluster list in the dictionary
    cluster_coords[cluster].append([row['latitude'], row['longitude']])

map1 = folium.Map(location=[src_lat, src_long], zoom_start=12)

# Iterate through the clusters and add lines connecting the points
for cluster, coords in cluster_coords.items():
    if cluster in cluster_numbers:
        folium.PolyLine(coords, color=colormap[cluster], weight=2.5, opacity=1).add_to(map1)

# Iterate through the data and add markers to the map
for index, row in data[data['cluster'].isin(cluster_numbers)].iterrows():
    cluster = int(row['cluster'])
    if cluster in cluster_numbers:
        color = colormap.get(cluster, colormap['default'])
        icon = folium.Icon(color=color)
        icon.color = color
        folium.Marker(location=[row['latitude'], row['longitude']], icon=icon).add_to(map1)

map1
