## Form the clusters

**Author**: Imad Saddik
<br/>
**Date**: 21/12/2024

---

**Table of contents**<a id='toc0_'></a>    
- [Load nodes](#toc1_1_)    
- [Pydantic models](#toc1_2_)    
- [Create clusters](#toc1_3_)    
- [Show data in the map](#toc1_4_)    
- [Store the clusters](#toc1_5_)    

---

In [None]:
import psycopg2

# Settings for the PostgreSQL database queried throughout this notebook.
db_settings = {
    'dbname': 'routes',
    'user': 'postgres',
    'password': 'postgres',
    'host': 'localhost',
}

connection = psycopg2.connect(**db_settings)
# Autocommit: every statement is committed as soon as it is executed.
connection.autocommit = True
cursor = connection.cursor()

## <a id='toc1_1_'></a>[Load nodes](#toc0_)

In [2]:
from pydantic import BaseModel


class Node(BaseModel):
    """A node of the routing problem, built from one row of the ``nodes`` table."""

    # Identifier of the node (first column of the row).
    id: int
    # Position of the node.
    latitude: float
    longitude: float
    # Kind of node; this notebook filters on the values 'bus' and 'employee'.
    # The trailing underscore avoids shadowing the built-in ``type``.
    type_: str

In [3]:
# Fetch every row of the nodes table.
response = cursor.execute("""
SELECT * FROM nodes
""")

data = cursor.fetchall()

# Rows are read positionally: id, latitude, longitude, type.
nodes = [
    Node(
        id=record[0],
        latitude=record[1],
        longitude=record[2],
        type_=record[3],
    )
    for record in data
]

In [4]:
# Split the loaded nodes by their type.
bus_nodes = [
    candidate
    for candidate in nodes
    if candidate.type_ == 'bus'
]
employee_nodes = [
    candidate
    for candidate in nodes
    if candidate.type_ == 'employee'
]

# Displayed as the cell output: (number of buses, number of employees).
len(bus_nodes), len(employee_nodes)

(40, 500)

In [None]:
# Node ids of the buses and of the employees, in the same order as the
# corresponding node lists.
bus_ids = [bus.id for bus in bus_nodes]
employee_ids = [employee.id for employee in employee_nodes]

## <a id='toc1_2_'></a>[Pydantic models](#toc0_)

In [None]:
class Location(BaseModel):
    """A single geographic point, used as an element of a route's geometry."""

    latitude: float
    longitude: float


class RouteSegment(BaseModel):
    """A route between two nodes, built from one row of ``route_mapping``."""

    # Ids of the nodes at the start and at the end of the route.
    source_node_id: int
    destination_node_id: int
    # Length and travel time of the route.
    # NOTE(review): units are not visible in this notebook - confirm against
    # the code that fills route_mapping.
    distance: float
    duration: float
    # Points of the route geometry; the map cell draws them as a polyline.
    coordinates: list[Location]

## <a id='toc1_3_'></a>[Create clusters](#toc0_)

In [None]:
def assign_employees_to_buses_with_routes(bus_nodes, employee_nodes, max_capacity=18, db_cursor=None):
    """Greedily fill each bus with the closest still-unassigned employees.

    Buses are processed in the order given, so earlier buses get first pick.
    For every bus, the route distance to each unassigned employee is read
    from the ``route_mapping`` table (one query per bus/employee pair), the
    employees are ranked by that distance and the nearest ones are assigned
    to the bus until it is full.

    Employees without a stored route from the bus are ranked last (infinite
    distance) but can still be assigned when the bus has seats left.
    Employees that remain once every bus is full do not appear in the result.

    Args:
        bus_nodes: Objects exposing an ``id`` attribute, one per bus.
        employee_nodes: Objects exposing an ``id`` attribute, one per
            employee.
        max_capacity: Maximum number of employees per bus. With a value of
            zero or less nobody is assigned.
        db_cursor: DB-API cursor used for the queries. Defaults to the
            notebook-level ``cursor``.

    Returns:
        A dict mapping every bus id to the list of employee ids assigned to
        it, nearest first.
    """
    if db_cursor is None:
        # Fall back to the cursor opened at the top of the notebook.
        db_cursor = cursor

    # A negative capacity must behave like zero; used directly as a slice
    # bound it would select "all but the last few" employees instead.
    seats_per_bus = max(max_capacity, 0)

    # Bus id -> employee ids
    assignments = {bus_node.id: [] for bus_node in bus_nodes}
    unassigned_employees = {node.id for node in employee_nodes}

    for bus_node in bus_nodes:
        if not unassigned_employees:
            break

        bus_id = bus_node.id
        distances = {}
        for employee_id in unassigned_employees:
            db_cursor.execute("""
            SELECT * FROM route_mapping
            WHERE source_node_id = %s AND destination_node_id = %s
            """, (bus_id, employee_id))

            rows = db_cursor.fetchall()
            # Only the distance (third column) is needed for the ranking, so
            # the route geometry is deliberately not parsed here. At most one
            # row is expected per pair; the first one is used.
            distances[employee_id] = float(rows[0][2]) if rows else float('inf')

        # Seats already taken only matter if the same bus id appears twice.
        free_seats = seats_per_bus - len(assignments[bus_id])
        nearest = sorted(distances, key=distances.get)[:free_seats]

        assignments[bus_id].extend(nearest)
        unassigned_employees.difference_update(nearest)

    return assignments


# Build the clusters: bus id -> ids of the employees assigned to that bus.
bus_assignments = assign_employees_to_buses_with_routes(
    bus_nodes=bus_nodes,
    employee_nodes=employee_nodes
)

# Summarise each cluster. The replacement fields are kept on a single line:
# a line break inside an f-string field is a SyntaxError before Python 3.12.
for bus, employees in bus_assignments.items():
    print(
        f"Bus {bus} assigned {len(employees)} "
        f"employees with indices {employees}"
    )

Bus 1 assigned 18 employees with indices [167, 448, 455, 476, 283, 182, 267, 207, 409, 231, 376, 360, 155, 261, 72, 156, 45, 157]
Bus 2 assigned 18 employees with indices [292, 431, 380, 190, 341, 236, 396, 340, 492, 382, 466, 108, 74, 505, 511, 479, 55, 357]
Bus 3 assigned 18 employees with indices [214, 297, 458, 86, 384, 491, 437, 496, 383, 65, 159, 117, 98, 163, 485, 483, 333, 88]
Bus 4 assigned 18 employees with indices [47, 517, 187, 459, 345, 307, 56, 152, 275, 49, 125, 178, 359, 82, 346, 501, 138, 224]
Bus 5 assigned 18 employees with indices [223, 286, 113, 328, 457, 192, 447, 205, 96, 52, 116, 490, 534, 43, 303, 335, 58, 378]
Bus 6 assigned 18 employees with indices [493, 281, 295, 206, 472, 81, 309, 334, 132, 71, 516, 441, 239, 451, 221, 293, 84, 271]
Bus 7 assigned 18 employees with indices [60, 73, 425, 107, 480, 168, 215, 265, 244, 149, 410, 471, 253, 506, 401, 355, 395, 274]
Bus 8 assigned 18 employees with indices [413, 290, 287, 258, 229, 481, 100, 530, 369, 299, 164, 

## <a id='toc1_4_'></a>[Show data in the map](#toc0_)

In [None]:
import folium

# Colours cycled through when drawing the routes of one bus.
colors = ['red', 'blue', 'green', 'black', 'orange',]

# Id of the bus whose cluster is drawn.
bus_index_to_visualize = bus_ids[0]
assigned_employees = bus_assignments[bus_index_to_visualize]

# bus_index_to_visualize is a node id, not a position in bus_nodes, so the
# node has to be looked up by id. Indexing the list with it would pick a
# different bus than the one whose assignments are drawn below.
bus_node = next(
    node for node in bus_nodes if node.id == bus_index_to_visualize
)
bus_map = folium.Map(
    location=[bus_node.latitude, bus_node.longitude], zoom_start=16)

folium.Marker(
    [bus_node.latitude, bus_node.longitude],
    popup="Bus Depot",
    icon=folium.Icon(color="red", icon="bus", prefix="fa")
).add_to(bus_map)

for i, emp_idx in enumerate(assigned_employees):
    # Load the employee node to get its position.
    cursor.execute("""
    SELECT * FROM nodes WHERE id = %s
    """, (emp_idx,))

    data = cursor.fetchall()
    if not data:
        print(f"Skipping for employee {emp_idx}")
        continue

    row = data[0]
    employee_node = Node(id=row[0], latitude=row[1],
                         longitude=row[2], type_=row[3])
    employee_node_id = employee_node.id
    bus_node_id = bus_node.id

    # Load the stored route from the bus to this employee.
    cursor.execute("""
        SELECT * FROM route_mapping 
        WHERE source_node_id = %s AND destination_node_id = %s
        """, (bus_node_id, employee_node_id))

    data = cursor.fetchall()
    if not data:
        # No route stored for this pair: nothing to draw for this employee.
        print(f"Skipping for employee {emp_idx}: no route from bus {bus_node_id}")
        continue

    row = data[0]
    route_segment = RouteSegment(
        source_node_id=row[0],
        destination_node_id=row[1],
        distance=row[2],
        duration=row[3],
        coordinates=[
            Location(
                latitude=latitudeLongitude[0],
                longitude=latitudeLongitude[1]
            )
            for latitudeLongitude in row[4]
        ]
    )
    route_coordinates = [[point.latitude, point.longitude]
                         for point in route_segment.coordinates]

    # One colour per employee, reused once the palette is exhausted.
    folium.PolyLine(
        route_coordinates,
        color=colors[i % len(colors)],
        weight=3,
        opacity=1
    ).add_to(bus_map)

    folium.Marker(
        [employee_node.latitude, employee_node.longitude],
        popup=f"Employee {emp_idx}",
        icon=folium.Icon(color="green", icon="user")
    ).add_to(bus_map)

# Last expression of the cell: renders the map in the notebook.
bus_map

## <a id='toc1_5_'></a>[Store the clusters](#toc0_)

In [9]:
import pickle

# Persist the bus id -> employee ids mapping.
with open('../data/clusters/bus_assignments.pkl', mode='wb') as f:
    f.write(pickle.dumps(bus_assignments))