# Traffic Link Clustering

This notebook clusters urban road links into groups based on geometric and congestion-related features using k-means clustering.


In [None]:
import pathlib
import sys

import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Absolute path of the directory one level above the current working
# directory; used below to locate the data file and the local package.
PROJECT_ROOT = pathlib.Path('..').resolve()

# Make that directory importable, appending it only if it is not already
# on sys.path so re-running the cell does not add duplicates.
if sys.path.count(str(PROJECT_ROOT)) == 0:
    sys.path.append(str(PROJECT_ROOT))

from transport_system.utils.math_models import bpr_travel_time


In [None]:
# Load the link table from <PROJECT_ROOT>/data/traffic_links.csv.
# The columns read below are: capacity_vph, free_flow_time_min, length_km.
links = pd.read_csv(PROJECT_ROOT / 'data' / 'traffic_links.csv')

# Synthetic peak demand: every link is loaded to the same fraction of its
# capacity.
volume_factor = 0.9
links['peak_volume_vph'] = volume_factor * links['capacity_vph']


def _link_travel_time(link):
    """Return the bpr_travel_time result for one row of ``links``."""
    # presumably the BPR volume-delay function; confirm in
    # transport_system.utils.math_models
    return bpr_travel_time(
        free_flow_time=link['free_flow_time_min'],
        volume=link['peak_volume_vph'],
        capacity=link['capacity_vph'],
    )


# Row-wise evaluation (axis=1): the helper receives each link as a Series.
links['travel_time_min'] = links.apply(_link_travel_time, axis=1)

# Travel time is in minutes (per the column name), so convert it to hours
# before dividing the length in km to obtain km/h.
links['speed_kmph'] = links['length_km'].div(links['travel_time_min'].div(60.0))

# Loaded travel time relative to free-flow travel time.
# NOTE(review): volume/capacity is 0.9 for every link, so if
# bpr_travel_time depends on volume only through that ratio, this column
# will be identical across links -- confirm.
links['congestion_ratio'] = links['travel_time_min'].div(links['free_flow_time_min'])

# Last expression of the cell: display the enriched table.
links


In [None]:
# Columns used as clustering inputs.
feature_columns = ['length_km', 'capacity_vph', 'speed_kmph', 'congestion_ratio']
features = links[feature_columns].copy()

# Standardize the features before clustering: fit the scaler on the
# feature table, then transform that same table.
scaler = StandardScaler()
scaler.fit(features)
X_scaled = scaler.transform(features)

# Three clusters; random_state pins the seed and n_init=10 sets the number
# of k-means initializations.
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
kmeans.fit(X_scaled)
links['cluster'] = kmeans.labels_

# Last expression of the cell: show each link's cluster next to its features.
links[['link_id', 'cluster', *feature_columns]]


In [None]:
# Scatter of speed against congestion ratio, one series per cluster.
fig, ax = plt.subplots(figsize=(6, 4))

# groupby iterates the cluster labels in ascending order, so the legend
# entries are listed from the lowest cluster id to the highest.
for cluster_id, members in links.groupby('cluster'):
    ax.scatter(
        members['congestion_ratio'],
        members['speed_kmph'],
        label=f'Cluster {cluster_id}',
    )

ax.set_xlabel('Congestion ratio (TT / TT_free)')
ax.set_ylabel('Speed (km/h)')
ax.set_title('Traffic Link Clusters')
ax.grid(True)
ax.legend()
fig.tight_layout()
