In [17]:
import pandas as pd
import geopandas as gpd
import numpy as np
from datetime import datetime
import numpy
import shapely
import osmnx as ox
from keplergl import KeplerGl
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
import hdbscan

In [18]:
def series_to_array(s):
    """Turn a series of geometries into a standardized coordinate array.

    Parameters
    ----------
    s : iterable of geometries
        Every element must expose ``.coords``; only the first coordinate
        tuple of each geometry is used.

    Returns
    -------
    numpy.ndarray
        One row per geometry, after ``StandardScaler().fit_transform``.
    """
    first_coords = []
    for geom in s:
        # Keep just the leading coordinate tuple of the geometry.
        first_coords.append(list(geom.coords)[0])

    coord_matrix = np.array(first_coords)
    return StandardScaler().fit_transform(coord_matrix)

In [19]:
def fit_hdbscan(X, min_cluster_size=5):
    """Fit HDBSCAN on ``X`` and print how many clusters and noise points resulted.

    Parameters
    ----------
    X : array-like
        Data passed unchanged to ``HDBSCAN.fit``.
    min_cluster_size : int, default 5
        Forwarded to ``hdbscan.HDBSCAN``.

    Returns
    -------
    labels or None
        The fitted ``labels_`` when at least one non-noise label exists,
        otherwise ``None``.
    """
    model = hdbscan.HDBSCAN(
        min_cluster_size=min_cluster_size, gen_min_span_tree=True
    )
    model.fit(X)
    labels = model.labels_

    # The label -1 marks noise: keep it out of the cluster tally and
    # count its occurrences separately.
    cluster_total = len(set(labels) - {-1})
    noise_total = sum(1 for label in labels if label == -1)

    print("Estimated number of clusters: %d" % cluster_total)
    print("Estimated number of noise points: %d" % noise_total)

    return labels if cluster_total > 0 else None

In [20]:
# Read the destinations GeoJSON; the path is relative, so it resolves
# against the directory the notebook kernel runs in.
gdf = gpd.read_file('../data/external/gbfs/bolt_destinations.geojson')

In [21]:
# Preview the first three rows to check what was loaded.
gdf.head(3)

Unnamed: 0,geometry
0,POINT (14.42614 50.08208)
1,POINT (14.44426 50.06294)
2,POINT (14.43784 50.05152)


In [22]:
# Build the standardized coordinate array that the clustering step consumes.
dest = series_to_array(gdf.geometry)

In [23]:
# Cluster the scaled coordinates; min_cluster_size=7 replaces fit_hdbscan's
# default of 5.
# NOTE(review): the reason for choosing 7 is not recorded here — confirm.
labels = fit_hdbscan(dest, min_cluster_size=7)

Estimated number of clusters: 427
Estimated number of noise points: 976


In [24]:
# Attach each row's HDBSCAN label, then the number of rows sharing that label.
# value_counts treats -1 like any other label, so rows marked as noise
# receive the total number of noise points.
gdf['cluster'] = labels
label_sizes = gdf['cluster'].value_counts()
gdf['counts'] = gdf['cluster'].map(label_sizes)

In [None]:
# Hand the clustered frame to kepler.gl under the dataset key 'cluster'.
kepler_datasets = {'cluster': gdf}
map1 = KeplerGl(data=kepler_datasets)

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


KeplerGl(data={'cluster':                        geometry  cluster  counts
0     POINT (14.42614 50.08208)    …

In [None]:
# Display the kepler.gl map as this cell's output.
map1