Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion superblockify/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@
The filter used to filter the OSM data for the graph. This is a string that is
passed to the :func:`osmnx.graph_from_place` function.

CLUSTERING_PERCENTILE
The percentile used to determine the betweenness centrality threshold for the
spatial clustering and anisotropy nodes.

logger
The logger for this module. This is used to log information, warnings and errors
throughout the package.
Expand Down Expand Up @@ -72,14 +76,17 @@
'["service"!~"alley|driveway|emergency_access|parking|parking_aisle|private"]'
)

# Metrics
CLUSTERING_PERCENTILE = 90

# Logging configuration using the setup.cfg file
logging.config.fileConfig(join(dirname(__file__), "..", "setup.cfg"))
# Get the logger for this module
logger = logging.getLogger("superblockify")

# Tests
TEST_DATA_PATH = "./tests/test_data/"
HIDE_PLOTS = False
HIDE_PLOTS = True

PLACES_GENERAL = [
("Barcelona", "Barcelona, Catalonia, Spain"),
Expand Down
147 changes: 147 additions & 0 deletions superblockify/metrics/measures.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,3 +589,150 @@ def _sum_bc(loop_indices, pred, dist, edges_uv, edge_padding): # pragma: no cov
edge_padding,
)
return betweennesses


def calculate_high_bc_clustering(node_x, node_y, node_betweenness, percentile):
    """
    Calculate the high betweenness clustering coefficient and anisotropy for a
    given percentile of nodes with the highest betweenness. [1]_

    Parameters
    ----------
    node_x : list
        List of x coordinates of the nodes.
    node_y : list
        List of y coordinates of the nodes, ordered by node index.
    node_betweenness : list
        List of betweenness values for each node, ordered by node index.
    percentile : float
        Percentile of nodes with the highest betweenness to calculate the
        clustering coefficient for. Between 0 and 1.

    Returns
    -------
    high_bc_clustering : float
        Clustering coefficient for the nodes with the highest betweenness.
    high_bc_anisotropy : float
        Anisotropy for the nodes with the highest betweenness.

    Notes
    -----
    The nodes whose betweenness lies above the given percentile form the
    "high betweenness" subset; the clustering coefficient relates their mean
    distance to their center of mass to the mean distance of all nodes to
    that same center, and the anisotropy is the eigenvalue ratio of their
    coordinate covariance matrix.

    References
    ----------
    .. [1] Kirkley, A., Barbosa, H., Barthelemy, M. & Ghoshal, G. From the betweenness
       centrality in street networks to structural invariants in random planar
       graphs. Nat Commun 9, 2501 (2018).
       https://www.nature.com/articles/s41467-018-04978-z
    """
    # Stack the inputs into rows of (x, y, betweenness).
    nodes = np.column_stack((node_x, node_y, node_betweenness))
    # Order rows by ascending betweenness so the top slice sits at the end.
    nodes = nodes[np.argsort(nodes[:, 2])]
    # Index of the first row belonging to the high-betweenness subset.
    split = int(len(nodes) * percentile)
    clustering = __calculate_high_bc_clustering(nodes, split)
    anisotropy = __calculate_high_bc_anisotropy(nodes[split:, :2])
    return clustering, anisotropy


def __calculate_high_bc_clustering(coord_bc, threshold_idx):
    r"""High betweenness nodes clustering coefficient.

    .. math::
        C_{\theta} =
        \frac{1}{N_{\theta}\left\langle X \right\rangle}
        \sum_{i = 1}^{N_{\theta}} \| x_i - x_{\mathrm{cm}, \theta} \|

    .. math::
        \langle X \rangle = \frac{1}{N}
        \sum_{i = 1}^{N} \| x_i - x_{\mathrm{cm}, \theta} \|

    .. math::
        x_{\mathrm{cm}, \theta} = \frac{1}{N_{\theta}}
        \sum_{i = 1}^{N_{\theta}} x_i

    The distance calculation :math:`\| x_i - x_{\mathrm{cm}, \theta} \|` includes the
    x and y coordinates of the node, and is the Euclidean distance. In this case it
    is the Frobenius norm of the difference between the node coordinates and the
    center of mass of the high betweenness nodes.

    Parameters
    ----------
    coord_bc : np.ndarray
        Array of node coordinates and betweenness values, sorted by betweenness.
    threshold_idx : int
        Index of the first node to consider as high betweenness.

    Returns
    -------
    high_bc_clustering : float
        Clustering coefficient for the nodes with the highest betweenness.

    Raises
    ------
    ValueError
        If the coordinate array has less than two nodes.
    ValueError
        If the threshold index is negative or not less than the number of nodes.
    """
    if len(coord_bc) < 2:
        raise ValueError("Coordinate array must have at least two nodes.")
    if threshold_idx >= len(coord_bc):
        raise ValueError("Threshold index must be less than the number of nodes.")
    # A negative index would silently wrap around (Python slicing) and mix
    # low-betweenness nodes into the high-betweenness subset.
    if threshold_idx < 0:
        raise ValueError("Threshold index must not be negative.")
    # Center of mass of high betweenness nodes
    high_bc_cm = np.mean(coord_bc[threshold_idx:, :2], axis=0)
    # Average distance to center of mass
    avg_dist = np.mean(
        np.linalg.norm(coord_bc[threshold_idx:, :2] - high_bc_cm, axis=1)
    )
    # Norm by average distance of all nodes
    return avg_dist / np.mean(np.linalg.norm(coord_bc[:, :2] - high_bc_cm, axis=1))


def __calculate_high_bc_anisotropy(coord_high_bc):
    r"""High betweenness nodes anisotropy.

    The high betweenness anisotropy is the ratio
    :math:`A_{\theta}=\lambda_1/\lambda_2`, where :math:`\lambda_i` are the positive
    eigenvalues of the covariance matrix of the high betweenness nodes, and
    :math:`\lambda_1 \geq \lambda_2`. [1]_

    For collinear nodes the smaller eigenvalue is zero and the anisotropy is
    infinite.

    Parameters
    ----------
    coord_high_bc : np.ndarray
        Array of node coordinates of the high betweenness nodes.

    Returns
    -------
    high_bc_anisotropy : float
        Anisotropy for the nodes with the highest betweenness.

    Raises
    ------
    ValueError
        If the number of high betweenness nodes is less than 2.

    References
    ----------
    .. [1] Kirkley, A., Barbosa, H., Barthelemy, M. & Ghoshal, G. From the betweenness
       centrality in street networks to structural invariants in random planar
       graphs. Nat Commun 9, 2501 (2018).
       https://www.nature.com/articles/s41467-018-04978-z
    """
    if len(coord_high_bc) < 2:
        raise ValueError(
            "High betweenness nodes must be at least 2, for less the anisotropy is "
            "not defined."
        )
    # Covariance matrix of the coordinates (2x2, symmetric by construction)
    cov = np.cov(coord_high_bc.T)
    # `eigvalsh` exploits the symmetry: it guarantees real eigenvalues in
    # ascending order, whereas the general `eigvals` may return complex
    # values with tiny imaginary noise for symmetric input.
    eigvals = np.linalg.eigvalsh(cov)[::-1]
    # Ratio of largest to smallest eigenvalue
    return eigvals[0] / eigvals[1]
57 changes: 50 additions & 7 deletions superblockify/metrics/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
write_relative_increase_to_edges,
calculate_coverage,
betweenness_centrality,
calculate_high_bc_clustering,
)
from .plot import (
plot_distance_matrices,
Expand All @@ -21,7 +22,7 @@
plot_relative_difference,
plot_relative_increase_on_graph,
)
from ..config import logger, RESULTS_DIR
from ..config import logger, RESULTS_DIR, CLUSTERING_PERCENTILE
from ..plot import save_plot
from ..utils import compare_dicts

Expand Down Expand Up @@ -102,6 +103,8 @@ def __init__(self, unit="time"):
self.avg_path_length = {"S": None, "N": None}
self.directness = {"SN": None}
self.global_efficiency = {"NS": None}
self.high_bc_clustering = None
self.high_bc_anisotropy = None

self.distance_matrix = {}
self.predecessor_matrix = {}
Expand Down Expand Up @@ -176,6 +179,8 @@ def calculate_before(self, partitioner, make_plots=False):
# No `attr_suffix` for the full graph
)

self.calculate_high_bc_clustering(partitioner.graph, CLUSTERING_PERCENTILE)

def calculate_all(
self,
partitioner,
Expand Down Expand Up @@ -241,12 +246,6 @@ def calculate_all(
plot_distributions=make_plots,
)

self.calculate_all_measure_sums()

write_relative_increase_to_edges(
partitioner.graph, self.distance_matrix, self.node_list, "N", "S"
)

betweenness_centrality(
partitioner.graph,
self.node_list,
Expand All @@ -256,6 +255,12 @@ def calculate_all(
attr_suffix="_restricted",
)

self.calculate_all_measure_sums()

write_relative_increase_to_edges(
partitioner.graph, self.distance_matrix, self.node_list, "N", "S"
)

if make_plots:
# sort distance matrix dictionaries to follow start with E, S, N, ...
d_m = self.distance_matrix
Expand Down Expand Up @@ -347,6 +352,44 @@ def calculate_all_measure_sums(self):
)
logger.debug("Global efficiency %s: %s", key, self.global_efficiency[key])

def calculate_high_bc_clustering(self, graph, percentile):
    """Calculate the high betweenness node clustering and anisotropy.

    High betweenness nodes are the nodes above the given percentile of the
    betweenness centrality distribution. The results are stored on
    ``self.high_bc_clustering`` and ``self.high_bc_anisotropy``.

    Parameters
    ----------
    graph : networkx.Graph
        The graph to calculate the high betweenness node clustering for, needs to
        have x, y, and node_betweenness_normal attribute for each node.
    percentile : float or int
        The percentile of the betweenness centrality to use as a threshold for high
        betweenness nodes. 0.0 < percentile < 100.0.

    Raises
    ------
    ValueError
        If percentile is not a float between 0.0 and 100.0.
    """
    if not isinstance(percentile, (float, int)):
        raise ValueError(
            f"percentile needs to be a float or int, not {type(percentile)}"
        )
    if not 0.0 < percentile < 100.0:
        raise ValueError(
            f"percentile needs to be between 0.0 and 100.0, not {percentile}"
        )

    # Gather the per-node attributes in self.node_list order.
    xs, ys, bcs = [], [], []
    for node in self.node_list:
        attrs = graph.nodes[node]
        xs.append(attrs["x"])
        ys.append(attrs["y"])
        bcs.append(attrs["node_betweenness_normal"])

    # The measure function expects the percentile as a fraction in (0, 1).
    self.high_bc_clustering, self.high_bc_anisotropy = calculate_high_bc_clustering(
        node_x=xs,
        node_y=ys,
        node_betweenness=bcs,
        percentile=percentile / 100,
    )

def __str__(self):
"""Return a string representation of the metric object.

Expand Down
81 changes: 81 additions & 0 deletions tests/metrics/test_measures.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
wheel_graph,
)
from numpy import full, array, inf, array_equal, int32, int64, allclose
from numpy.random import default_rng
from scipy.sparse.csgraph import dijkstra

from superblockify.metrics.measures import (
Expand All @@ -25,6 +26,8 @@
calculate_coverage,
betweenness_centrality,
_calculate_betweenness,
__calculate_high_bc_clustering,
__calculate_high_bc_anisotropy,
)
from superblockify.utils import __edges_to_1d

Expand Down Expand Up @@ -648,3 +651,81 @@ def test_betweenness_centrality_weight_missing(graph):
del graph.edges[0, 1, 0]["weight"]
with pytest.raises(ValueError):
betweenness_centrality(graph, None, None, None, weight="weight")


@pytest.fixture(scope="module", params=[10, 100, 1000, 60000])
def clustering_data(request):
    """Generate reproducible random data for clustering tests.

    Returns a tuple ``(coord_bc, threshold_idx)``: an (n, 3) array of
    x, y, betweenness rows sorted by betweenness, and a split index drawn
    from ``[1, n - 2]`` so there is at least one low-betweenness node and
    at least two high-betweenness nodes. A threshold of 0 (all nodes high)
    or ``n - 1`` (single high node) would make the clustering coefficient
    exactly 1.0 or 0.0 and break the strict ``0 < c < 1`` assertions.
    """
    rng = default_rng(29384)
    coord = array(
        [
            (  # x-coord: local scatter plus a random global offset
                rng.uniform(low=-10, high=10, size=request.param)
                + rng.uniform(low=-180, high=180)
            ),
            (  # y-coord: local scatter plus a random global offset
                rng.uniform(low=-10, high=10, size=request.param)
                + rng.uniform(low=-90, high=90)
            ),
            # betweenness centrality, uniform in [0, 1)
            rng.uniform(low=0, high=1, size=request.param),
        ]
    ).T
    return coord[coord[:, 2].argsort()], rng.integers(
        low=1, high=request.param - 1
    )


def test___calculate_high_bc_clustering(
    clustering_data,
):  # pylint: disable=redefined-outer-name
    """The clustering coefficient of random data lies strictly in (0, 1)."""
    coord_bc, threshold_idx = clustering_data
    clustering = __calculate_high_bc_clustering(coord_bc, threshold_idx)
    assert 0.0 < clustering < 1.0


@pytest.mark.parametrize(
    ("coord_bc", "threshold_idx"),
    [
        (array([]), 0),  # empty coordinate array
        (array([[0, 0, 0]]), 1),  # only a single node
        (array([[0, 0, 0], [1, 1, 1]]), 2),  # threshold index out of bounds
    ],
)
def test___calculate_high_bc_clustering_faulty(coord_bc, threshold_idx):
    """Faulty input must raise a ValueError in the clustering calculation."""
    with pytest.raises(ValueError):
        __calculate_high_bc_clustering(coord_bc, threshold_idx)


def test___calculate_high_bc_anisotropy(
    clustering_data,
):  # pylint: disable=redefined-outer-name
    """Anisotropy is >= 1 and invariant under swapping the coordinate axes."""
    coord_bc, threshold_idx = clustering_data
    high_bc_coords = coord_bc[threshold_idx:, :2]
    anisotropy = __calculate_high_bc_anisotropy(high_bc_coords)
    # By definition lambda_1 >= lambda_2, so the ratio is at least 1.
    assert anisotropy >= 1.0
    # Swapping x and y columns must not change the eigenvalue ratio.
    assert __calculate_high_bc_anisotropy(high_bc_coords[:, ::-1]) == anisotropy


@pytest.mark.parametrize(
    ("coords", "expected"),
    [
        # isotropic point sets -> anisotropy 1
        ([[0, 0], [1, 0], [0, 1], [1, 1]], 1.0),
        ([[-20, 10], [-10, 10], [-20, 20], [-10, 20]], 1.0),
        ([[1, 0], [0, 1], [1, 2], [2, 1]], 1.0),
        ([[0, 0], [1, 0], [0, 1], [1, 1], [0.5, 0.5]], 1.0),
        # rectangles stretched 2:1 -> eigenvalue ratio 4
        ([[0, 0], [1, 0], [0, 2], [1, 2]], 4.0),
        ([[0, 0], [2, 0], [0, 1], [2, 1]], 4.0),
        ([[0, 0], [1, 0], [0, 2], [1, 2], [0.5, 1]], 4.0),
        # collinear nodes -> degenerate covariance, infinite anisotropy
        ([[0, 0], [0, 1]], inf),
        ([[0, 0], [1, 0]], inf),
    ],
)
def test___calculate_high_bc_anisotropy_special_cases(coords, expected):
    """Check anisotropy values for hand-constructed point sets."""
    assert __calculate_high_bc_anisotropy(array(coords)) == expected


@pytest.mark.parametrize("coords", [[], [[0, 0]], [[1, 1]]])
def test___calculate_high_bc_anisotropy_faulty(coords):
    """Test error catching of betweenness centrality anisotropy.

    Fewer than two nodes must raise a ValueError. (The duplicated
    ``test___test___`` prefix of the original name was a typo; the fixed
    name is still collected by pytest.)
    """
    with pytest.raises(ValueError):
        __calculate_high_bc_anisotropy(array(coords))
Loading