In [7]:
import pandas as pd
import numpy as np
import geopandas as gp
import matplotlib.pyplot as plt

In [8]:
import networkx as nx
import osmnx as ox

ox.settings.log_console=True

map_graph = ox.graph_from_place('Burgos, Spain', network_type='drive')
largest_cc = max(nx.strongly_connected_components(map_graph), key=len)
map_graph = map_graph.subgraph(largest_cc)

METS

In [9]:
def matrix_element_similarity(G,T):
    M=np.zeros((len(T),len(T)))
    G_aux=G.copy()

    for i,track_i in enumerate(T):
        for j,track_j in enumerate(T):
            if i==j:
                continue

            G_aux.add_node("vSource")
            for n in track_i:
                G_aux.add_edge("vSource",n,length=0) #NOT EQUAL TO: n,"vSource" (directed)
            dists=[nx.shortest_path_length(G_aux,"vSource",m, weight='length') for m in track_j]
            G_aux.remove_node("vSource")

            M[i,j]=sum(dists)/len(track_i)
    return M

In [10]:
def matrix_element_similarity_v2(G,T):
    M=np.zeros((len(T),len(T)))
    G_aux=G.copy()

    for i,track_i in enumerate(T):
        for j,track_j in enumerate(T):
            if i==j:
                continue

            G_aux.add_node("vSource")
            G_aux.add_edges_from(("vSource", n, {'length': 0}) for n in track_i)
            dists=[nx.shortest_path_length(G_aux,"vSource",m, weight='length') for m in track_j]
            G_aux.remove_node("vSource")

            M[i,j]=sum(dists)/len(track_i)
    return M

ROW-TS

In [14]:
def row_wise_track_similarity(G,T):
    M=np.zeros((len(T),len(T)))
    G_aux=G.copy()

    for i,track_i in enumerate(T):

        G_aux.add_node("vSource")
        G_aux.add_edges_from(("vSource", n, {'length': 0}) for n in track_i)
        dists=nx.single_source_shortest_path_length(G_aux,"vSource")
        G_aux.remove_node("vSource")

        for j,track_j in enumerate(T):
            if i==j:
                continue
            
            max_dist=max(dists[m] for m in track_j)
                    
            M[i,j]=max_dist
    return M

Performance test

In [27]:
#Choose map
G=map_graph

#Generate random paths to test
from random import choice
n_paths=10
T=[]
for i in range(n_paths):
    orig=choice(list(G.nodes()))
    dest=choice(list(G.nodes()))
    T.append(nx.shortest_path(G, orig, dest, weight='length'))

#Compute tha matrix
import time
t0=time.time()
M_mets=matrix_element_similarity(G,T)
t1=time.time()
print("METS (v1) takes {:.2f}s".format(t1-t0))
t0=time.time()
M_mets=matrix_element_similarity_v2(G,T)
t1=time.time()
print("METS (v2) takes {:.2f}s".format(t1-t0))
t0=time.time()
M_rowts=row_wise_track_similarity(G,T)
t1=time.time()
print("ROW-TS takes {:.2f}s".format(t1-t0))

Clustering

In [78]:
from sklearn_extra.cluster import KMedoids

def k_corridors_medoid_summarizer(M,k,T,**kwargs):
    kmedoids = KMedoids(n_clusters=k, metric='precomputed', **kwargs) #For example random_state=0
    kmedoids.fit(M.T) #M.T is not M!! WHICH ONE TO CHOOSE? :((

    cluster_labels = kmedoids.labels_
    medoid_indices = kmedoids.medoid_indices_

    k_corridors=[T[i] for i in medoid_indices]
    return k_corridors,medoid_indices,cluster_labels

Track converters from list of edges and lists of nodes

In [24]:
def edges_to_nodes(track):
    return [edge[0] for edge in track]

def nodes_to_edges(track):
    return [(track[i],track[i+1]) for i in range(len(track)-1)]

Metrics definitions

In [28]:
def absolute_intersect(track_i,track_j):
    """
    Returns 1 if there is at least one edge from track_i present in track_j,
    and 0 otherwise
    """

    #Convert list of nodes into list of edges
    set_i=set(nodes_to_edges(track_i))
    set_j=set(nodes_to_edges(track_j))

    #Calculate intesection
    intersect=set_i.intersection(set_j)
    n=len(intersect)
    
    return 0 if n==0 else 1
    

In [36]:
def relative_intersect(track_i,track_j):
    """
    Returns what portion of the edges from track_i are present in track_j
    """

    #Convert list of nodes into list of edges
    set_i=set(nodes_to_edges(track_i))
    set_j=set(nodes_to_edges(track_j))

    #Calculate intesection
    intersect=set_i.intersection(set_j)
    n=len(intersect)
    
    return n/len(set_i)

In [81]:
def lenght_relative_intersect(track_i,track_j,G):
    """
    Returns what portion of the length of track_i is in the cummulative length of the edges
    also present in track_j
    """

    #Convert list of nodes into list of edges
    set_i=set(nodes_to_edges(track_i))
    set_j=set(nodes_to_edges(track_j))

    #Calculate intersection
    intersect=set_i.intersection(set_j)

    l_intersect=sum([G.get_edge_data(*edge)[0]['length'] for edge in intersect])
    l_i=sum([G.get_edge_data(*edge)[0]['length'] for edge in set_i])


    
    return l_intersect/l_i

In [76]:
def evaluate_metric(metric_fun,T,k_corridors,**kwargs):
    """
    Returs the cummulative value (normalized by the number of tracks) of the 
    selected metric for the selected k corridors.
    T: A list of tracks to be evaluated (if k_corridors are included they will also be evaluated)
    k_corridors: A list of k selected corridors
    metric_fun: The metric function to evaluate as metric_fun(T[i],k_corridors[j],**kwargs)
    """

    result=0
    for track_i in T:
        for track_j in k_corridors:
            result+=metric_fun(track_i,track_j,**kwargs)

    result/=(len(T)*len(k_corridors))

    return result

Demo:

In [96]:
#Choose map
G=map_graph

#Generate random paths to test
from random import choice
n_paths=100
T=[]
for i in range(n_paths):
    orig=choice(list(G.nodes()))
    dest=choice(list(G.nodes()))
    T.append(nx.shortest_path(G, orig, dest, weight='length'))

#Compute the similarity matrix
M=row_wise_track_similarity(G,T)

#Cluster in k corridors
n_clusters=8
k_corridors,k_index,cluster_labels=k_corridors_medoid_summarizer(M,n_clusters,T,random_state=0)

#Test performance
#*NOTE: Maybe should remove k_corridors from T ...
abs_inter=evaluate_metric(absolute_intersect,T,k_corridors)
print("Absolute intersection: {:.2f}".format(abs_inter))

rel_inter=evaluate_metric(relative_intersect,T,k_corridors)
print("Relative intersection: {:.4f}".format(rel_inter))

l_rel_inter=evaluate_metric(lenght_relative_intersect,T,k_corridors,G=G)
print("Length intersection: {:.4f}".format(l_rel_inter))


Absolute intersection: 0.49
Relative intersection: 0.0807
Length intersection: 0.0828
