In [2]:
import pandas as pd
import numpy as np
import plotly.express as px
import xml.etree.ElementTree as ET
import pickle
import math
import copy
import requests
from rdp import *
token = "pk.eyJ1IjoibG1hZ25hbmEiLCJhIjoiY2s2N3hmNzgwMGNnODNqcGJ1N2l2ZXZpdiJ9.-aOxDLM8KbEQnJfXegtl7A"
px.set_mapbox_access_token(token)
n_voxel = 3
vox_divider = 2
nb_subvox = (10/vox_divider)

# I) Chargement des données GPX

In [3]:
def load_gpx():
    with open('gpx.df','rb') as infile:
        df = pickle.load(infile)
    begin = int(df.iloc[-1]["route_num"])
    print(begin)
    for i in range(begin+1, begin+1109+1):
        tree = ET.parse('Datas/GPS/GPX/data'+str(i)+'.gpx')
        if(len(tree.getroot()) > 1):
            root = tree.getroot()[1][0]
            df_temp = pd.DataFrame(columns=['lat', 'lon'])
            j=0
            for child in root:
                coord = child.attrib
                coord['lat'] = float(coord['lat'])
                coord['lon'] = float(coord['lon'])
                df_temp = df_temp.append(pd.DataFrame(coord, index=[j]))
                j+=1
            df_temp["route_num"] = i
            df = df.append(df_temp)
    with open('gpx.df', 'wb') as outfile:
        pickle.dump(df, outfile)

In [4]:
def request_map_matching(df_route):
    route = df_route.to_numpy()
    coord=""
    tab_requests = []
    i=0
    for i in range(len(route)):
        coord += str(route[i][1])+","+str(route[i][0])+";"
        if(i!=0 and i%99 == 0):
            coord = coord[:-1]
            tab_requests.append(requests.get("https://api.mapbox.com/matching/v5/mapbox/cycling/"+coord+"?access_token="+token))
            coord = ""
    if(i!=0 and i%99 != 0):
        coord = coord[:-1]
        tab_requests.append(requests.get("https://api.mapbox.com/matching/v5/mapbox/cycling/"+coord+"?access_token="+token))
    return tab_requests

# II) Algorithm de Douglas-Peucker

In [5]:
def rd_compression(df, nb_routes=1110, eps=1e-4):
    """
    Compress a dataframe with douglas-peucker's algorithm.

    Parameters
    ----------
    df : pandas' DataFrame with columns=['lat', 'lon', 'route_num']
        Dataframe to compress
    eps : int in [0, 1[ , optional
        Precision of the compression (high value = few points)
    nb_routes : int
        Number of routes to compress

    Returns
    -------
    pandas' DataFrame with columns=['lat', 'lon', 'route_num']
        the compressed DataFrame
    """
    
    df_simplified = pd.DataFrame(columns=['lat', 'lon', 'route_num'])
    for i in range(1, nb_routes+1):
        route = df[df['route_num']==i].values
        if(len(route)>0):
            simplified = rdp(np.delete(route, 2, 1), epsilon=eps)
            simplified = np.insert(simplified, 2, route[0][2], axis=1) #add the route_number to the compressed route
            df_temp = pd.DataFrame(simplified, columns=['lat', 'lon', 'route_num'])
            df_simplified = df_simplified.append(df_temp)
    return df_simplified

# Map Matching

In [136]:
with open("gpx.df",'rb') as infile:
    df = pickle.load(infile)
with open("gpx_matched.df",'rb') as infile:
    df_map_matched = pickle.load(infile)
for i in range(df_map_matched.iloc[-1]["route_num"]+1, df.iloc[-1]["route_num"]+1):
    df_temp = df[df["route_num"]==i]
    tab_requests = request_map_matching(df_temp)
    tab_points = []
    for req in tab_requests:
        response = req.json()
        if("tracepoints" in response):
            route = response["tracepoints"]
            for point in route:
                if(point != None):
                    tab_points.append([point['location'][1], point['location'][0], i])
    df_map_matched = df_map_matched.append(pd.DataFrame(tab_points, columns=["lat", "lon", "route_num"]))
    with open('gpx_matched.df', 'wb') as outfile:
        pickle.dump(df_map_matched, outfile)

In [218]:
with open("gpx_matched.df",'rb') as infile:
    df_map_matched = pickle.load(infile)
with open("gpx_matched_simplified.df",'rb') as infile:
    df_map_matched_simplified = pickle.load(infile)
nb_routes = df_map_matched.iloc[-1]["route_num"] - df_map_matched_simplified.iloc[-1]["route_num"]
df_map_matched_simplified = df_map_matched_simplified.append(rd_compression(df_map_matched, nb_routes))
df_map_matched_simplified
with open('gpx_matched_simplified.df', 'wb') as outfile:
    pickle.dump(df_map_matched_simplified, outfile)

# Pathfinding

In [223]:
def request_route(lat1, long1, lat2, long2, mode="cycling"):
    coord = str(long1)+","+str(lat1)+";"+str(long2)+","+str(lat2)
    return requests.get("https://api.mapbox.com/directions/v5/mapbox/"+mode+"/"+coord, 
                            params={"alternatives": "true", "geometries": "geojson", "steps": "true", "access_token": token}) 

In [317]:
with open("gpx_matched_simplified.df",'rb') as infile:
    df_map_matched_simplified = pickle.load(infile)
with open("gpx_pathfindind_cycling.df",'rb') as infile:
    df_pathfinding = pickle.load(infile)
nb_routes = 250
for i in range(df_pathfinding.iloc[-1]["route_num"]+1, df_pathfinding.iloc[-1]["route_num"]+1+nb_routes+1):
    save_route = True
    df_temp = df_map_matched_simplified[df_map_matched_simplified["route_num"]==i]
    if(not(df_temp.empty)):
        req = request_route(df_temp.iloc[0]["lat"], df_temp.iloc[0]["lon"],
                            df_temp.iloc[-1]["lat"], df_temp.iloc[-1]["lon"]) #mapbox request to find a route between the stations
        response = req.json()
        if(response['code']=='Ok'): #if a route have been found
            steps = response['routes'][0]['legs'][0]['steps'] #we browse all the steps of the route
            for step in steps:
                if(step['maneuver']['instruction'].find("Wharf") != -1):
                    save_route = False #if the route is not good (using a boat) we don't save it
                    break
            if(save_route): #if we save the route
                df_temp = pd.DataFrame.from_records(response['routes'][0]['geometry']['coordinates'], 
                                           columns=['lon', 'lat']) #create a DF from the route (nparray)
                df_temp["route_num"] = i
                df_pathfinding = df_pathfinding.append(df_temp) #save the DF in dict_trips
            else: #if we don't save we store an empty DF in dict_trips
                df_pathfinding = df_pathfinding.append(pd.DataFrame(columns=['lon', 'lat', 'route_num']))
            with open('gpx_pathfindind_cycling.df', 'wb') as outfile:
                pickle.dump(df_pathfinding, outfile)

# III) Affichage

In [6]:
def display(dfdisplay, n=75, line_group="route_num", color=None):
    """
    Display a dataframe of gps points on a mapbox map.
    Parameters
    ----------
    df or str : pandas' DataFrame with columns=['lat', 'lon', 'route_num'] or the name of a file containing one
        Dataframe to display or the file where it is located
    n : int, optional
        Number of routes to display
    line_group : str, optional
        Dataframe's attribute used to differenciate routes
    color : str, optional
        Dataframe's attribute used to color routes
    """
    if(type(dfdisplay) == str): #if df is a file location
        with open(dfdisplay,'rb') as infile:
            n+=1
            dfdisplay = pickle.load(infile) #open the file to load the dataframe
            dfdisplay = dfdisplay[dfdisplay[line_group]<n]
    fig = px.line_mapbox(dfdisplay, lat="lat", lon="lon", line_group=line_group, color=color, zoom=11)
    fig.show()

In [7]:
def display_routes(df, tab_routes, tab_voxels=[], line_group="route_num", color=None):
    dfdisplay = pd.DataFrame(columns=["lat", "lon", "route_num"])
    for i in range(len(tab_routes)):
        dfdisplay = dfdisplay.append(df[df["route_num"]==tab_routes[i]+1])
    display(dfdisplay, len(tab_routes), line_group, color)

# VI) Calcul des voxels

## 1) Fonctions utiles

In [12]:
import math
def truncate(number, digits) -> float:
    stepper = 10.0 ** digits
    return math.trunc(stepper * number) / stepper

In [13]:
def find_voxel_int(p):
    """
    Find the voxel in which a point is by truncating its position. Voxel's position are transformed into 
    int to be manipulated in an easier way.
    Parameters
    ----------
    p : list of two int
        The point 
    n : int, optional
        Number of digits to truncate
        
    Returns
    -------
    list of two int
        Position of the voxel's low left point
    """
    v_lat = math.trunc(p[0]*10**(n_voxel+1))
    v_lon = math.trunc(p[1]*10**(n_voxel+1))
    
    while(v_lat%nb_subvox != 0):
        v_lat -= 1
    while(v_lon%nb_subvox != 0):
        v_lon -= 1
    
    return [v_lat, v_lon]

In [131]:
def line_intersection(line1, line2):
    """
    Find the point of intersection between two lines
    Parameters
    ----------
    line1 : list of two points (a point is a list of two int)
        First line  
    line2 : list of two points (a point is a list of two int)
        Second line  
        
    Returns
    -------
    list of two int
        Position of the intersection
    """
    xdiff = (line1[0][0] - line1[1][0], line2[0][0] - line2[1][0])
    ydiff = (line1[0][1] - line1[1][1], line2[0][1] - line2[1][1])

    def det(a, b):
        return a[0] * b[1] - a[1] * b[0]

    div = det(xdiff, ydiff)
    if div == 0:
        #print("Line does not intersect")
        return [99999999999, 99999999999]

    d = (det(*line1), det(*line2))
    x = det(d, xdiff) / div
    y = det(d, ydiff) / div
    return [x, y]
    

In [15]:
def get_voxel_points(vox, num_vox):
    """
    Take the position of the low left point of a voxel transformed into an int 
    and return this voxel's four real points.
    Parameters
    ----------
    vox : list of two int
        Position of the voxel's low left point transformed into an int
    num_vox : int
        Number of the voxel, used later to differentiate voxels
        
    Returns
    -------
    list 
        list of the four points (a point is a list of two int)
    """
    tab_vox = []
    vox_float = [vox[0]*10**(-n_voxel-1), vox[1]*10**(-n_voxel-1)]
    vox_float.append(num_vox)
    vox_float.append(1)
    tab_vox.append(vox_float)
    tab_vox.append([vox_float[0]+nb_subvox*10**(-n_voxel-1), vox_float[1], num_vox, 1])
    tab_vox.append([vox_float[0]+nb_subvox*10**(-n_voxel-1), vox_float[1]+nb_subvox*10**(-n_voxel-1), num_vox, 1])
    tab_vox.append([vox_float[0], vox_float[1]+nb_subvox*10**(-n_voxel-1), num_vox, 1])
    return tab_vox


In [16]:
def get_adjacent_voxel(vox, lat_diff, lon_diff):
    return [vox[0]+lat_diff*nb_subvox, vox[1]+lon_diff*nb_subvox]

In [17]:
def voxel_convolution(vox, dict_vox, dict_vox_used, num_vox, lat_diff, lon_diff):
    """
    With a voxel, check if one of his neighbour exists and if it has already been used.
    ----------
    vox : list of two int
        Position of the voxel's low left point transformed into an int
    dict_vox : dict
        Dictionary of existing voxels 
    dict_vox_used : dict
        Dictionary of voxels that have already been used
    num_vox : int
        Number of the voxel, used later to differentiate voxels
    lat_diff : int
        Difference of latitude (the unit is voxel) between the voxel and the neighbour
    lon_diff : int
        Difference of longitude (the unit is voxel) between the voxel and the neighbour
        
    Returns
    -------
    list
        If the voxel exists and has not been used : 
            A list containing the voxel's low left point transformed into an int and the list containing all routes
            that are going through the voxel
        Else:
            An empty list
    """
    vox_adj = get_adjacent_voxel(vox, lat_diff, lon_diff)
    key_adj = str(int(vox_adj[0]))+";"+str(int(vox_adj[1]))
    if(key_adj in dict_vox and not(key_adj in dict_vox_used)):
        return [vox_adj, dict_vox[key_adj], key_adj]
    return []
        
    

## 2) Création du dictionaire 'voxel-routes'

In [18]:

def create_dict_vox(df, nb_routes):
    """
    With a dataframe containing gps points separated in routes, creates a dict of voxels.  
    Parameters
    ----------
     df : pandas' DataFrame with columns=['lat', 'lon', 'route_num']
        Dataframe to use 
    nb_routes : int
        Number of routes to use in the dataframe 
        
    Returns
    -------
    dict of voxels
        Keys of this dict are strings containing the position of voxels' low left points transformed to int
        and separated by a ';'.
        Values of this dict are lists containing the number of all routes that pass through the voxel.
    """
    
    dict_vox = {}
    
    for route_num in range(1, nb_routes+1):
        
        route = df[df["route_num"]==route_num]
        points = route.values.tolist()
        
        if(len(points) > 1):
            vox_int = find_voxel_int(points[0])
            key = str(int(vox_int[0]))+";"+str(int(vox_int[1])) #save the voxel
            if key in dict_vox:
                if(route_num not in dict_vox[key][0]):
                    dict_vox[key][2].append(route_num)
                else :
                    dict_vox[key] = [[], [], [route_num], []]
                    
            vox_int = find_voxel_int(points[-1])
            key = str(int(vox_int[0]))+";"+str(int(vox_int[1])) #save the voxel
            if key in dict_vox:
                if(route_num not in dict_vox[key][0]):
                    dict_vox[key][3].append(route_num)
                else :
                    dict_vox[key] = [[], [], [], [route_num]]

        for j in range(len(points)-1):
            p1 = points[j] #we take two points in the dataframe that create a line
            p2 = points[j+1]

            if(p1[0]>p2[0]):
                lat_orientation = -nb_subvox #the line is going down
            else:
                lat_orientation = nb_subvox #the line is going up

            if(p1[1]>p2[1]):
                lon_orientation = -nb_subvox #the line is going left 
            else:
                lon_orientation = nb_subvox #the line is goin right

            vox_int = find_voxel_int(p1) #find the start voxel
            vox_final_int = find_voxel_int(p2) #find the final voxel
            
            
            
            #while the final voxel has not been reached
            while(vox_int[0] != vox_final_int[0] or vox_int[1] != vox_final_int[1]):
                vox_float = [vox_int[0]*10**(-n_voxel-1), vox_int[1]*10**(-n_voxel-1)] #transform the vox into real points
                
                key = str(int(vox_int[0]))+";"+str(int(vox_int[1])) #save the voxel
                if key in dict_vox:
                    if(route_num not in dict_vox[key][0]):
                        dict_vox[key][0].append(route_num)
                else :
                    dict_vox[key] = [[route_num], [], [], []]
                    
                '''find the good intersection point (if the line is going up, we search the intersection between 
                it and the up line of the voxel for example)'''
                if(lat_orientation>0):
                    intersection_lat = line_intersection([p1, p2], [[vox_float[0]+nb_subvox*10**(-n_voxel-1), vox_float[1]],
                                                        [vox_float[0]+nb_subvox*10**(-n_voxel-1), vox_float[1]+nb_subvox*10**(-n_voxel-1)]])
                else:
                    intersection_lat = line_intersection([p1, p2], [vox_float, [vox_float[0], vox_float[1]+nb_subvox*10**(-n_voxel-1)]])

                    
                '''same for left and right'''
                if(lon_orientation>0): 
                    intersection_lon = line_intersection([p1, p2], [[vox_float[0], vox_float[1]+nb_subvox*10**(-n_voxel-1)], 
                                                        [vox_float[0]+nb_subvox*10**(-n_voxel-1), vox_float[1]+nb_subvox*10**(-n_voxel-1)]])
                else:
                    intersection_lon = line_intersection([p1, p2], [vox_float, [vox_float[0]+nb_subvox*10**(-n_voxel-1), vox_float[1]]])

                #calculate the distance between the first point of the line and the two intersection points
                intersection_lon_distance = sqrt((p1[0]-intersection_lon[0])**2+(p1[1]-intersection_lon[1])**2)
                intersection_lat_distance = sqrt((p1[0]-intersection_lat[0])**2+(p1[1]-intersection_lat[1])**2)

                #find the shorter distance then go to the next voxel using the orientation of the line
                if(intersection_lat_distance<intersection_lon_distance): 
                    vox_int[0] += lat_orientation
                else:
                    vox_int[1] += lon_orientation
                    
            key = str(int(vox_int[0]))+";"+str(int(vox_int[1])) #end of the while loop, save the last voxel
            if key in dict_vox:
                if(route_num not in dict_vox[key][0]):
                    dict_vox[key][0].append(route_num)
            else :
                [[route_num], [], []]
    
    for key in dict_vox:
        tab_routes = dict_vox[key][0]
        vox_str = key.split(";")
        vox_int = [int(vox_str[0]), int(vox_str[1])]
        
        #creation of a list containing all neighbours of the voxel
        tab_vox_adj = []
        tab_vox_adj.append(voxel_convolution(vox_int, dict_vox, {}, 0, -1, 0))
        tab_vox_adj.append(voxel_convolution(vox_int, dict_vox, {}, 0, 1, 0))
        tab_vox_adj.append(voxel_convolution(vox_int, dict_vox, {}, 0, 0, 1))
        tab_vox_adj.append(voxel_convolution(vox_int, dict_vox, {}, 0, 0, -1))
        for vox in tab_vox_adj:
            if(len(vox)>0):
                if(not(set(tab_routes).issubset((set(vox[1][0]))))):
                    diff_tab_routes = list((set(tab_routes)-set(vox[1][0]))|(set(vox[1][0])-set(tab_routes)))
                    for i in range(len(diff_tab_routes)):
                        if(diff_tab_routes[i] not in tab_routes
                          and diff_tab_routes[i] not in dict_vox[key][1]):
                            dict_vox[key][1].append(diff_tab_routes[i])
                        
    return dict_vox


    


In [19]:
def get_voxels_with_min_routes(dict_vox, min_routes):
    """
    Return all voxels or groups of voxels that have at least a number of routes passing through themselves.
    Parameters
    ----------
    dict_vox : dict
        Dictionary of existing voxels 
    min_routes : int
        Minimum number of routes passing through voxels / groups of voxels
        
    Returns
    -------
    list 
        List of voxels that have or are part of a group that have at least 'min_routes' routes 
        passing through itself. A voxel is a list of four points.
    """
    num_vox = 0 #used to differentiate voxels
    dict_vox_used = {}
    tab_voxel_with_min_routes = [] #final list containing all voxels that matches with the conditions
    
    for key in dict_vox: #for all voxels
        tab_routes = dict_vox[key][0]+dict_vox[key][1]
        
        #print(dict_vox[key][0], dict_vox[key][1])
        
        vox_str = key.split(";")
        vox_int = [int(vox_str[0]), int(vox_str[1])]
        
        
        #if the voxels has at least 'min_routes' routes and has not been saved we save it
        if(key not in dict_vox_used and len(tab_routes) >= min_routes):
            tab_voxel_with_min_routes += get_voxel_points(vox_int, num_vox)
            dict_vox_used[key] = True
            num_vox -= 1
            
    return tab_voxel_with_min_routes

In [318]:
with open("gpx_matched_simplified.df",'rb') as infile:
        df_simplified = pickle.load(infile)
               
nb_routes = 2
min_routes = 2

df_simplified["type"] = 0
df_display = df_simplified[(df_simplified["route_num"]<=nb_routes)]

dict_voxels = create_dict_vox(df_display, nb_routes)
tab_vox = get_voxels_with_min_routes(dict_voxels, min_routes)
print(len(tab_vox)/4)
        
df = pd.DataFrame(tab_vox, columns=["lat", "lon", "route_num", "type"])
df_display = df_display.append(df)
display(df_display, color="type")    


11.0


In [319]:
with open("gpx_pathfindind_cycling.df",'rb') as infile:
        df_simplified = pickle.load(infile)
               
nb_routes = 2
min_routes = 2

df_simplified["type"] = 0
df_display = df_simplified[(df_simplified["route_num"]<=nb_routes)]

dict_voxels = create_dict_vox(df_display, nb_routes)
tab_vox = get_voxels_with_min_routes(dict_voxels, min_routes)
print(len(tab_vox)/4)
        
df = pd.DataFrame(tab_vox, columns=["lat", "lon", "route_num", "type"])
df_display = df_display.append(df)
display(df_display, color="type")    


52.0


In [118]:
def get_tab_routes_voxels(dict_voxels, nb_routes):
    t = []
    for key in dict_voxels:
        for i in range(nb_routes):
            if(len(t)<=i):
                t.append([])
            tab_routes = dict_voxels[key][0]+dict_voxels[key][1]
            if(i+1 in tab_routes):
                t[i].append(key)
    return t

In [119]:
def get_similitude(tab_routes_voxels, num_route1, num_route2):
    
    common_parts = len(list(set(tab_routes_voxels[num_route1-1]) & set(tab_routes_voxels[num_route2-1])))
    #print(len(tab_routes_voxels[num_route1-1]))
    
    sim1 = -1
    sim2 = -1
    
    if(len(tab_routes_voxels[num_route1-1]) == 0):
        sim1 = 0.0
    if(len(tab_routes_voxels[num_route2-1]) == 0):
        sim2 = 0.0
        
    if(sim1<0):
        sim1 = common_parts/len(tab_routes_voxels[num_route1-1])
    if(sim2<0):
        sim2 = common_parts/len(tab_routes_voxels[num_route2-1])
    
    return [1-sim1, 1-sim2]
    

In [132]:
with open("gpx_matched_simplified.df",'rb') as infile:
        df_cluster = pickle.load(infile)
        
dict_voxels_cluster = create_dict_vox(df_cluster, df_cluster.iloc[-1]["route_num"])
tab_routes_voxels = get_tab_routes_voxels(dict_voxels_cluster, df_cluster.iloc[-1]["route_num"])


# V) Heat Maps

In [328]:
with open("gpx_matched_simplified.df",'rb') as infile:
    df_cluster = pickle.load(infile)       
dict_voxels_cluster = create_dict_vox(df_cluster, df_cluster.iloc[-1]["route_num"])

tab = []
for key in dict_voxels_cluster:
    tab_routes = dict_voxels_cluster[key][0]+dict_voxels_cluster[key][1]
    vox_str = key.split(";")
    vox_int = [int(vox_str[0]), int(vox_str[1])]
    vox_pos = get_voxel_points(vox_int, 0)
    if(len(tab_routes) >= 0):
        tab.append([vox_pos[0][0], vox_pos[0][1], len(tab_routes)])
    
df = pd.DataFrame(tab, columns=["lat", "lon", "value"])
fig = px.scatter_mapbox(df, lat="lat", lon="lon",  color="value", size="value", zoom=10)
fig.show()

In [327]:
with open("gpx_pathfindind_cycling.df",'rb') as infile:
    df_cluster = pickle.load(infile)       
dict_voxels_cluster = create_dict_vox(df_cluster, df_cluster.iloc[-1]["route_num"])

tab = []
for key in dict_voxels_cluster:
    tab_routes = dict_voxels_cluster[key][0]+dict_voxels_cluster[key][1]
    vox_str = key.split(";")
    vox_int = [int(vox_str[0]), int(vox_str[1])]
    vox_pos = get_voxel_points(vox_int, 0)
    if(len(tab_routes) >= 0):
        tab.append([vox_pos[0][0], vox_pos[0][1], len(tab_routes)])
    
df = pd.DataFrame(tab, columns=["lat", "lon", "value"])
fig = px.scatter_mapbox(df, lat="lat", lon="lon",  color="value", size="value", zoom=10)
fig.show()

In [330]:
with open("gpx_pathfindind_cycling.df",'rb') as infile:
    df_cluster = pickle.load(infile)       
dict_voxels_cluster = create_dict_vox(df_cluster, df_cluster.iloc[-1]["route_num"])

tab = []
for key in dict_voxels_cluster:
    tab_routes = dict_voxels_cluster[key][2]
    vox_str = key.split(";")
    vox_int = [int(vox_str[0]), int(vox_str[1])]
    vox_pos = get_voxel_points(vox_int, 0)
    if(len(tab_routes) >= 0):
        tab.append([vox_pos[0][0], vox_pos[0][1], len(tab_routes)])
    
df = pd.DataFrame(tab, columns=["lat", "lon", "value"])
fig = px.scatter_mapbox(df, lat="lat", lon="lon",  color="value", size="value", zoom=10)
fig.show()

In [332]:
with open("gpx_matched_simplified.df",'rb') as infile:
    df_cluster = pickle.load(infile)       
dict_voxels_cluster = create_dict_vox(df_cluster, df_cluster.iloc[-1]["route_num"])

tab = []
for key in dict_voxels_cluster:
    tab_routes = dict_voxels_cluster[key][2]
    vox_str = key.split(";")
    vox_int = [int(vox_str[0]), int(vox_str[1])]
    vox_pos = get_voxel_points(vox_int, 0)
    if(len(tab_routes) >= 0):
        tab.append([vox_pos[0][0], vox_pos[0][1], len(tab_routes)])
    
df = pd.DataFrame(tab, columns=["lat", "lon", "value"])
fig = px.scatter_mapbox(df, lat="lat", lon="lon",  color="value", size="value", zoom=10)
fig.show()

# VI) Matrice de distance et DBSCAN

In [124]:
distance_matrix = np.empty([df_cluster.iloc[-1]["route_num"], df_cluster.iloc[-1]["route_num"]])
for i in range(df_cluster.iloc[-1]["route_num"]):
    for j in range(i, df_cluster.iloc[-1]["route_num"]):
        coeff = get_similitude(tab_routes_voxels, i+1, j+1)
        distance_matrix[i][j] = coeff[0]
        distance_matrix[j][i] = coeff[1]
print(np.diagonal(distance_matrix))

[0. 0. 0. ... 0. 1. 0.]


In [125]:
from sklearn.cluster import *
clustering = DBSCAN(eps=0.3, min_samples=25, metric='precomputed').fit(distance_matrix)
dict_cluster = {}
clusters = clustering.labels_
for i in range(len(clusters)):
    if(clusters[i] in dict_cluster):
        dict_cluster[clusters[i]].append(i)
    else:
        dict_cluster[clusters[i]] = [i]
print(len(dict_cluster))

25


In [126]:
dict_cluster = {}
clusters = clustering.labels_
for i in range(len(clusters)):
    if(clusters[i] in dict_cluster):
        dict_cluster[clusters[i]].append(i)
    else:
        dict_cluster[clusters[i]] = [i]

In [129]:
nb_cluster = 16
print(len(dict_cluster[nb_cluster]))
display_routes(df_cluster, dict_cluster[nb_cluster])

52
