In [45]:
import pandas as pd
import numpy as np
import pickle
import plotly.express as px
import plotly.graph_objects as go
from copy import deepcopy
import json
import torch
import torch.nn as nn
from math import sin, cos, sqrt, atan2, radians
import copy
from sklearn.cluster import *
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score
from sklearn_extra.cluster import KMedoids
import geopy.distance
from scipy.spatial.distance import *
import random
import numpy as np
import osmnx as ox
import networkx as nx
from sklearn.neighbors import KDTree
import folium
import matplotlib.pyplot as plt

import python.data as data
import python.display as dp
import python.voxels as voxel
import python.metric as metric
import python.clustering as cl
import python.RNN as RNN
#import python.learning as learning
#from python.NN import *

project_folder = "monresovelo"

display = False
save_dict_voxels_pathfinding_clustered = False
new_voxels_clustering = True
save_kmeans_voxels = False

In [49]:
with open("files/"+project_folder+"/data/observations.df",'rb') as infile:
    df_simplified = pickle.load(infile)
    df_simplified = pd.DataFrame(df_simplified, dtype=object)
tab_routes_voxels_simplified, dict_voxels_simplified = voxel.create_dict_vox(df_simplified, df_simplified.iloc[0]["route_num"], df_simplified.iloc[-1]["route_num"])
tab_routes_voxels_simplified_global = voxel.get_tab_routes_voxels_global(dict_voxels_simplified, df_simplified.iloc[-1]["route_num"], df_simplified.iloc[0]["route_num"])

tab = []
for key in dict_voxels_simplified:
    vox_str = key.split(";")
    vox_int = [int(vox_str[0]), int(vox_str[1])]
    vox_pos = voxel.get_voxel_points(vox_int, 0)
    tab.append([vox_pos[0][0], vox_pos[0][1], dict_voxels_simplified[key]["cyclability_coeff"]])

df = pd.DataFrame(tab, columns=["lat", "lon", "Cyclability coefficient"])
fig = px.scatter_mapbox(df, lat="lat", lon="lon",  color="Cyclability coefficient", size="Cyclability coefficient", zoom=11)
fig.show()

KeyboardInterrupt: 

## Map Matching

In [2]:
data.mapmatching("files/"+project_folder+"/data/observations.df", 
                 "files/"+project_folder+"/data/observations_matched.df", 0)

In [3]:
data.simplify_gps("files/"+project_folder+"/data/observations_matched.df", 
                  "files/"+project_folder+"/data/observations_matched_simplified.df", 0)

In [4]:
data.simplify_gps("files/"+project_folder+"/data/osmnx_pathfinding.df", 
                  "files/"+project_folder+"/data/osmnx_pathfinding_simplified.df", 0)

## Pathfinding

In [5]:
data.pathfinding_mapbox("files/"+project_folder+"/data/observations_matched_simplified.df", 
                        "files/"+project_folder+"/data/mapbox_pathfinding_cycling.df", 0)

## Distance

In [6]:
data.compute_distance("files/"+project_folder+"/data/observations_matched_simplified.df",
                      "files/"+project_folder+"/distances/distances_observations.tab")
data.compute_distance("files/"+project_folder+"/data/mapbox_pathfinding_cycling.df",
                      "files/"+project_folder+"/distances/distances_mapbox.tab")
data.compute_distance("files/"+project_folder+"/data/osmnx_pathfinding_simplified.df", 
                      "files/"+project_folder+"/distances/distances_osmnx.tab")

# Data Loading

In [7]:
with open("files/"+project_folder+"/data/observations_matched_simplified.df",'rb') as infile:
    df_s = pickle.load(infile)
    df_simplified = pd.DataFrame(columns=["lat", "lon", "route_num"])
    for i in range(df_s.iloc[0]["route_num"], df_s.iloc[-1]["route_num"]+1):
        df_temp = df_s[df_s["route_num"]==i]
        df_temp["route_num"] = i-1
        df_simplified = df_simplified.append(df_temp)
tab_routes_voxels_simplified, dict_voxels_simplified = voxel.create_dict_vox(df_simplified, df_simplified.iloc[0]["route_num"], df_simplified.iloc[-1]["route_num"])
tab_routes_voxels_simplified_global = voxel.get_tab_routes_voxels_global(dict_voxels_simplified, df_simplified.iloc[-1]["route_num"], df_simplified.iloc[0]["route_num"])


with open("files/"+project_folder+"/data/osmnx_pathfinding_simplified.df",'rb') as infile:
    df_p = pickle.load(infile) 
    df_pathfinding = pd.DataFrame(columns=["lat", "lon", "route_num"])
    for i in range(df_p.iloc[0]["route_num"], df_p.iloc[-1]["route_num"]+1):
        df_temp = df_p[df_p["route_num"]==i]
        df_temp["route_num"] = i-1
        df_pathfinding = df_pathfinding.append(df_temp)
tab_routes_voxels_pathfinding, dict_voxels_pathfinding = voxel.create_dict_vox(df_pathfinding, df_pathfinding.iloc[0]["route_num"], df_pathfinding.iloc[-1]["route_num"])
tab_routes_voxels_pathfinding_global = voxel.get_tab_routes_voxels_global(dict_voxels_pathfinding, df_pathfinding.iloc[-1]["route_num"], df_pathfinding.iloc[0]["route_num"])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [8]:
print(len(tab_routes_voxels_simplified))

899


## Piste cyclable

In [9]:
#df_bikepath = data.load_bikepath('datas/pvo_patrimoine_voirie.json')
with open("files/"+project_folder+"/data/onlylyon_bikepath_fusioned.df", "rb") as outfile:
    df_bikepath = pickle.load(outfile)
    df_bikepath = pd.DataFrame(df_bikepath, dtype=object)
      
_, dict_voxels_bikepath = voxel.create_dict_vox(df_bikepath, df_bikepath.iloc[0]["route_num"], df_bikepath.iloc[-1]["route_num"], True)


In [10]:
def bikepath_fusion(df_bikepath):
    verbose = False

    n_route = 0
    n_route_next = n_route+1
    n = n_route
    nb_change = 0
    while(n_route < df_bikepath.iloc[-1]["route_num"]):
        p1 = df_bikepath[df_bikepath["route_num"]==n_route].values.tolist()[-1][:2]
        p2 = df_bikepath[df_bikepath["route_num"]==n_route_next].values.tolist()[0][:2]
        v1 = voxel.find_voxel_int(p1)
        v2 = voxel.find_voxel_int(p2)
        if(v1 == v2):
            tab_changes = [n_route]
            while(v1 == v2):
                tab_changes.append(n_route_next)
                n_route_next += 1
                n_route += 1
                p1 = df_bikepath[df_bikepath["route_num"]==n_route].values.tolist()[-1][:2]
                p2 = df_bikepath[df_bikepath["route_num"]==n_route_next].values.tolist()[0][:2]
                v1 = voxel.find_voxel_int(p1)
                v2 = voxel.find_voxel_int(p2)
            for i in range(len(tab_changes)):
                df_bikepath = df_bikepath.replace({"route_num": tab_changes[i]}, n)
                if(verbose):
                    print(tab_changes[i], "->", n, "equals")
                if(i != 0):
                    nb_change += 1
            n+=1
            n_route += 1
        else:
            df_bikepath = df_bikepath.replace({"route_num": n_route}, n)
            if(verbose):
                print(n_route, "->", n)
            n_route += 1
            n += 1
        n_route_next += 1
    df_bikepath = df_bikepath.replace({"route_num": df_bikepath.iloc[-1]["route_num"]}, n)
    print(nb_change, "changes")
    return df_bikepath
    
#df_bikepath_fusioned = bikepath_fusion(df_bikepath)

In [11]:
def bikepath_fusion_2(df_bikepath):
    df_bikepath_fusioned = pd.DataFrame()
    route_num_fusioned = 0
    nb_changes = 0
    for i in range(int(df_bikepath.iloc[-1]["route_num"])):
        if(len(df_bikepath[df_bikepath["route_num"]==i]) > 0):
            print(i)
            for j in range(i, int(df_bikepath.iloc[-1]["route_num"])):
                if(len(df_bikepath[df_bikepath["route_num"]==j]) > 0):
                    p1 = df_bikepath[df_bikepath["route_num"]==i].values.tolist()[-1][:2]
                    p2 = df_bikepath[df_bikepath["route_num"]==j].values.tolist()[0][:2]
                    v1 = voxel.find_voxel_int(p1)
                    v2 = voxel.find_voxel_int(p2)
                    if(v1 == v2):
                        nb_changes += 1
                        df_bikepath = df_bikepath.replace({"route_num": j}, i)
                        
            df_temp = df_bikepath[df_bikepath["route_num"]==i]
            df_temp["route_num"] = route_num_fusioned
            route_num_fusioned += 1
            df_bikepath_fusioned = df_bikepath_fusioned.append(df_temp)            
    print(nb_changes, "changes")
    return df_bikepath_fusioned

#df_bikepath_fusioned = bikepath_fusion_2(df_bikepath)

# Test voxels

In [12]:
if(display):
    with open("files/"+project_folder+"/data/observations_matched_simplified.df",'rb') as infile:
            df_simplified = pickle.load(infile)

    nb_routes = 2
    min_routes = 2
    df_simplified["type"] = 0
    df_display = df_simplified[(df_simplified["route_num"]<=nb_routes)]
    print(df_display)
    _, dict_voxels = voxel.create_dict_vox(df_display, 1, nb_routes)
    tab_vox = voxel.get_voxels_with_min_routes(dict_voxels, min_routes)
    print(len(tab_vox)/4)

    df = pd.DataFrame(tab_vox, columns=["lat", "lon", "route_num", "type"])
    df_display = df_display.append(df)
    dp.display(df_display, color="type")    
    print(tab_vox)
    print(df)


# Heat Maps

In [13]:
if(display):     
    tab = []
    for key in dict_voxels_simplified:
        vox_str = key.split(";")
        vox_int = [int(vox_str[0]), int(vox_str[1])]
        vox_pos = voxel.get_voxel_points(vox_int, 0)
        tab.append([vox_pos[0][0], vox_pos[0][1], dict_voxels_simplified[key]["cyclability_coeff"]])

    df = pd.DataFrame(tab, columns=["lat", "lon", "Cyclability coefficient"])
    fig = px.scatter_mapbox(df, lat="lat", lon="lon",  color="Cyclability coefficient", size="Cyclability coefficient", zoom=11)
    fig.show()
    #fig.write_image("images/heatmap_simplified.png")

In [14]:
if(display):
    tab = []
    for key in dict_voxels_pathfinding:
        vox_str = key.split(";")
        vox_int = [int(vox_str[0]), int(vox_str[1])]
        vox_pos = voxel.get_voxel_points(vox_int, 0)
        tab.append([vox_pos[0][0], vox_pos[0][1], dict_voxels_pathfinding[key]["cyclability_coeff"]])

    df = pd.DataFrame(tab, columns=["lat", "lon", "Cyclability coefficient"])
    fig = px.scatter_mapbox(df, lat="lat", lon="lon",  color="Cyclability coefficient", size="Cyclability coefficient", zoom=11)
    fig.show()
    #fig.write_image("images/heatmap_mapbox.png")

In [15]:
if(display):
    tab_routes_voxels_common = []
    dict_voxels_common = {}
    tab = []

    for i in range(len(tab_routes_voxels_pathfinding_global)):
        tab_routes_voxels_common.append(list(set(tab_routes_voxels_pathfinding_global[i]).intersection(set(tab_routes_voxels_simplified_global[i]))))

    for i in range(len(tab_routes_voxels_common)):
        for key in tab_routes_voxels_common[i]:
            if key not in dict_voxels_common:
                dict_voxels_common[key] = [i]
            else:
                dict_voxels_common[key].append(i)

    for key in dict_voxels_common:
        print(key)
        tab_routes = dict_voxels_common[key]
        vox_str = key.split(";")
        vox_int = [int(vox_str[0]), int(vox_str[1])]
        vox_pos = voxel.get_voxel_points(vox_int, 0)
        if(len(tab_routes) >= 0):
            tab.append([vox_pos[0][0], vox_pos[0][1], len(tab_routes)])

    df = pd.DataFrame(tab, columns=["lat", "lon", "value"])
    #fig = px.scatter_mapbox(df, lat="lat", lon="lon",  color="value", size="value", zoom=10)
    #fig.show()


In [16]:
if(display):
    tab = []
    for key in dict_voxels_pathfinding:
        tab_routes = dict_voxels_pathfinding[key]["tab_routes_starting"]
        vox_str = key.split(";")
        vox_int = [int(vox_str[0]), int(vox_str[1])]
        vox_pos =voxel. get_voxel_points(vox_int, 0)
        if(len(tab_routes) >= 0):
            tab.append([vox_pos[0][0], vox_pos[0][1], len(tab_routes)])

    df = pd.DataFrame(tab, columns=["lat", "lon", "value"])
    fig = px.scatter_mapbox(df, lat="lat", lon="lon",  color="value", size="value", zoom=10)
    fig.show()

# Graphes distances

In [17]:
if(display):
    with open("files/"+project_folder+"/distances/distances_observations.tab",'rb') as infile:
        tab_distances = pickle.load(infile)    

    with open("files/"+project_folder+"/distances/distances_mapbox.tab",'rb') as infile:
        tab_distances_pf = pickle.load(infile)   

    print(len(tab_distances_pf))

    fig = go.Figure()
    # Create and style traces
    fig.add_trace(go.Scatter(y=tab_distances, name='Distance réel',
                             line=dict(color='firebrick', width=4)))
    fig.add_trace(go.Scatter(y=tab_distances_pf, name = 'Distance plus court chemin',
                             line=dict(color='royalblue', width=4)))

    # Here we modify the tickangle of the xaxis, resulting in rotated labels.
    fig.update_layout(barmode='group', xaxis_tickangle=-45)
    fig.show()
    #fig.write_image("images/graph_distance.png")

In [18]:
if(display):

    tab_distances_relatives = []
    for i in range(len(tab_distances)):
        tab_distances_relatives.append(tab_distances[i]-tab_distances_pf[i])

    fig = go.Figure()
    # Create and style traces
    fig.add_trace(go.Scatter(y=tab_distances_relatives, name='Distance réel',
                             line=dict(width=4)))

    # Here we modify the tickangle of the xaxis, resulting in rotated labels.
    fig.update_layout(barmode='group', xaxis_tickangle=-45)
    fig.show()
    #fig.write_image("images/graph_diff_distance.png")

# CDF

In [19]:
if(display):
    tab_distances_relatives.sort()
    tab_display = []
    tab_axis = []
    dist_max = 0
    nb_routes = 0
    for dist in tab_distances_relatives:
        if(dist > dist_max):
            tab_display.append(nb_routes/len(tab_distances_relatives))
            tab_axis.append(dist_max)
            dist_max += 0.2
        nb_routes += 1
    fig = go.Figure()
    # Create and style traces
    fig.add_trace(go.Scatter(y=tab_display, x=tab_axis, name='Distance réel',
                             line=dict(width=4)))


    # Here we modify the tickangle of the xaxis, resulting in rotated labels.
    fig.update_layout(barmode='group', xaxis_tickangle=-45)
    fig.show()
    #fig.write_image("images/cdf.png")

# Matrice de distance

In [20]:
distance_matrix = np.empty([df_simplified.iloc[-1]["route_num"], df_simplified.iloc[-1]["route_num"]])

pca = PCA(n_components=1)

for i in range(df_simplified.iloc[-1]["route_num"]):
    #print(i)
    for j in range(i, df_simplified.iloc[-1]["route_num"]):
        coeff = metric.get_distance_voxels(i+1, j+1, tab_routes_voxels_simplified_global)
        #coeff = metric.get_distance_euclidian(df_cluster[df_cluster["route_num"]==i+1], df_cluster[df_cluster["route_num"]==j+1], pca)
        #coeff = metric.get_distance_hausdorff(df_cluster[df_cluster["route_num"]==i+1], df_cluster[df_cluster["route_num"]==j+1])
        distance_matrix[i][j] = coeff[0]
        distance_matrix[j][i] = coeff[1]

print("number of non-zero in diag : ", len(np.nonzero(np.diagonal(distance_matrix))[0]))

number of non-zero in diag :  0


In [21]:
if(display):
    num_vox = 0
    tab_vox = []
    num_route = 9
    df_display = df_simplified[df_simplified["route_num"]==num_route]
    df_display["type"] = 0
    for v in tab_routes_voxels_simplified[num_route]:
        vox_str = v.split(";")
        vox_int = [int(vox_str[0]), int(vox_str[1])]
        tab_vox += voxel.get_voxel_points(vox_int, num_vox)
        num_vox -= 1

    df = pd.DataFrame(tab_vox, columns=["lat", "lon", "route_num", "type"])
    df_display = df_display.append(df)
    dp.display(df_display, color="type")   

# Graphes clustering

In [22]:
start_variable = 0.1
stop_variable = 0.9
step_variable = 0.1

tab_nb_clusters = []
tab_noise = []
tab_nb_mini_clusters = []
tab_nb_big_clusters = []
tab_mean = []
tab_silhouette = []

for variable in np.arange(start_variable, stop_variable, step_variable):
    c = DBSCAN(eps=variable, min_samples=5, metric='precomputed')
    clusters = cl.cluster(distance_matrix, c)
    dict_cluster = cl.tab_clusters_to_dict(clusters)
    silhouette = silhouette_score(distance_matrix, clusters, metric="precomputed")

    nb_mini_clusters= 0
    nb_big_clusters = 0
    mean = 0
    for i in range(len(dict_cluster)-1):
        if(len(dict_cluster[i]) > 15):
            nb_big_clusters += 1
        elif(len(dict_cluster[i]) == 1):
            nb_mini_clusters += 1
        mean+=len(dict_cluster[i])
        
    tab_nb_clusters.append(len(dict_cluster)-1)
    tab_noise.append(clusters.tolist().count(-1))
    tab_nb_mini_clusters.append(nb_mini_clusters)
    tab_nb_big_clusters.append(nb_big_clusters)
    tab_mean.append(mean/(len(dict_cluster)-1))
    tab_silhouette.append(silhouette)
    


In [23]:
if(display):    
    fig = go.Figure()

    fig.add_trace(go.Scatter(x=np.arange(start_variable, stop_variable, step_variable), 
                             y=tab_nb_mini_clusters, name='Mini clusters',
                             line=dict(color='firebrick', width=4)))
    fig.add_trace(go.Scatter(x=np.arange(start_variable, stop_variable, step_variable), 
                             y=tab_nb_big_clusters, name = 'Big clusters',
                             line=dict(color='royalblue', width=4)))


    fig.add_shape(type="line", x0=0, y0=11, x1=0.4, y1=11,
                line=dict(color="LightSeaGreen", width=4, dash="dot"))
    fig.add_shape(type="line", x0=0.4, y0=0, x1=0.4, y1=11,
                line=dict(color="LightSeaGreen", width=4, dash="dot"))

    fig.add_shape(type="line", x0=0.4, y0=11, x1=0.4, y1=15,
                line=dict(color="LightSeaGreen", width=4, dash="dot"))
    fig.add_shape(type="line", x0=0, y0=15, x1=0.4, y1=15,
                line=dict(color="LightSeaGreen", width=4, dash="dot"))

    # Here we modify the tickangle of the xaxis, resulting in rotated labels.
    fig.update_layout(barmode='group', xaxis_tickangle=-45)
    fig.show()
    #fig.write_image("images/clusters_dbscan.png")

In [24]:
if(display):       
    fig = go.Figure()

    fig.add_trace(go.Scatter(x=np.arange(start_variable, stop_variable, step_variable), 
                             y=tab_nb_clusters, name='Mean size',
                             line=dict(color='firebrick', width=4)))

    fig.add_shape(type="line", x0=0, y0=74, x1=0.4, y1=74,
                line=dict(color="LightSeaGreen", width=4, dash="dot"))
    fig.add_shape(type="line", x0=0.4, y0=0, x1=0.4, y1=74,
                line=dict(color="LightSeaGreen", width=4, dash="dot"))


    # Here we modify the tickangle of the xaxis, resulting in rotated labels.
    fig.update_layout(barmode='group', xaxis_tickangle=-45)
    fig.show()
    #fig.write_image("images/mean_dbscan.png")

In [25]:
if(display):       
    fig = go.Figure()

    fig.add_trace(go.Scatter(x=np.arange(start_variable, stop_variable, step_variable), 
                             y=tab_silhouette, name='Silhouette',
                             line=dict(color='royalblue', width=4)))

    fig.add_shape(type="line", x0=0, y0=0.3898887, x1=0.4, y1=0.3898887,
                line=dict(color="LightSeaGreen", width=4, dash="dot"))
    fig.add_shape(type="line", x0=0.4, y0=0, x1=0.4, y1=0.3898887,
                line=dict(color="LightSeaGreen", width=4, dash="dot"))


    # Here we modify the tickangle of the xaxis, resulting in rotated labels.
    fig.update_layout(barmode='group', xaxis_tickangle=-45)
    fig.show()
    #fig.write_image("images/silhouette_dbscan.png")

# Affichage/Propriétés des clusters

In [26]:
def cluster_properties(dict_cl, X, cl, metric=None):
    mean = 0
    mini_clusters = []
    big_clusters = []
    for i in dict_cl:
        if(i != -1):
            if(len(dict_cl[i]) > len(X)/60):
                big_clusters.append(i)
            elif(len(dict_cl[i]) <= len(X)/500):
                mini_clusters.append(i)
            mean+=len(dict_cl[i])

    if(metric != None):
        silhouette = silhouette_score(X, cl, metric=metric)
    else:
        silhouette = silhouette_score(X, cl)

    print(len(big_clusters), "big clusters:", big_clusters)
    print(len(mini_clusters), "mini clusters :", mini_clusters)
    print()
    print("mean size :", mean/(len(dict_cl)-1))
    print()
    print("silhouette score :", silhouette)

# Clustering

In [27]:
pca = PCA(n_components=750)
#distance_matrix_reduced = pca.fit_transform(distance_matrix)

dbscan = DBSCAN(eps=0.4, min_samples=5, metric='precomputed')
optics = OPTICS(min_samples=10, metric='precomputed')
kmedoids = KMedoids(n_clusters=75, metric=metric.get_distance_voxels_symetric, random_state=0)

clustering = dbscan
X = distance_matrix
if(clustering == kmedoids):
    X = np.asarray(np.expand_dims(np.arange(1, 900, 1), axis=1))

clusters = cl.cluster(X, clustering)
dict_cluster = cl.tab_clusters_to_dict(clusters)

print(len(dict_cluster)-1, "clusters")
print("noise:", clusters.tolist().count(-1))
print()
cluster_properties(dict_cluster, distance_matrix, clusters, "precomputed")

74 clusters
noise: 81

16 big clusters: [0, 1, 4, 46, 18, 30, 12, 16, 19, 24, 45, 29, 33, 35, 36, 62]
11 mini clusters : [2, 5, 6, 38, 48, 53, 55, 56, 60, 64, 71]

mean size : 11.04054054054054

silhouette score : 0.39044884383673584


In [28]:
if(display):
    nb_cluster = 1
    print(len(dict_cluster[nb_cluster]))
    dp.display_routes(df_simplified, dict_cluster[nb_cluster])
    #dp.display_cluster_heatmap(df_simplified, dict_cluster[nb_cluster])

# Voxel Clustering

In [29]:
clusters_used = []
for key in dict_voxels_bikepath:
    if key in dict_voxels_pathfinding:
        if(dict_voxels_bikepath[key]["cluster"] not in clusters_used):
            dict_voxels_pathfinding[key]["cluster"] = len(clusters_used)
            clusters_used.append(dict_voxels_bikepath[key]["cluster"])
        else:
            dict_voxels_pathfinding[key]["cluster"] = clusters_used.index(dict_voxels_bikepath[key]["cluster"])

In [30]:
X = []
for key in dict_voxels_pathfinding:
    if("cluster" not in dict_voxels_pathfinding[key]):
        vox_str = key.split(";")
        vox_int = [int(vox_str[0]), int(vox_str[1])]
        tab_points = voxel.get_voxel_points(vox_int)
        X.append([tab_points[0][0], tab_points[0][1], dict_voxels_pathfinding[key]["cyclability_coeff"]])
print(len(X))

5554


In [31]:


if(new_voxels_clustering):  
    kmeans = KMeans(n_clusters=len(X)//10, random_state=0)
    dbscan = DBSCAN(eps=0.5, min_samples=5)
    clusters = cl.cluster(X, kmeans)
else:
    with open("files/"+project_folder+"/data/kmeans_voxels_osmnx.sk",'rb') as infile:
        kmeans = pickle.load(infile)
    clusters = kmeans.labels_
        
dict_cluster_voxel = cl.tab_clusters_to_dict(clusters)

print(len(dict_cluster_voxel)-1, "clusters")
print("noise:", clusters.tolist().count(-1))
print()
#cluster_properties(dict_cluster_voxel, X, clusters)


554 clusters
noise: 0



In [32]:
if(save_kmeans_voxels):
    print("Saving kmeans_voxels_osmnx.sk...")
    with open("files/"+project_folder+"/data/kmeans_voxels_osmnx.sk",'wb') as outfile:
        pickle.dump(kmeans, outfile)

In [33]:
if(display):

    num_cluster = 5

    tab = []
    for vox in dict_cluster_voxel[num_cluster]:
        tab.append(X[vox])

    df = pd.DataFrame(tab, columns=["lat", "lon", "value"])
    fig = px.scatter_mapbox(df, lat="lat", lon="lon",  color="value", size="value", zoom=10)
    fig.show()
    

In [34]:
dict_voxels_pathfinding_clustered = deepcopy(dict_voxels_pathfinding)
for c in dict_cluster_voxel:
    for vox in dict_cluster_voxel[c]:
        v = X[vox]
        vox_int = voxel.find_voxel_int([v[0], v[1]])
        key = str(vox_int[0])+";"+str(vox_int[1])
        dict_voxels_pathfinding_clustered[key]["cluster"] = len(clusters_used)+c
if(save_dict_voxels_pathfinding_clustered):
    print("Saving voxels_clustered_osmnx.dict...")
    with open("files/"+project_folder+"/data/voxels_clustered_osmnx.dict",'wb') as outfile:
        pickle.dump(dict_voxels_pathfinding_clustered, outfile)

In [35]:
print(len(dict_voxels_pathfinding_clustered))

11711


# Osmnx

In [36]:
def generate_graphs():
    # creating a graph by using a point
    lyon = (45.74846, 4.84671)
    st_etienne = (45.4333, 4.4)
    G_lyon = ox.graph_from_point(lyon, distance=7500)
    G_stetienne = ox.graph_from_point(st_etienne, distance=7500)
    G = G_lyon
    # quick plot
    #ox.plot_graph(G, fig_height=20, fig_width=20, edge_color='black')
    return G_lyon, G_stetienne
#G_lyon, G_stetienne = generate_graphs()

In [37]:
with open("files/"+project_folder+"/network_graphs/lyon.ox", "rb") as infile:
    G_lyon = pickle.load(infile)
with open("files/"+project_folder+"/network_graphs/st_etienne.ox", "rb") as infile:
    G_stetienne = pickle.load(infile)
    
nodes_lyon, _ = ox.graph_to_gdfs(G_lyon)
tree_lyon = KDTree(nodes_lyon[['y', 'x']], metric='euclidean')

nodes_stetienne, _ = ox.graph_to_gdfs(G_stetienne)
tree_stetienne = KDTree(nodes_stetienne[['y', 'x']], metric='euclidean')

G = G_lyon
tree = tree_lyon

In [38]:
df_temp = df_simplified[df_simplified["route_num"]==i+1]
d_point = [df_temp.iloc[0]["lat"], df_temp.iloc[0]["lon"]]
f_point = [df_temp.iloc[-1]["lat"], df_temp.iloc[-1]["lon"]]

In [39]:
if(display):
    route = data.pathfind_route_osmnx(d_point, f_point, tree, G, i)
    ox.plot_route_folium(G, route, route_color='green')

In [40]:
with open("files/"+project_folder+"/data/osmnx_pathfinding.df", "rb") as infile:
    df_pathfinding_osmnx = pickle.load(infile)

for i in range(df_pathfinding_osmnx.iloc[-1]["route_num"], df_simplified.iloc[-1]["route_num"]):
    df_temp = df_simplified[df_simplified["route_num"]==i+1]
    d_point = [df_temp.iloc[0]["lat"], df_temp.iloc[0]["lon"]]
    f_point = [df_temp.iloc[-1]["lat"], df_temp.iloc[-1]["lon"]]
    if(d_point[0] < 45.5):
        tree = tree_stetienne
        G = G_stetienne
    else:
        tree = tree_lyon
        G = G_lyon
    route = data.pathfind_route_osmnx(d_point, f_point, tree, G, i)
    route_coord = [[G.nodes[x]["y"], G.nodes[x]["x"]] for x in route]
    route_coord = [x + [i+1] for x in route_coord]
    df_pathfinding_osmnx = df_pathfinding_osmnx.append(pd.DataFrame(route_coord, columns=["lat", "lon", "route_num"]))
    

# Validation

In [41]:
import python.validation as validation

with open("files/"+project_folder+"/data/dbscan_pathfinding_osmnx.tab",'rb') as infile:
    tab_clusters = pickle.load(infile)
with open("files/"+project_folder+"/data/voxels_clustered_osmnx.dict",'rb') as infile:
    dict_voxels = pickle.load(infile)
with open("files/"+project_folder+"/data/kmeans_voxels_osmnx.sk",'rb') as infile:
    kmeans = pickle.load(infile)
    
df = df_pathfinding
    
size_data = 1
hidden_size = 256
num_layers = 2
voxels_frequency = 4

network = RNN.RNN_LSTM(size_data, max(tab_clusters)+1, hidden_size, num_layers)
network.load_state_dict(torch.load("files/"+project_folder+"/neural_networks/network_osmnx.pt"))
network.eval()

nb_good_predict = 0
nb_predict = 0

deviation = 0 #5e-3

tab_predict = []

i=675

for i in range(10): #len(tab_clusters)):
    if(tab_clusters[i] != -1 and i != 675):
        #print(i)
        df_temp = df[df["route_num"]==i]
        d_point = [df_temp.iloc[0]["lat"], df_temp.iloc[0]["lon"]]
        f_point = [df_temp.iloc[-1]["lat"], df_temp.iloc[-1]["lon"]]
        rand = random.uniform(-deviation, deviation)
        d_point[0] += rand
        rand = random.uniform(-deviation, deviation)
        d_point[1] += rand
        rand = random.uniform(-deviation, deviation)
        f_point[0] += rand
        rand = random.uniform(-deviation, deviation)
        f_point[1] += rand
        
        if(d_point[0] < 45.5):
            tree = tree_stetienne
            G = G_stetienne
        else:
            tree = tree_lyon
            G = G_lyon
        df_route, cl, nb_new_cluster = validation.find_cluster(d_point, f_point, network, voxels_frequency, df_pathfinding, dict_voxels, 
                                     kmeans, tree, G, False)
        if(cl == tab_clusters[i]):
            nb_good_predict += 1
            #print("good predict")
        nb_predict += 1
if(nb_predict > 0):
    tab_predict.append(nb_good_predict/nb_predict)

tot_predict = 0
for predict in tab_predict:
    tot_predict += predict
print("ratio:", tot_predict/len(tab_predict))


ratio: 1.0


In [42]:
if(display):
    dp.display(df_simplified[df_simplified["route_num"]==i]) 
    dp.display_cluster_heatmap(df_simplified, dict_cluster[cl])
    dp.display(df_route)

# Graph modification

In [43]:
#G_stetienne
G = G_lyon
with open("files/"+project_folder+"/onlylyon_bikepath_fusioned.df", "rb") as outfile:
    df_osm_bikepath = pickle.load(outfile)
    df_osm_bikepath = pd.DataFrame(df_osm_bikepath, dtype=object)
      
_, dict_voxels_osm_bikepath = voxel.create_dict_vox(df_osm_bikepath, df_osm_bikepath.iloc[0]["route_num"], df_osm_bikepath.iloc[-1]["route_num"], True)

for v in G:
    for v_n in G[v]:
        df_line = pd.DataFrame([[G.nodes[v]['y'], G.nodes[v]['x'], 1], [G.nodes[v_n]['y'], G.nodes[v_n]['x'], 1]], columns=["lat", "lon", "route_num"])
        tab_voxels, _ = voxel.create_dict_vox(df_line, 1, 1)
        nb_vox_found = 0
        tot_coeff = 0
        for vox in tab_voxels[0]:
            if vox in dict_voxels_osm_bikepath:
                G[v][v_n][0]['length'] -= G[v][v_n][0]['length']*0.15
                break

FileNotFoundError: [Errno 2] No such file or directory: 'files/veleval/onlylyon_bikepath_fusioned.df'

In [44]:
df_simplified

Unnamed: 0,lat,lon,route_num
0,45.7598,4.87564,0
1,45.7595,4.87557,0
2,45.7594,4.87589,0
3,45.7583,4.87658,0
4,45.7561,4.87612,0
...,...,...,...
61,45.7785,4.80798,898
62,45.7805,4.80921,898
63,45.7806,4.80916,898
64,45.7827,4.81093,898
