### Indice de risque
Pour un point donné, la fonction renvoie la distance moyenne (en km) entre ce point et ses n points les plus proches. Plus cette valeur est faible, plus ces n points sont proches du point considéré, donc plus le point est dangereux.

In [8]:
import pandas as pd
from numpy import mean
from math import radians, cos, sin, asin, sqrt
def dist(point1: tuple, point2: tuple, radius_km: float = 6371.0) -> float:
    """
    Great-circle distance between two points, computed with the haversine formula.

    - point1: tuple. First element is the latitude, second is the longitude,
    both in decimal degrees.

    - point2: tuple. Same layout as point1.

    - radius_km: radius of the sphere the points lie on. Defaults to 6371,
    the radius of the Earth in kilometers.

    Returns the distance expressed in the unit of radius_km (kilometers by default).
    """
    # Convert decimal degrees to radians.
    lat1, lon1, lat2, lon2 = map(radians, [point1[0], point1[1], point2[0], point2[1]])
    # Haversine formula.
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` a hair above 1 for (near-)antipodal
    # points, which would make asin() raise "math domain error". Clamp it.
    # A plain comparison (rather than min()) leaves a NaN input untouched, so
    # NaN coordinates still yield NaN instead of a bogus distance.
    if a > 1.0:
        a = 1.0
    c = 2 * asin(sqrt(a))
    return radius_km * c

In [9]:
# Haversine example: distance between two (latitude, longitude) points,
# both given in decimal degrees.

lat1, lon1 = 48.8619573, 2.3311767
lat2, lon2 = 48.847950, 2.382987

dist((lat1, lon1), (lat2, lon2))


4.098099467429279

In [10]:
def risk_index(point: tuple, coords: list, n: int) -> float:
    """
    Mean distance (in km) between one point and its n closest points in coords.

    The distance from point to every element of coords is computed with dist(),
    the n smallest distances are kept and their mean is returned. If coords
    holds fewer than n points, the mean is taken over all of them.

    - point: tuple. First element is the latitude, second is the longitude.

    - coords: list. List of coordinates. Each coordinate must be a tuple, first element is the latitude
    and second element is the longitude.

    - n: number of closest points which will be used to compute the mean. Must be >= 1.

    Returns the mean distance, or NaN when coords is empty.
    Raises ValueError when n is smaller than 1.
    """
    if n < 1:
        # With n == 0 the slice below is empty, and with a negative n it would
        # drop the farthest points instead of keeping the n closest ones:
        # either way the result would be meaningless.
        raise ValueError("n must be a positive integer, got {!r}".format(n))
    distances = sorted(dist((point[0], point[1]), (coord[0], coord[1])) for coord in coords)
    if not distances:
        # No point to measure against: the index is undefined. Return NaN
        # explicitly rather than letting numpy warn about an empty mean.
        return float("nan")
    return mean(distances[:n])

In [11]:
# Example of risk_index: mean distance (in km) from one point to its 4 closest
# points. The list holds exactly 4 points, so all of them are used.

lat1, lon1 = 48.8619573, 2.3311767
coords = [(48.847950, 2.382987), (49.847950, 3.382987), (50.847950, 4.382987), (51.847950, 5.382987)]
risk_index((lat1, lon1), coords, 4)

199.8045055652134

In [12]:
# Example with a DataFrame: risk index of one point against coordinates read from a CSV file.

df = pd.read_csv('../clean_data/crado_velo_format.csv')[['lat', 'long']]

# Pair the two columns up into a list of (latitude, longitude) tuples.
coords = list(zip(df['lat'].to_list(), df['long'].to_list()))

lat1, lon1 = 48.8619573, 2.3311767
risk_index((lat1, lon1), coords, 3)

0.12451208183704875