In [1]:
import geopandas as gpd
import pandas as pd 
from shapely.ops import unary_union
import osmnx as ox
import os 
import warnings 
warnings.filterwarnings("ignore", category=DeprecationWarning)

CITIES = ['Lyon','Villeurbanne','Décines-Charpieu','Vaulx-En-Velin']
FOLDER_PATH = '../../../../data/rrochas/prediction_validation' 

def split_multi_polygon(gdf):
    '''
    From a gdf which contains some Multi-Polygons, split them as a Series of Polygon:
    '''
    gdf_multi_poly = gdf[gdf.geometry.type == 'MultiPolygon']
    gdf_poly = gdf[gdf.geometry.type != 'MultiPolygon']

    for idx,row in gdf_multi_poly.iterrows():
        list_of_poly = list(row.geometry.geoms)
        tmp_list = []
        for poly in list_of_poly :
            dict_row = {key:value for key,value in row.items()}
            dict_row.update({'geometry' : poly,'element_type':row.name[0],'osmid':row.name[1]})
            tmp_list.append(dict_row)
        splitted_multi_poly = gpd.GeoDataFrame(tmp_list)
        splitted_multi_poly = splitted_multi_poly.set_index(['element_type','osmid'])

        #print('list_of_poly: ',len(list_of_poly))
        #print(list_of_poly)
        #print('splitted_multi_poly: ',splitted_multi_poly.shape)
        #print(splitted_multi_poly) 
        #splitted_multi_poly['geometry'] = list_of_poly
        
        # Concat to the init gdf:
        gdf_poly = gpd.GeoDataFrame(pd.concat([gdf_poly,splitted_multi_poly]),crs = gdf.crs)

    return(gdf_poly)


def group_adjacent_polygon(gdf):
    '''
    Créer une fonction qui regroupe les polygon adjacent pour en crée un seul. 
    Règle le problème où certaines géoémtrie comme les parcs sont subdivisé, 
    alors qu'on veut la géométrie du parc entier.
    '''
    gdf = split_multi_polygon(gdf)
    gdf = gdf.reset_index()  # set index from 0 to n 
    gdf = gdf.drop(columns=['element_type'])
    # Identifier les polygones en contact
    contact_indices = []
    for i in range(len(gdf)): #pass through index 
        for j in range(i + 1, len(gdf)):
            # Two polygon might intersect 
            try:
                condition = (gdf.geometry.loc[i].intersects(gdf.geometry.loc[j]))  #.values[0] #.values[0]
            # Tackle close polygon or point + polygon ..
            except: 
                condition = (gdf.geometry.loc[i].touches(gdf.geometry.loc[j])) 
            if condition:
                contact_indices.append(set([i,j]))


    # Regrouper l'ensemble des contact indices qui ont un élément en commmun : 
    grouped_geometry = {}
    for nb_iter,(i,j) in enumerate(contact_indices):
        group_exists = False
        id_set_to_remove = []
        new_set = set([i,j]) 
        for k,grouped_geom in grouped_geometry.items():
            # Si i ou j est déjà dans un groupe, ajouter l'autre dans le groupe: 

            # Parfois jusqu'à 2 id à retirer
            if (i in grouped_geom) or (j in grouped_geom):
                new_set = list(set(grouped_geom)|set(new_set))
                id_set_to_remove.append(k)
                group_exists = True
                        
        # Si aucun groupe n'a été trouvé, on en crée un 
        if not group_exists:
            new_indice = len(grouped_geometry)
            grouped_geometry[new_indice] = [i,j]
        else:
            for id_set in id_set_to_remove:
                # remove
                grouped_geometry.pop(id_set)
            # reorganise
            grouped_geometry = {i: grouped_geometry[key] for i, key in enumerate(grouped_geometry)}
            # add new item:
            grouped_geometry[len(grouped_geometry)] = new_set

    # Pour chaque grouped_geometry, les regrouper (gdf_to_add), et supprimer les geométrie non regroupé de gdf:
    gdf_to_add = gpd.GeoDataFrame()
    for k,grouped_geom in grouped_geometry.items():
        '''gdf.loc[grouped_geom].geometry'''
        geometries_to_merge = gdf.geometry[grouped_geom] 
        merged_geom = unary_union(geometries_to_merge)
        gdf_to_add_i = gpd.GeoDataFrame(dict(geometry = [merged_geom]),crs = gdf.crs)
        gdf_to_add_i['name'] = gdf.loc[grouped_geom[0]]['name']
        gdf_to_add_i['osmid'] = gdf.loc[grouped_geom[0]]['osmid']
        gdf_to_add = gpd.GeoDataFrame(pd.concat([gdf_to_add,gdf_to_add_i]),crs = gdf.crs)

        # Supprime les un-grouped geometry :
        gdf = gdf.drop(grouped_geom)

    new_gdf = gpd.GeoDataFrame(pd.concat([gdf,gdf_to_add]), crs=gdf.crs)
    return(new_gdf)

def concat_gdf_through_cities(CITIES,tag_type,sub_tag,sports_tags):
    gdf = gpd.GeoDataFrame()
    for city in CITIES:
        place_name = f"{city}, France"
        tags = {tag_type: sub_tag}
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=DeprecationWarning)
            try : 
                gdf_i = ox.features_from_place(place_name, tags=tags)
            except:
                print(f'No {tag_type}-{sub_tag} within {place_name}')
                gdf_i = None
            if gdf_i is not None:    
                if sports_tags is not None:
                    gdf_i = gdf_i[gdf_i.sport.isin(sports_tags)]
                
                gdf = gpd.GeoDataFrame(pd.concat([gdf,gdf_i]), crs=gdf_i.crs)
    return gdf

def get_POIs_from_cities(CITIES,tag_type,sub_tag,sports_tags = None):
    gdf = concat_gdf_through_cities(CITIES,tag_type,sub_tag,sports_tags)
    gdf = gdf[['name','geometry']]
    gdf = group_adjacent_polygon(gdf)
    gdf['type'] = tag_type
    gdf['sub_type'] = sub_tag
    return(gdf)

# Get POIs 'Leisure'
'Leisure' contient les géométries (polygon/multipolygon) des `Parcs`et des `Stades`

### Génère les POIs parcs : 
Rcupère les POIs de parc, groupe les sous-geometrie qui constitue un parc, et retourne uniquement les grands parcs:

In [2]:
tag_type = 'leisure'
sub_tag = 'park'
gdf_park = get_POIs_from_cities(CITIES,tag_type,sub_tag,sports_tags= None)
gdf_park = gdf_park.to_crs('epsg:2154')
gdf_park = gdf_park[gdf_park.geometry.area > 1e4]
gdf_park = gdf_park.to_crs('epsg:4326')
gdf_park.head()


  return GeometryArray(data, crs=_get_common_crs(to_concat))
  return GeometryArray(data, crs=_get_common_crs(to_concat))
  return GeometryArray(data, crs=_get_common_crs(to_concat))
  return GeometryArray(data, crs=_get_common_crs(to_concat))
  return GeometryArray(data, crs=_get_common_crs(to_concat))


Unnamed: 0,osmid,name,geometry,type,sub_type
14,22664120,Parc de la Cerisaie,"POLYGON ((4.81750 45.77479, 4.81757 45.77471, ...",leisure,park
15,22727720,Parc Francis Popy,"POLYGON ((4.82349 45.77753, 4.82349 45.77766, ...",leisure,park
17,23269760,Parc Georges Bazin,"POLYGON ((4.88274 45.75170, 4.88274 45.75172, ...",leisure,park
22,24572931,Parc Chambovet,"POLYGON ((4.89546 45.74693, 4.89554 45.74802, ...",leisure,park
23,29176499,Parc des Hauteurs et jardins du Rosaire,"POLYGON ((4.82290 45.76210, 4.82285 45.76210, ...",leisure,park


### Get POIs stadium : 

In [3]:
tag_type = 'leisure'
sub_tag = 'stadium'
sports_tags = ['rugby', 'football','soccer', 'basketball']
gdf_stadium = get_POIs_from_cities(CITIES,tag_type,sub_tag,sports_tags=sports_tags)
gdf_stadium



No leisure-stadium within Vaulx-En-Velin, France


Unnamed: 0,osmid,name,geometry,type,sub_type
0,4585967,Matmut Stadium Gerland,"POLYGON ((4.83278 45.72508, 4.83287 45.72507, ...",leisure,stadium
1,85200814,Astroballe,"POLYGON ((4.90681 45.76680, 4.90699 45.76660, ...",leisure,stadium
2,353267337,Groupama Stadium,"POLYGON ((4.98181 45.76665, 4.98209 45.76665, ...",leisure,stadium


### Génère les POIs Centre commerciaux : 

In [4]:
tag_type = 'shop'

# Tackle mall
sub_tag = 'mall' 
gdf_mall = get_POIs_from_cities(CITIES,tag_type,sub_tag,sports_tags=None)

# Tackle supermarket
sub_tag = 'supermarket' 
gdf_supermarket = get_POIs_from_cities(CITIES,tag_type,sub_tag,sports_tags=None)
gdf_shop = gpd.GeoDataFrame(pd.concat([gdf_mall,gdf_supermarket]),crs = gdf_mall.crs)

# Keep only polygon : 
gdf_shop = gdf_shop[gdf_shop.geometry.geom_type != 'Point']

# Keep only polygon > 2000m2
gdf_shop = gdf_shop.to_crs('epsg:2154')
gdf_shop = gdf_shop[gdf_shop.geometry.area > 2000]
gdf_shop = gdf_shop.to_crs('epsg:4326')
gdf_shop.head()




Unnamed: 0,osmid,name,geometry,type,sub_type
3,440270633,Pôle de Commerces et de Loisirs Confluence,"POLYGON ((4.81908 45.74095, 4.81897 45.74082, ...",shop,mall
4,624026627,Centre commercial Champvert,"POLYGON ((4.79811 45.76262, 4.79811 45.76317, ...",shop,mall
5,300128,Grand Hôtel-Dieu,"POLYGON ((4.83567 45.75806, 4.83583 45.75803, ...",shop,mall
7,395804418,Centre commercial Balzac,"POLYGON ((4.98115 45.77212, 4.98116 45.77149, ...",shop,mall
13,116760770,,"POLYGON ((4.92659 45.76489, 4.92657 45.76509, ...",shop,mall


### Génère les POIs Ecoles et Université : 

In [5]:
tag_type = 'amenity'
sub_tag = 'university' 
sports_tags = None
gdf_university = get_POIs_from_cities(CITIES,tag_type,sub_tag,sports_tags=None)
display(gdf_university.head())



No amenity-university within Décines-Charpieu, France
No amenity-university within Vaulx-En-Velin, France


  return GeometryArray(data, crs=_get_common_crs(to_concat))
  return GeometryArray(data, crs=_get_common_crs(to_concat))
  return GeometryArray(data, crs=_get_common_crs(to_concat))
  return GeometryArray(data, crs=_get_common_crs(to_concat))
  return GeometryArray(data, crs=_get_common_crs(to_concat))
  return GeometryArray(data, crs=_get_common_crs(to_concat))


Unnamed: 0,osmid,name,geometry,type,sub_type
0,1614261813,Les studios Lumières,POINT (4.86228 45.74742),amenity,university
1,2392516856,Institut supérieur d'agriculture et d'agroalim...,POINT (4.83626 45.72801),amenity,university
2,3588518600,Université Professionnelle Internationale de Lyon,POINT (4.80590 45.76833),amenity,university
3,3588531207,IDRAC - École de Commerce et de Management,POINT (4.80591 45.76838),amenity,university
4,3588534986,CEFAM,POINT (4.80591 45.76846),amenity,university


### Génère les POIs Nightclub: 

In [6]:
tag_type = 'amenity'
sub_tag = 'nightclub' 
sports_tags = None
gdf_nightclub = get_POIs_from_cities(CITIES,tag_type,sub_tag,sports_tags=None)
display(gdf_nightclub.head())



No amenity-nightclub within Décines-Charpieu, France
No amenity-nightclub within Vaulx-En-Velin, France


Unnamed: 0,osmid,name,geometry,type,sub_type
0,246746973,Fiesta Loca,POINT (4.82653 45.76729),amenity,nightclub
1,248737175,Loft Club,POINT (4.84192 45.74851),amenity,nightclub
2,2062871143,Les Salons du NH,POINT (4.85894 45.73121),amenity,nightclub
3,2283833291,Vertige X,POINT (4.83621 45.76911),amenity,nightclub
4,2543375734,La Cour des Grands,POINT (4.81933 45.75378),amenity,nightclub


## Parking 
Notes : `Les Parc Relais sont contenues dans parking`

In [7]:
tag_type = 'amenity'
sub_tag = 'parking'  
sports_tags = None
gdf_parkings = get_POIs_from_cities(CITIES,tag_type,sub_tag,sports_tags=None)
gdf_parkings = gdf_parkings.to_crs('epsg:2154')
gdf_parkings = gdf_parkings[gdf_parkings.geometry.area > 5000]
gdf_parkings = gdf_parkings.to_crs('epsg:4326')
gdf_parkings.head()

  return GeometryArray(data, crs=_get_common_crs(to_concat))


Unnamed: 0,osmid,name,geometry,type,sub_type
83,40649383,,"POLYGON ((4.82936 45.72227, 4.82987 45.72185, ...",amenity,parking
84,44822697,,"POLYGON ((4.82769 45.72638, 4.82871 45.72641, ...",amenity,parking
103,82490523,Bellecour,"POLYGON ((4.83107 45.75857, 4.83103 45.75851, ...",amenity,parking
105,84527723,,"POLYGON ((4.82753 45.72885, 4.82755 45.72925, ...",amenity,parking
111,88806907,Parc Relais TCL Parilly,"POLYGON ((4.88538 45.72042, 4.88626 45.72099, ...",amenity,parking


# Autres: 
- 'car_pooling' -> 1 seul quais perrache 
- 'waiting_area' -> 1 seule gare perrache 
- 'music_venue' -> 2 endroit seulement vers croix paquet (Collision et Kraspek ? )
- 'theatre' -> Bien. Récupère des lieu comme: Transbo, Opera, Thetare romain de fourviere ...
- 'marketplace' -> Tout les marché alimentaire  + Puces, pas mal aussi 
- 'cinema'   -> Beaucoup de ciné, pas sur que ce soit utile 
- 'parking'  -> Contient également les park relais. Aucun moyen de les identifier, donc je ne considère seulement des parking qui soient suffisamment gros.
- 'social_facility -> Contient beaucoup d'Ephad, pas sur que ce soit utile non plus. 

In [12]:
all_tag.amenity.unique()  

array(['fuel', 'marketplace', 'post_office', 'restaurant', 'post_box',
       'cinema', 'pub', 'bicycle_rental', 'parking', 'school', 'library',
       'pharmacy', 'fast_food', 'cafe', 'bank', 'bar', 'toilets',
       'bicycle_parking', 'parking_entrance', 'police', 'nightclub',
       'social_centre', 'drinking_water', 'social_facility', 'recycling',
       'theatre', 'motorcycle_parking', 'bench', 'hospital',
       'fire_station', 'atm', 'grit_bin', 'waste_basket', 'clock',
       'vending_machine', 'fountain', 'parking_space', 'kindergarten',
       'taxi', 'college', 'restaurant;club', 'vehicle_inspection',
       'community_centre', 'waste_disposal', 'driving_school', 'car_wash',
       'doctors', 'university', 'place_of_worship', 'car_sharing',
       'hookah_lounge', 'veterinary', 'music_venue', 'photo_booth',
       'clinic', 'stripclub', 'money_transfer', 'animal_boarding',
       'dentist', 'car_rental', 'ice_cream', 'biergarten', 'childcare',
       'swingerclub', 'arts_cen

### Save and Load : 

In [7]:
# Save : 
for name in ['park','stadium','university','shop','nightclub','parkings']:
    save_path = f"{FOLDER_PATH}/POIs"
    print('save path : ',save_path)
    if not os.path.exists(save_path):
        os.mkdir(save_path)
    globals()[f"gdf_{name}"].to_file(f"{save_path}/gdf_{name}.geojson", driver='GeoJSON')


# Load : 
if False:
    save_path = f"{FOLDER_PATH}/POIs"
    gpd.read_file(f"{save_path}/gdf_stadium.geojson")

save path :  ../../../../data/rrochas/prediction_validation/POIs
save path :  ../../../../data/rrochas/prediction_validation/POIs
save path :  ../../../../data/rrochas/prediction_validation/POIs
save path :  ../../../../data/rrochas/prediction_validation/POIs
save path :  ../../../../data/rrochas/prediction_validation/POIs


# Limites Observées : 
- Il manques des POIs, en particulier car je travaille avec des données 2019 et les POIs sont ceux actuels. Certains bars/boites ont ouvert depuis, d'autres ont fermé (cas du Ninkasi Kao lorsqu'on repère des anomalies de prédiction autour de la station Gerland). Les aménagements ont donc largement évolué, ce qui induis des biais/erreurs.

In [48]:
gdf_filtered = gdf[#(gdf.amenity =='pub') |
    #(gdf.amenity =='bar') |
    (gdf.amenity =='nightclub') 
    #(gdf.amenity =='restaurant') |
    #(gdf.amenity =='music_venue')
    ][['amenity','name','geometry']]

# Add NinKasi Kao which doesn't existe here (closed since)
from shapely import Point 
ninkasi_kao = [4.830913047430858,45.72818916789741]
geom = Point(ninkasi_kao)

gdf_nighclub = gpd.GeoDataFrame(pd.concat([gdf_filtered, pd.DataFrame({'amenity': ['nightclub'],'name':['Ninkasi Kao'], 'geometry':[geom]})],ignore_index=True))

# Save:
if False :
    FOLDER_PATH = '../../../../data/rrochas/prediction_validation' 
    save_path = f"{FOLDER_PATH}/POIs"
    gdf_nighclub.to_file(f"{save_path}/gdf_nightclub.geojson", driver='GeoJSON')

# gdf_filtered.explore('amenity', marker_kwds=dict(radius=7)) 



### Train Station in Lyon

In [124]:
place_name = f"Lyon, France"
gdf_train_station = ox.geometries_from_place(place_name, tags ={"building": "train_station"})
"""
osmid: 
Gare Part-dieu :  [196590753,62337529,1039333290,1191457432,1158815494]
Gare Perrache: [35143415]
Gare Vaise [31363448]
"""

part_dieu_ids = [196590753,62337529,1039333290,1191457432,1158815494]
gdf_train_station = gdf_train_station.reset_index()
gdf_train_station[gdf_train_station['osmid'].isin([part_dieu_ids])].explore()

  gdf_train_station = ox.geometries_from_place(place_name, tags ={"building": "train_station"})


In [9]:
place_name = f"Lyon, France"
ox.geometries_from_place(place_name, tags ={"railway": "station"})

  ox.geometries_from_place(place_name, tags ={"railway": "station"}).explore()


In [23]:
place_name = f"Lyon, France"
ox.geometries_from_place(place_name, tags ={"railway": "subway"})

  ox.geometries_from_place(place_name, tags ={"railway": "subway"})




Unnamed: 0_level_0,Unnamed: 1_level_0,geometry,name,network,railway,level,nodes,electrified,fixme,frequency,gauge,...,voltage,bridge,layer,service,oneway,tunnel,rack,incline,covered,note
element_type,osmid,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
way,18617872,"LINESTRING (4.82815 45.78184, 4.82824 45.78193...",Ligne C,TCL,subway,,"[192064355, 1434432807, 638837977, 1434432837,...",contact_line,Cette ligne n'est pas à voie unique.,0,1435,...,750,,,,,,,,,
way,51931058,"LINESTRING (4.82911 45.78260, 4.82946 45.78285)",Ligne C,TCL,subway,,"[475571014, 1434261013]",contact_line,,0,1435,...,750,yes,1,,,,,,,
way,98713889,"LINESTRING (4.82811 45.78193, 4.82794 45.78181...",,,subway,,"[1142015609, 1142015613, 1434432772, 1434432766]",contact_line,,0,1435,...,750,,,yard,,,,,,
way,98713892,"LINESTRING (4.82786 45.78163, 4.82791 45.78171...",,,subway,,"[1142015596, 1434432780, 1434432785, 114201563...",contact_line,,0,1435,...,750,,,yard,,,,,,
way,98713896,"LINESTRING (4.82912 45.78268, 4.82935 45.78281...",,,subway,,"[2938051798, 1434258951, 1142015604]",contact_line,,0,1435,...,750,yes,1,yard,,,,,,
way,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
way,850918881,"LINESTRING (4.83589 45.76878, 4.83583 45.76903)",Ligne C,TCL,subway,-2,"[7937874644, 3349897079]",contact_line,,0,1435,...,750,,-1,,,yes,yes,,,
way,850918882,"LINESTRING (4.83591 45.76894, 4.83599 45.76853...",Ligne C,TCL,subway,-2,"[7937874639, 7937874642, 7937874640, 793787464...",contact_line,,0,1435,...,750,,-1,,,yes,yes,17.6%,,
way,850918883,"LINESTRING (4.83162 45.77447, 4.83141 45.77445...",Ligne C,TCL,subway,,"[241595897, 10879656103, 987863556, 7937874624...",contact_line,,0,1435,...,750,,-1,,,yes,,17.6%,,
way,850918884,"LINESTRING (4.83163 45.77453, 4.83186 45.77456...",Ligne C,TCL,subway,,"[7937874589, 7937874590, 7937874588, 108969054...",contact_line,,0,1435,...,750,,-1,,,yes,yes,17.6%,,
