In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import box, LineString
from pyproj import Transformer
import osmnx as ox

In [None]:
# --- Locations on disk -------------------------------------------------------
# NOTE(review): absolute, user-specific path; consider making it configurable
# before sharing the notebook.
basedir = '/gpfs/space/home/etais/hpc_nikolaykozlovskiy/transformers_project/Traffic4cast/data_traffic'
road_graph_folder = 'road_graph'      # sub-folder holding the per-city road graph parquet files
spatial_data_folder = 'spatial_data'  # sub-folder used for this notebook's cache
preprocess_stage = 'pois'

# --- Run parameters ----------------------------------------------------------
city = 'london'
# Buffer distance applied to the edges after reprojecting to `city_epsg`;
# presumably metres, assuming the per-city CRS below is metric — TODO confirm.
buffer_m = 100

# EPSG code of the CRS each city's edges are reprojected to before buffering.
crs_reprj = {
    'london': 27700,
    'madrid': 2062,
    'melbourne': 3110,
}
city_epsg = crs_reprj[city]

# Folder handed to osmnx as its cache location.
cache_dir = f'{basedir}/{spatial_data_folder}/{preprocess_stage}/{city}/.cache'

In [None]:
# Point osmnx's cache folder at the per-city directory built in the config cell
# (presumably so repeated OSM downloads are reused — confirm `use_cache` is enabled).
ox.settings.cache_folder = cache_dir

In [None]:
# Load the node and edge tables of the selected city's road graph.
nodes_path = f'{basedir}/{road_graph_folder}/{city}/road_graph_nodes.parquet'
edges_path = f'{basedir}/{road_graph_folder}/{city}/road_graph_edges.parquet'

road_graph_nodes = pd.read_parquet(nodes_path)
road_graph_edges = pd.read_parquet(edges_path)

In [None]:
# Attach the coordinates of both endpoints to every edge.
# The first merge (on `u`) adds plain `node_id`/`x`/`y`; the second merge (on `v`)
# brings in the same three column names again, so pandas disambiguates the
# overlap with the suffixes: `*_start` for the `u` node, `*_end` for the `v` node.
node_coords = road_graph_nodes[['node_id', 'x', 'y']]

road_graph_edges = (
    road_graph_edges
    .merge(node_coords, left_on='u', right_on='node_id')
    .merge(node_coords, left_on='v', right_on='node_id', suffixes=('_start', '_end'))
)

In [None]:
# Build one straight two-point segment per edge from its start/end node coordinates.
# The four coordinate columns are walked in lockstep instead of using a row-wise
# `DataFrame.apply(axis=1)`: that avoids creating a Series object for every edge
# and does not depend on `apply`'s return-type inference.
road_graph_edges['geometry'] = [
    LineString([[x_start, y_start], [x_end, y_end]])
    for x_start, y_start, x_end, y_end in zip(
        road_graph_edges['x_start'],
        road_graph_edges['y_start'],
        road_graph_edges['x_end'],
        road_graph_edges['y_end'],
    )
]

In [None]:
# Wrap the edge table in a GeoDataFrame; the segment coordinates are declared
# to be in EPSG:4326.
gdf_road_graph_edges = gpd.GeoDataFrame(road_graph_edges, geometry='geometry', crs=4326)

In [None]:
# Reproject to the city's CRS so the buffer distance is expressed in that CRS's
# units, then replace every edge segment by its `buffer_m`-wide buffer polygon.
edges_projected = gdf_road_graph_edges.to_crs(city_epsg)
edges_projected['geometry'] = edges_projected['geometry'].buffer(buffer_m)
gdf_road_graph_edges = edges_projected

In [None]:
# Bounding box of all buffered edges, converted from the city CRS to EPSG:4326;
# it is used as the query polygon for the OSM download.
transformer = Transformer.from_crs(city_epsg, 4326, always_xy=True)
left, bottom, right, top = gdf_road_graph_edges.total_bounds
bbox = box(*transformer.transform_bounds(left, bottom, right, top))

# Bring the buffered edges back to EPSG:4326 as well.
gdf_road_graph_edges = gdf_road_graph_edges.to_crs(4326)

In [None]:
# POI classes -> OSM tag filters passed to `ox.features_from_polygon`.
# Each inner dict maps an OSM key to the accepted values; `True` is handed to
# osmnx unchanged (presumably "any value of this key" — confirm in the osmnx docs).
# NOTE(review): `amenity=swimming_pool` and `amenity=airport` may not match
# current OSM tagging practice (pools are commonly `leisure=swimming_pool`,
# airports `aeroway=aerodrome`) — verify against the OSM wiki.
pois = {
    'leisure': {
        'amenity': ['theatre', 'nightclub', 'cinema', 'swimming_pool'],
        'leisure': ['park', 'playground', 'sports_centre', 'stadium'],
    },
    'traffic': {
        'highway': ['traffic_signals', 'crossing', 'stop'],
    },
    'tourism': {
        'tourism': [
            'attraction', 'museum', 'artwork', 'picnic_site',
            'viewpoint', 'zoo', 'theme_park',
        ],
        'historic': ['monument', 'memorial', 'castle'],
    },
    'catering': {
        'amenity': [
            'restaurant', 'fast_food', 'cafe', 'pub',
            'bar', 'food_court', 'biergarten',
        ],
    },
    'transport': {
        'railway': ['station', 'halt', 'tram_stop'],
        'highway': ['bus_stop'],
        'amenity': ['taxi', 'airport', 'ferry_terminal'],
    },
    'shopping': {
        'shop': True,
    },
}

In [None]:
# Download every POI class inside `bbox`, reduce each feature to a single point
# and stack all classes into one GeoDataFrame with columns `pois_class`, `geometry`.
# The per-class frames are collected in a list and concatenated once at the end;
# concatenating inside the loop re-copies all previously collected rows on
# every iteration.
pois_frames = []

for pois_class, tags in pois.items():
    gdf = ox.features_from_polygon(bbox, tags=tags)
    # Centroids are computed in the projected city CRS and then converted back
    # to EPSG:4326, the CRS of the buffered edges they are later joined against.
    gdf['geometry'] = gdf.to_crs(city_epsg).centroid.to_crs(4326)
    gdf['pois_class'] = pois_class
    pois_frames.append(gdf[['pois_class', 'geometry']])

# `ignore_index=True` discards the per-download index and yields a fresh RangeIndex.
pois_gdf = pd.concat(pois_frames, axis=0, ignore_index=True)


In [None]:
# Sanity check: number of downloaded POIs per class.
pois_gdf['pois_class'].value_counts()

In [None]:
# Pair every buffered edge with the POI points lying inside its buffer polygon.
# Inner join: one output row per (edge, POI) match; edges without POIs drop out.
sjoin = gpd.sjoin(
    left_df=gdf_road_graph_edges,
    right_df=pois_gdf,
    how='inner',
    predicate='contains',
)

In [None]:
sum_pois = sjoin.groupby(['u', 'v', 'pois_class']).size().reset_index(name='counts')

In [None]:
sum_pois = sum_pois.pivot(index=['u', 'v'], columns=['pois_class'], values="counts").fillna(0)

In [None]:
total = sum_pois.sum(axis=1)

In [None]:
sum_pois = sum_pois.div(total, axis=0) * 100

In [None]:
sum_pois

In [None]:
sum_pois['total'] = total
sum_pois.reset_index(inplace=True)

In [None]:
# Attach the per-edge POI shares and totals to the full edge table.
# Left join: edges without any POI in their buffer are kept, with NaN in the POI columns.
road_graph_edges = pd.merge(
    road_graph_edges,
    sum_pois,
    how='left',
    on=['u', 'v'],
)

In [None]:
# Show only the edges that received POI information from the merge.
has_pois = ~road_graph_edges['total'].isna()
road_graph_edges.loc[has_pois]