In [9]:
import sys
sys.path.append('../../') 


import pandas as pd
import geopandas as gpd
import osmnx as ox

from src.graph.create_osmnx_graph import OSMnxGraph

from srai.plotting import plot_regions
from srai.regionalizers import geocode_to_region_gdf
from srai.constants import WGS84_CRS, FEATURES_INDEX
from srai.regionalizers import H3Regionalizer
from srai.joiners import IntersectionJoiner

In [10]:
# Folder holding the check-in CSV and the per-resolution region files read by the cells below.
# The path is relative, so it resolves against the notebook's working directory.
data_folder = '../../data/downstream_tasks/checkins/nyc'

In [11]:
# Load the raw check-in table; the file is semicolon-delimited.
# NOTE(review): the rendered preview below contains an "Unnamed: 0" column, which suggests the
# CSV carries a saved index column — consider index_col=0 if nothing downstream relies on it.
# NOTE(review): the previewed rows have longitudes ~139.6, latitudes ~35.7 and a +540 minute
# timezone offset, which does not look like New York — confirm this file holds the intended
# city before joining it against the New_York_* region files.
data = pd.read_csv(f'{data_folder}/nyc_dataset.csv', delimiter=';')

In [12]:
# Turn the flat check-in table into a GeoDataFrame: the latitude/longitude columns are
# replaced by a point geometry (x = longitude, y = latitude) declared as EPSG:4326.
gdf = gpd.GeoDataFrame(
    data.drop(columns=["latitude", "longitude"]),
    geometry=gpd.points_from_xy(x=data["longitude"], y=data["latitude"]),
    crs="EPSG:4326",
)
gdf

Unnamed: 0,user_id,venua_id,venue_category_id,venue_category_name,timezone_offset_in_minutes,utc_time,geometry
0,1541,4f0fd5a8e4b03856eeb6c8cb,4bf58dd8d48988d10c951735,Cosmetics Shop,540,Tue Apr 03 18:17:18 +0000 2012,POINT (139.61959 35.70510)
1,868,4b7b884ff964a5207d662fe3,4bf58dd8d48988d1d1941735,Ramen / Noodle House,540,Tue Apr 03 18:22:04 +0000 2012,POINT (139.80032 35.71558)
2,114,4c16fdda96040f477cc473a5,4d954b0ea243a5684a65b473,Convenience Store,540,Tue Apr 03 19:12:07 +0000 2012,POINT (139.48006 35.71454)
3,868,4c178638c2dfc928651ea869,4bf58dd8d48988d118951735,Food & Drink Shop,540,Tue Apr 03 19:12:13 +0000 2012,POINT (139.77663 35.72559)
4,1458,4f568309e4b071452e447afe,4f2a210c4b9023bd5841ed28,Housing Development,540,Tue Apr 03 19:18:23 +0000 2012,POINT (139.73405 35.65608)
...,...,...,...,...,...,...,...
573698,326,4bab3456f964a5204d993ae3,4bf58dd8d48988d1e9931735,Music Venue,540,Sat Feb 16 02:34:35 +0000 2013,POINT (139.70252 35.65694)
573699,853,4b559c09f964a520efe827e3,4bf58dd8d48988d129951735,Train Station,540,Sat Feb 16 02:34:53 +0000 2013,POINT (139.65729 35.85874)
573700,1502,5101e81ee4b020384100b0a5,4bf58dd8d48988d1dc931735,Tea Room,540,Sat Feb 16 02:34:55 +0000 2013,POINT (139.77122 35.70175)
573701,408,4bbc5648afe1b7134743304b,4bf58dd8d48988d16e941735,Fast Food Restaurant,540,Sat Feb 16 02:35:17 +0000 2013,POINT (139.76835 35.67046)


In [None]:
# Read the region file whose name ends in "res_8" (presumably the resolution-8 regions).
# NOTE(review): this module-level `regions` is not referenced by any later cell visible here —
# checkins_per_region() reads its own file per resolution — so this cell may be a leftover.
regions = gpd.read_file(f'{data_folder}/New_York_data_res_8.shx')

In [None]:
# Resolutions to summarise; checkins_per_region() expects a matching
# New_York_data_res_<resolution> file in data_folder for each entry.
resolutions = [8, 9]

def checkins_per_region(resolution=9):
    """Count the check-ins joined to each region at the given resolution.

    Reads the region file for ``resolution`` from ``data_folder``, joins it with the
    module-level ``gdf`` of check-in points using ``IntersectionJoiner`` and counts
    the joined rows per ``region_id``.

    Args:
        resolution: Number substituted into the region file name
            (``New_York_data_res_<resolution>``).

    Returns:
        A DataFrame with ``region_id`` as a regular column and the per-region count
        of ``feature_id`` exposed as ``count_checkins``. Only regions that appear in
        the joiner's output get a row.
    """
    region_polygons = gpd.read_file(f'{data_folder}/New_York_data_res_{resolution}.shx')

    # One row per (region, check-in) pair produced by the joiner.
    joined_pairs = IntersectionJoiner().transform(region_polygons, gdf)

    per_region_counts = (
        joined_pairs
        .reset_index()
        .set_index("region_id")
        .groupby("region_id")
        .count()
    )

    # Move region_id back into a column and give the count a descriptive name.
    return per_region_counts.reset_index().rename(
        columns={'feature_id': 'count_checkins'}
    )

In [None]:
# Show summary statistics of the per-region check-in counts for every configured resolution.
for resolution in resolutions:
    print(f"Resolution {resolution}")
    stats = checkins_per_region(resolution).describe()
    display(stats)

### OSMnx street network

In [None]:
# Place name passed to OSMnx in the next cell to look up the street network.
area = "New York City, USA"

In [None]:
# Build the street graph for `area` with OSMnx, restricted to the "drive" network type.
G = ox.graph.graph_from_place(area, network_type="drive")
# Trailing semicolon keeps the notebook from echoing plot_graph's return value under the figure.
ox.plot_graph(G);

In [None]:
# Split the graph into node and edge GeoDataFrames and show both, nodes first.
gdf_nodes, gdf_edges = ox.graph_to_gdfs(G)
display(gdf_nodes, gdf_edges)

In [None]:
# Build the project's graph wrapper from the check-in table and the OSMnx node/edge frames.
# NOTE(review): this passes the raw `data` DataFrame (latitude/longitude columns) rather than
# the `gdf` GeoDataFrame built earlier — confirm that is what OSMnxGraph expects.
graph_embedder = OSMnxGraph(data, gdf_nodes, gdf_edges)

# Going by the argument names, this builds a node-level graph aggregated by count —
# TODO confirm against OSMnxGraph.create_graph.
graph_data = graph_embedder.create_graph(element_type='node', aggregation_method='count')
graph_data