In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import box

### Auxiliary functions 

In [None]:
def compute_bounding_boxes(bbox: tuple[float, float, float, float], step: int, crs: str) -> "gpd.GeoDataFrame":
    """Split a bounding box into a regular ``step`` x ``step`` grid of rectangular cells.

    Parameters
    ----------
    bbox
        ``(min_lon, min_lat, max_lon, max_lat)``. Any indexable with these four
        values in this order works (e.g. the array returned by ``total_bounds``).
    step
        Number of cells along each axis; the grid holds ``step * step`` cells.
    crs
        Coordinate reference system assigned to the resulting GeoDataFrame.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per cell with a single ``geometry`` column. Rows are ordered
        longitude-major: the cell at longitude index ``i`` and latitude index
        ``j`` sits at row ``i * step + j``.

    Raises
    ------
    ValueError
        If ``step`` is not a positive whole number.
    """
    n_cells = int(step)
    if n_cells != step or n_cells < 1:
        raise ValueError(f"step must be a positive integer, got {step!r}")

    min_lon, min_lat, max_lon, max_lat = bbox[0], bbox[1], bbox[2], bbox[3]

    # Compute every cell edge exactly once per axis. Neighbouring cells then share
    # the very same edge value (no floating-point gaps or overlaps between them),
    # and the first/last edges are exactly the bounds of the bounding box.
    lon_edges = np.linspace(min_lon, max_lon, n_cells + 1).tolist()
    lat_edges = np.linspace(min_lat, max_lat, n_cells + 1).tolist()

    # Longitude is the outer loop so that row positions keep the i * step + j layout.
    cells = [
        box(lon_lo, lat_lo, lon_hi, lat_hi)
        for lon_lo, lon_hi in zip(lon_edges[:-1], lon_edges[1:])
        for lat_lo, lat_hi in zip(lat_edges[:-1], lat_edges[1:])
    ]

    return gpd.GeoDataFrame(geometry=cells, crs=crs)

### Main code

In [None]:
# Load the trajectory samples and turn them into a GeoDataFrame of EPSG:4326 points.
path_dataset = './data_simulator/medium_dataset/dataset_simulator_trajectories.compressed.parquet'
gdf = pd.read_parquet(path_dataset)
gdf = gpd.GeoDataFrame(gdf, geometry=gpd.points_from_xy(gdf.lng, gdf.lat), crs="EPSG:4326")
# The coordinates now live in the geometry column, so the raw columns are redundant.
del gdf['lng'], gdf['lat']
# info() prints its own report and returns None; wrapping it in display() would
# only add a stray "None" to the cell output.
gdf.info()

# Create a GeoDataFrame for the stops dataset (stored next to the trajectories file).
path_stops = f'{path_dataset}.stops.parquet'
gdf_stops = pd.read_parquet(path_stops)
gdf_stops = gpd.GeoDataFrame(gdf_stops, geometry=gpd.points_from_xy(gdf_stops.lng, gdf_stops.lat), crs="EPSG:4326")
del gdf_stops['lng'], gdf_stops['lat']
gdf_stops.info()

In [None]:
# Take the extent covered by all trajectory points and overlay a 20 x 20 grid
# on it, expressed in the same CRS as the trajectories.
bbox_trajs = gdf.total_bounds
grid = compute_bounding_boxes(
    bbox=bbox_trajs,
    step=20,
    crs=gdf.crs,
)
display(grid)

In [None]:
# Tag every stop with the index of the grid cell it lies within. The left join
# keeps all stops; a stop that matches no cell ends up with a missing cell_id.
joined = gpd.sjoin(
    gdf_stops,
    grid[["geometry"]],
    how="left",
    predicate="within",
).rename(columns={"index_right": "cell_id"})
joined

In [None]:
# Now, compute some general statistics concerning the cells.
