In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np

from src.stop_grid_mapper import StopGridMapper
from src.stop_explorer import StopExplorer
from src.grid_partitioning import Grid

### Main code

In [None]:
# Load the stops dataset through StopExplorer (described upstream as yielding a GeoDataFrame of stops).
path_stops = './data_simulator/huge_dataset/dataset_simulator_trajectories.compressed.parquet.stops.parquet'
stop_explorer = StopExplorer(path_stops)

# Fetch the stops frame once and reuse it for both the rich preview and the schema/memory summary,
# instead of calling the getter twice on a large dataset.
df_stops = stop_explorer.get_df_stops()
display(df_stops)
df_stops.info()

### Materialize a uniform grid, with cells of a given side length, over the bounding box enclosing the stop segments.

In [None]:
# Side length of the grid cells, in meters. Tune this single value to change the grid resolution.
GRID_CELL_LENGTH_METERS = 50

# Build the grid over the stops data and show the resulting cells.
grid = Grid(grid_cell_length_meters=GRID_CELL_LENGTH_METERS)
grid.compute_grid_over_geodata(stop_explorer.get_df_stops())
display(grid.get_grid())

# Optional visual check, left disabled. Presumably renders the grid on a map -- confirm against
# Grid.generate_grid_map before enabling.
# mappa = grid.generate_grid_map()
# mappa

### Map the stop segments to the cells of the grid.

In [None]:
# Building a StopGridMapper performs the join between the centroids of the stop segments and the
# grid cells, i.e., it assigns every stop segment to the grid cell it falls into.
stop_grid_mapper = StopGridMapper(grid, stop_explorer)

# Inspect the result of that join.
stops_joined_with_cells = stop_grid_mapper.get_join()
display(stops_joined_with_cells)

### Compute statistics concerning the pairs '(uid, cell_id)', and the cells of the grid.

In [None]:
# Per-cell aggregate statistics: the grid augmented with one set of statistics per cell.
augmented_grid = stop_grid_mapper.compute_statistics_cells()
display(augmented_grid)

# Count the cells whose 'num_users' is zero by summing the boolean mask.
n_cells_without_users = (augmented_grid['num_users'] == 0.).sum()
print(f"Number of cells without stops/users: {n_cells_without_users}/{len(augmented_grid)}")

# Aggregate statistics for each pair (cell_id, user_id).
stats_uid_cell = stop_grid_mapper.compute_statistics_cells_users()
display(stats_uid_cell)

In [None]:
# Plot the distribution of the number of distinct cells in which each user has stop segments.
# stats_uid_cell is described upstream as holding one row per (cell_id, user_id) pair, so the
# number of rows in each 'uid' group is that user's number of distinct cells.
cells_per_user = stats_uid_cell.groupby('uid').size()

# Count how many users share each cell count, then order the bars by cell count rather than by
# frequency, so the chart reads left-to-right as a distribution. Working on a Series (instead of
# DataFrame.value_counts) keeps the tick labels as plain numbers rather than 1-tuples.
# The trailing ';' suppresses the Axes repr in the cell output.
(cells_per_user.value_counts()
               .sort_index()
               .plot(kind='bar',
                     title='Distribution of number of cells per user',
                     xlabel='Number of distinct cells with stops',
                     ylabel='Number of users'));

### For each user, determine the subset of cells with which their stops are consistently associated.

For each user and each of their cells, this is done by first computing the number of distinct days
spanned by the user's stop segments within a cell, and then ranking the cells accordingly. 

In [None]:
# Maximum number of cells to retain per user; passed to associate_cells_to_users below.
# For each user, select the 'top_k_cells_user' cells in which they have stops.
# NOTE(review): the ranking criterion lives inside StopGridMapper.associate_cells_to_users;
# the notebook text describes it as the number of distinct days spanned by the user's stops
# in each cell -- confirm against the implementation.
top_k_cells_user = 5
final_mapping_user_cells = stop_grid_mapper.associate_cells_to_users(top_k_cells_user)
display(final_mapping_user_cells)

Output the files that will be used in the subsequent step, i.e., determining which subsets of cells need to undergo a movement fairness audit.

In [None]:
# Persist the grid; the file name embeds the cell side length (in meters, truncated to an int).
grid_output_path = f'./grid_{int(grid.get_grid_cell_length_meters())}m.pkl'
grid.save_to_file(grid_output_path)

# Persist the user-cells mapping as a parquet file.
mapping_output_path = './mapping_users_cells.parquet'
final_mapping_user_cells.to_parquet(mapping_output_path)

### Plot heatmaps of the grid, each focused on a different statistic.

In [None]:
# Statistic column name -> human-readable label, handed to the heatmap generator
# (presumably used for the map tooltips, going by the variable name -- confirm).
dic_fields_tooltip = {
    "num_users": "# users",
    "num_stops": "# stops",
    "mean_duration_mins": "mean stop duration (mins)",
    "median_duration_mins": "median stop duration (mins)",
}

# One heatmap per statistic: (column used to color the cells, description, output HTML file).
#   - 'num_users': number of unique users that have at least a stop in the cell.
#   - 'num_stops': number of stops that fall in the cell.
heatmap_specs = [
    ('num_users', 'Number of users per cell', 'map_users.html'),
    ('num_stops', 'Number of stops per cell', 'map_stops.html'),
]

# Generate and save each heatmap in turn; 'mappa' ends up bound to the last map generated.
for stat_field, description, output_html in heatmap_specs:
    mappa = stop_grid_mapper.generate_augmented_grid_heatmap(stat_field, description, dic_fields_tooltip)
    mappa.save(output_html)