In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from src.graph_layering.create_dataframes import create_osmnx_dataframes
import geopandas as gpd
import os


In [3]:
# Root directory that holds every dataset referenced in this notebook.
# Set the GRADIENT_DATA_DIR environment variable to point the notebook at a
# different machine's data directory; the fallback is the original local path.
DATA_ROOT = os.environ.get(
    "GRADIENT_DATA_DIR", "/home/staszek/mgr/gradient/gradient/data"
)

# OSM map file (not referenced by the cells visible in this notebook section).
GRAPH_LOCATION = os.path.join(DATA_ROOT, "wro", "wro-map.osm")
# Accidents CSV, passed to create_osmnx_dataframes for each city.
ACCIDENTS_LOCATION = os.path.join(DATA_ROOT, "wypadki-pl", "accidents.csv")
# Directory containing the per-city hex dataset parquet files.
ORGANIZED_DATASETS_LOCATION = os.path.join(DATA_ROOT, "organized-datasets")

In [4]:
# Locations of the pre-computed hex datasets, one parquet file per city.
krakow_hexes_path = os.path.join(
    ORGANIZED_DATASETS_LOCATION, "krakow/2022/h9/count-embedder/dataset.parquet"
)
wroclaw_hexes_path = os.path.join(
    ORGANIZED_DATASETS_LOCATION, "wroclaw/2022/h9/count-embedder/dataset.parquet"
)

# Kraków: OSMnx node/edge frames built from the accidents file, then the hexes.
krakow_osmnx_nodes, krakow_osmnx_edges = create_osmnx_dataframes(
    ACCIDENTS_LOCATION, 'Kraków, Poland'
)
krakow_hexes: gpd.GeoDataFrame = gpd.read_parquet(krakow_hexes_path)

# Wrocław: same two steps, in the same order.
wroclaw_osmnx_nodes, wroclaw_osmnx_edges = create_osmnx_dataframes(
    ACCIDENTS_LOCATION, 'Wrocław, Poland'
)
wroclaw_hexes: gpd.GeoDataFrame = gpd.read_parquet(wroclaw_hexes_path)



In [6]:
def _prepare_hexes(hexes: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """Return a copy of ``hexes`` normalised for the graph layer controller.

    The ``region_id`` column is renamed to ``h3_id``, the index is named
    ``region_id`` and the ``accidents_count`` column is removed.

    The drop uses ``errors="ignore"`` so that running this cell a second time
    (when the column is already gone) is a no-op instead of a ``KeyError``.
    The rename steps are already no-ops on an already-prepared frame.
    """
    return (
        hexes.rename(columns={"region_id": "h3_id"})
        .rename_axis("region_id", axis=0)
        .drop(columns="accidents_count", errors="ignore")
    )


wroclaw_hexes = _prepare_hexes(wroclaw_hexes)
krakow_hexes = _prepare_hexes(krakow_hexes)

In [7]:
from src.graph_layering.graph_layer_creator import GraphLayerController

# One controller per city, each fed that city's hexes and OSMnx node/edge frames.
wroclaw_layer_controller = GraphLayerController(
    hexes_gdf=wroclaw_hexes,
    osmnx_nodes_gdf=wroclaw_osmnx_nodes,
    osmnx_edges_gdf=wroclaw_osmnx_edges,
)
krakow_layer_controller = GraphLayerController(
    hexes_gdf=krakow_hexes,
    osmnx_nodes_gdf=krakow_osmnx_nodes,
    osmnx_edges_gdf=krakow_osmnx_edges,
)


  self.hexes_gdf.centroid, columns=["centroid_geometry"]

  self.hexes_gdf.centroid, columns=["centroid_geometry"]


In [9]:
from src.graph_layering.graph_layer_creator import SourceType
from typing import cast
import pandas as pd

def create_hexes_with_y(
        osmnx_nodes: gpd.GeoDataFrame, hexes: gpd.GeoDataFrame, virtual_edges: pd.DataFrame
) -> gpd.GeoDataFrame:
    """Attach a per-hex ``accidents_count`` column to ``hexes``.

    Args:
        osmnx_nodes: Node frame; joined on its index and expected to provide
            an ``accidents_count`` column.
        hexes: Hex frame; joined on its index against ``region_id``.
        virtual_edges: Frame with ``source_id`` and ``region_id`` columns
            linking nodes to hexes.

    Returns:
        ``hexes`` left-joined with the summed ``accidents_count`` of the nodes
        linked to each region, with every missing value replaced by 0.
    """
    # Bring node attributes onto each virtual edge via the edge's source node.
    edges_with_node_attrs = virtual_edges.merge(
        osmnx_nodes, left_on="source_id", right_index=True
    )
    # Total accidents of all nodes attached to each region.
    accidents_per_region = (
        edges_with_node_attrs[["region_id", "accidents_count"]]
        .groupby("region_id")
        .sum()
    )
    # Left join keeps hexes that have no linked node.
    hexes_with_counts = hexes.merge(
        accidents_per_region,
        left_index=True,
        right_index=True,
        how="left",
    )
    # NOTE: fillna(0) is applied to the whole frame, not only accidents_count.
    return cast(gpd.GeoDataFrame, hexes_with_counts.fillna(0))

def patch_hexes_with_accidents_occurred(controller, osmnx_nodes, hexes):
    """Replace the controller's hexes with a copy carrying a binary target.

    The target column ``accident_occured`` is 1 where the hex's summed
    ``accidents_count`` is positive and 0 otherwise; the raw count column is
    removed. The controller's cached hex centroids are rebuilt afterwards so
    they are derived from the new ``hexes_gdf``.

    Args:
        controller: Object exposing ``get_virtual_edges_to_hexes``,
            ``hexes_gdf``, ``_hexes_centroids_gdf`` and
            ``_create_hexes_centroids_gdf``.
        osmnx_nodes: Node frame forwarded to ``create_hexes_with_y``.
        hexes: Hex frame forwarded to ``create_hexes_with_y``.
    """
    node_to_hex_edges = controller.get_virtual_edges_to_hexes(SourceType.OSMNX_NODES)
    hexes_with_counts = create_hexes_with_y(osmnx_nodes, hexes, node_to_hex_edges)

    # Binarise the count, then discard the count itself.
    accident_flag = (hexes_with_counts["accidents_count"] > 0).astype(int)
    labelled_hexes = hexes_with_counts.assign(accident_occured=accident_flag).drop(
        columns="accidents_count"
    )

    controller.hexes_gdf = labelled_hexes
    # Centroids are cached on the controller, so refresh them after the swap.
    controller._hexes_centroids_gdf = controller._create_hexes_centroids_gdf()

In [10]:
# Add the binary accident target to each city's controller (Wrocław first).
patch_hexes_with_accidents_occurred(
    controller=wroclaw_layer_controller,
    osmnx_nodes=wroclaw_osmnx_nodes,
    hexes=wroclaw_hexes,
)
patch_hexes_with_accidents_occurred(
    controller=krakow_layer_controller,
    osmnx_nodes=krakow_osmnx_nodes,
    hexes=krakow_hexes,
)


  self.hexes_gdf.centroid, columns=["centroid_geometry"]

  self.hexes_gdf.centroid, columns=["centroid_geometry"]


In [13]:
from src.graph_layering.hetero_data import create_hetero_data


def create_city_hetero_data(osmnx_nodes, osmnx_edges, hexes, controller):
    """Build the hetero data object for one city via ``create_hetero_data``.

    Attribute column lists are taken from the frames passed in, minus the
    columns excluded below. The hex target is always ``accident_occured`` and
    no virtual edge attributes are requested.

    Args:
        osmnx_nodes: Node frame whose columns define the node attributes.
        osmnx_edges: Edge frame whose columns define the edge attributes.
        hexes: Hex frame whose columns define the hex attributes.
        controller: Passed through unchanged as the first argument of
            ``create_hetero_data``.

    Returns:
        Whatever ``create_hetero_data`` returns.
    """

    def columns_without(frame, excluded):
        # Keep the frame's column order; only filter out the excluded names.
        keep_mask = ~frame.columns.isin(excluded)
        return frame.columns[keep_mask]

    return create_hetero_data(
        controller,
        hexes_attrs_columns_names=columns_without(hexes, ["geometry", "h3_id"]),
        osmnx_edge_attrs_columns_names=columns_without(
            osmnx_edges, ["u", "v", "key", "geometry"]
        ),
        osmnx_node_attrs_columns_names=columns_without(
            osmnx_nodes, ["geometry", "x", "y", "osmid", "accidents_count"]
        ),
        virtual_edge_attrs_columns_names=[],
        hexes_y_columns_names=["accident_occured"],
    )

# Hetero data for Wrocław, built from its frames and its patched controller.
wroclaw_hetero_data = create_city_hetero_data(
    osmnx_nodes=wroclaw_osmnx_nodes,
    osmnx_edges=wroclaw_osmnx_edges,
    hexes=wroclaw_hexes,
    controller=wroclaw_layer_controller,
)

In [52]:
# Every node column except the geometry one.
non_geometry_columns = wroclaw_osmnx_nodes.columns != 'geometry'
a = wroclaw_osmnx_nodes.loc[:, non_geometry_columns]

# Nodes with at least one street and at least one crossing; the label-based
# slice 34..35 is inclusive on both ends.
has_street_and_crossing = (a.street_count > 0) & (a.crossing > 0)
a[has_street_and_crossing].loc[34:35]

Unnamed: 0_level_0,osmid,street_count,crossing,give_way,mini_roundabout,motorway_junction,traffic_signals,turning_circle,accidents_count,x,y
node_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
34,150597406,3,1,0,0,0,0,0,0,16.977864,51.092384


In [50]:
# Show the feature tensor row next to the source frame row at the same
# position, so the two can be compared by eye.
node_position = 34
display(wroclaw_hetero_data['osmnx_node'].x[node_position])
display(wroclaw_osmnx_nodes.iloc[node_position:node_position + 1])

tensor([3., 1., 0., 0., 0., 0., 0.])

Unnamed: 0_level_0,osmid,street_count,crossing,give_way,mini_roundabout,motorway_junction,traffic_signals,turning_circle,accidents_count,geometry,x,y
node_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
34,150597406,3,1,0,0,0,0,0,0,POINT (16.97786 51.09238),16.977864,51.092384


In [55]:
from torch_geometric.loader import DataLoader

# The dataset is a one-element list holding the Wrocław hetero data object.
loader = DataLoader(
    dataset=[wroclaw_hetero_data],
    batch_size=16,
    shuffle=True,
)

In [71]:
# Pull one batch from the loader, then show the dataset backing it.
batch_iterator = iter(loader)
sample = next(batch_iterator)
loader.dataset

[HeteroData(
   hex={
     x=[3168, 798],
     y=[3168, 1],
   },
   osmnx_node={ x=[7220, 7] },
   (hex, connected_to, hex)={ edge_index=[2, 9206] },
   (osmnx_node, connected_to, osmnx_node)={
     edge_index=[2, 17004],
     edge_attr=[17004, 33],
   },
   (osmnx_node, connected_to, hex)={
     edge_index=[2, 7220],
     edge_attr=[7220, 0],
   }
 )]