In [3]:
# BIBLIOTEKI
import pandas as pd
from srai.regionalizers import H3Regionalizer, geocode_to_region_gdf
import sys
sys.path.insert(0, '..')

from srai.embedders import CountEmbedder
from src.embedders.osm_data_embedder import OSMDataEmbedder
from sklearn.preprocessing import LabelEncoder

In [4]:
# Split the map of Wrocław into H3 regions, count the OSM "highway" features
# found in each region, and join those counts (X) with the accident data (Y,
# the number of accidents per region) read from ../data/embeddings.csv.

area = geocode_to_region_gdf("Wrocław, Poland")
regionalizer = H3Regionalizer(resolution=8)
embedder = CountEmbedder()
query = {"highway": True}

# The recorded run of this cell spent about 3.5 minutes on the
# "Downloading highway: True" step, so expect it to be slow.
embeddings = OSMDataEmbedder(
    area=area,
    embedder=embedder,
    regionalizer=regionalizer,
    query=query,
)
embeddings_feature_gdf = embeddings.make_embeddings()

df_accidents = pd.read_csv('../data/embeddings.csv')

# Inner join on region_id: only regions present on both sides are kept.
gdf = pd.merge(
    left=embeddings_feature_gdf,
    right=df_accidents,
    on='region_id',
    how='inner',
)

Downloading highway: True: 100%|██████████| 1/1 [03:32<00:00, 212.95s/it]


In [5]:
# Derive a risk-level label from the 2023 accident count, then encode the
# label as an integer.
#
# This cell rebinds `gdf` (region_id becomes the index and rok_2023 is
# dropped), so it cannot be re-run on its own; re-run the merge cell first.

bins = [-1, 0, 2, 12, float("inf")]  # intervals: (-1, 0], (0, 2], (2, 12], (12, inf]
label_names = ['no risk', 'low risk', 'medium risk', 'high risk']

gdf['risk_level'] = pd.cut(
    gdf['rok_2023'],
    bins=bins,
    labels=label_names,
    right=True,
)
labels = gdf['risk_level']

# LabelEncoder numbers classes in sorted order, which for these strings is
# alphabetical rather than by severity. With all four categories present the
# codes are: 'high risk'=0, 'low risk'=1, 'medium risk'=2, 'no risk'=3.
# NOTE(review): the codes are therefore not ordinal - confirm that every
# consumer of risk_level treats it as a nominal class id, not a magnitude.
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)
gdf['risk_level'] = labels_encoded

# Index by region and drop the raw count the label was derived from.
gdf = gdf.set_index('region_id').drop(columns=['rok_2023'])
gdf.head(3)

Unnamed: 0_level_0,geometry,highway_bridleway,highway_bus_stop,highway_construction,highway_corridor,highway_crossing,highway_cycleway,highway_elevator,highway_footway,highway_give_way,...,highway_tertiary_link,highway_track,highway_traffic_mirror,highway_traffic_signals,highway_trunk,highway_trunk_link,highway_turning_circle,highway_turning_loop,highway_unclassified,risk_level
region_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
881e2041d1fffff,"POLYGON ((16.97247 51.08486, 16.97006 51.08058...",0,0,0,0,2,0,0,100,0,...,0,26,0,0,0,0,1,0,0,3
881e204087fffff,"POLYGON ((17.02740 51.09661, 17.02498 51.09233...",0,27,0,7,76,24,0,620,0,...,0,0,0,21,0,0,3,0,8,0
881e204509fffff,"POLYGON ((17.11019 51.09652, 17.10777 51.09224...",0,2,0,0,0,0,0,2,0,...,0,7,0,0,0,0,0,0,0,1


In [6]:
# Feature selection: build the base frame from the region geometry, the chosen
# highway count columns, and the encoded risk_level target.

selected_features = [
    'highway_bridleway',
    'highway_bus_stop',
    'highway_elevator',
    'highway_give_way',
    'highway_motorway',
    'highway_passing_place',
    'highway_primary_link',
    'highway_secondary_link',
    'highway_stop',
    'highway_traffic_signals',
    'highway_turning_circle',
]

# Column order of the base frame: geometry first, target last.
base_columns = ['geometry', *selected_features, 'risk_level']

gdf_base = gdf[base_columns]
gdf_base.head(3)

Unnamed: 0_level_0,geometry,highway_bridleway,highway_bus_stop,highway_elevator,highway_give_way,highway_motorway,highway_passing_place,highway_primary_link,highway_secondary_link,highway_stop,highway_traffic_signals,highway_turning_circle,risk_level
region_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
881e2041d1fffff,"POLYGON ((16.97247 51.08486, 16.97006 51.08058...",0,0,0,0,0,0,0,0,0,0,1,3
881e204087fffff,"POLYGON ((17.02740 51.09661, 17.02498 51.09233...",0,27,0,0,0,0,0,0,0,21,3,0
881e204509fffff,"POLYGON ((17.11019 51.09652, 17.10777 51.09224...",0,2,0,0,0,0,0,0,0,0,0,1


In [9]:
# Persist the base frame to CSV. The destination is defined once so that the
# confirmation message always reports the path that was actually written.
base_gdf_path = '../data/base_gdf.csv'

gdf_base.to_csv(base_gdf_path)
print(f"saved base gdf in '{base_gdf_path}'")

gdf_base is your base gdf
