In [1]:
import pyrosm
import geopandas as gpd
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

In [2]:
# load openstreetmap basemap
# https://www.openstreetmap.org/export - boundingbox setup
bbox = [16.3837, 49.0946, 16.9227, 49.2884]  # [minx, miny, maxx, maxy]
pbf_reader = pyrosm.OSM('datasets/czech_republic-latest.osm.pbf', bounding_box=bbox)
cycling_network = pbf_reader.get_network('cycling')
driving_network = pbf_reader.get_network('driving')
basemap_network = driving_network.append(cycling_network)

# load strava data
strava_df = pd.read_csv('datasets/strava_edges_2021.csv')

# load bike-to-work data
biketowork_df = gpd.read_file('datasets/do_prace_na_kole.geojson')

# load mechanical counters data
counters_df = gpd.read_file('datasets/cyklodetektory.geojson')

# load driving census data
census_df = pd.read_excel('datasets/rsd_scitanie_dopravy_2020.xlsx', skiprows=[0])

  basemap_network = driving_network.append(cycling_network)


In [18]:
# TODO: drop unnecessary columns - speedup
strava_cropped_df = strava_df.drop([
    'forward_morning_trip_count',
    'reverse_morning_trip_count',
    'forward_evening_trip_count',
    'reverse_evening_trip_count',
    'forward_male_people_count',
    'reverse_male_people_count',
    'forward_female_people_count',
    'reverse_female_people_count',
    'forward_unspecified_people_count',
    'reverse_unspecified_people_count',
    'forward_13_19_people_count',
    'reverse_13_19_people_count',
    'forward_20_34_people_count',
    'reverse_20_34_people_count',
    'forward_35_54_people_count',
    'reverse_35_54_people_count',
    'forward_55_64_people_count',
    'reverse_55_64_people_count',
    'forward_65_plus_people_count',
    'reverse_65_plus_people_count',
    'activity_type',
    'edge_uid',
    'year'
], axis=1)
strava_cropped_df

Index(['edge_uid', 'activity_type', 'year', 'forward_trip_count',
       'reverse_trip_count', 'forward_people_count', 'reverse_people_count',
       'forward_commute_trip_count', 'reverse_commute_trip_count',
       'forward_leisure_trip_count', 'reverse_leisure_trip_count',
       'forward_morning_trip_count', 'reverse_morning_trip_count',
       'forward_evening_trip_count', 'reverse_evening_trip_count',
       'forward_male_people_count', 'reverse_male_people_count',
       'forward_female_people_count', 'reverse_female_people_count',
       'forward_unspecified_people_count', 'reverse_unspecified_people_count',
       'forward_13_19_people_count', 'reverse_13_19_people_count',
       'forward_20_34_people_count', 'reverse_20_34_people_count',
       'forward_35_54_people_count', 'reverse_35_54_people_count',
       'forward_55_64_people_count', 'reverse_55_64_people_count',
       'forward_65_plus_people_count', 'reverse_65_plus_people_count',
       'forward_average_speed', 'reve

Unnamed: 0,forward_trip_count,reverse_trip_count,forward_people_count,reverse_people_count,forward_commute_trip_count,reverse_commute_trip_count,forward_leisure_trip_count,reverse_leisure_trip_count,forward_average_speed,reverse_average_speed,osm_reference_id
0,5,30,5,15,0,0,5,30,5.94,3.15,93103358
1,5,0,5,0,0,0,5,0,1.36,0.00,65201855
2,65,65,60,55,5,5,60,60,0.56,0.35,33968694
3,255,400,85,100,25,20,230,380,2.66,4.61,108343596
4,75,185,70,130,5,15,70,170,2.03,2.60,74064352
...,...,...,...,...,...,...,...,...,...,...,...
193807,5,5,5,5,0,0,5,5,2.04,3.34,591639152
193808,575,385,240,160,20,15,555,370,0.17,0.27,237480106
193809,595,510,215,190,45,60,550,450,5.38,6.22,467759540
193810,5,5,5,5,0,0,5,5,2.75,3.67,338583813


In [4]:
print(cycling_network.shape)
print(driving_network.shape)
print(basemap_network.shape)
print(strava_df.shape)

(36644, 39)
(23179, 39)
(59823, 40)
(193812, 34)


In [19]:
# group edges into one OSM way (some columns will need to be averaged, not summed)
strava_grouped_df = strava_cropped_df.groupby(['osm_reference_id']).sum()
model = strava_grouped_df.join(basemap_network.set_index('id'), on='osm_reference_id', how='inner')
model = model.drop_duplicates()
# https://www.openstreetmap.org/way/{id}
model.head()

Unnamed: 0_level_0,forward_trip_count,reverse_trip_count,forward_people_count,reverse_people_count,forward_commute_trip_count,reverse_commute_trip_count,forward_leisure_trip_count,reverse_leisure_trip_count,forward_average_speed,reverse_average_speed,...,tunnel,turn,width,timestamp,version,tags,osm_type,geometry,length,path
osm_reference_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
4019269,30,0,10,0,0,0,30,0,18.06,0.0,...,,,,1556088822,16,"{""layer"":""1"",""source:maxspeed"":""CZ:urban_trunk...",way,"MULTILINESTRING ((16.56569 49.18349, 16.56527 ...",81.0,
4019432,805,745,265,270,140,105,665,640,4.29,9.82,...,,,,1611510572,10,"{""source:maxspeed"":""CZ:rural"",""source:ref"":""rs...",way,"MULTILINESTRING ((16.52347 49.17080, 16.52324 ...",159.0,
4019434,5,0,5,0,0,0,5,0,16.33,0.0,...,,,,1530611385,13,"{""source:maxspeed"":""sign""}",way,"MULTILINESTRING ((16.54352 49.16684, 16.54319 ...",380.0,
4019435,50,0,25,0,20,0,30,0,17.68,0.0,...,,,,1639339626,10,"{""source"":""uhul:ortofoto""}",way,"MULTILINESTRING ((16.56420 49.17379, 16.56420 ...",338.0,
4019437,1100,990,140,220,55,55,1045,935,21.31,23.57,...,,,,1648299982,16,,way,"MULTILINESTRING ((16.56755 49.16300, 16.56762 ...",166.0,


In [20]:
print(model.shape)
model

(27050, 49)


Unnamed: 0_level_0,forward_trip_count,reverse_trip_count,forward_people_count,reverse_people_count,forward_commute_trip_count,reverse_commute_trip_count,forward_leisure_trip_count,reverse_leisure_trip_count,forward_average_speed,reverse_average_speed,...,tunnel,turn,width,timestamp,version,tags,osm_type,geometry,length,path
osm_reference_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
4019269,30,0,10,0,0,0,30,0,18.06,0.00,...,,,,1556088822,16,"{""layer"":""1"",""source:maxspeed"":""CZ:urban_trunk...",way,"MULTILINESTRING ((16.56569 49.18349, 16.56527 ...",81.0,
4019432,805,745,265,270,140,105,665,640,4.29,9.82,...,,,,1611510572,10,"{""source:maxspeed"":""CZ:rural"",""source:ref"":""rs...",way,"MULTILINESTRING ((16.52347 49.17080, 16.52324 ...",159.0,
4019434,5,0,5,0,0,0,5,0,16.33,0.00,...,,,,1530611385,13,"{""source:maxspeed"":""sign""}",way,"MULTILINESTRING ((16.54352 49.16684, 16.54319 ...",380.0,
4019435,50,0,25,0,20,0,30,0,17.68,0.00,...,,,,1639339626,10,"{""source"":""uhul:ortofoto""}",way,"MULTILINESTRING ((16.56420 49.17379, 16.56420 ...",338.0,
4019437,1100,990,140,220,55,55,1045,935,21.31,23.57,...,,,,1648299982,16,,way,"MULTILINESTRING ((16.56755 49.16300, 16.56762 ...",166.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1023721767,5,0,0,0,0,0,5,0,0.00,0.00,...,,,,1644149494,2,,way,"MULTILINESTRING ((16.58372 49.24982, 16.58388 ...",27.0,
1023721771,170,395,105,170,10,15,160,380,3.09,6.97,...,,,,1642884868,1,,way,"MULTILINESTRING ((16.56073 49.25148, 16.56098 ...",363.0,
1023721772,40,5,35,5,15,0,25,5,12.97,2.14,...,,,,1661716762,2,,way,"MULTILINESTRING ((16.58314 49.25817, 16.58319 ...",111.0,
1023960623,15,25,10,10,0,0,15,25,10.20,7.83,...,,,,1642950670,2,"{""source"":""uhul:ortofoto;survey""}",way,"MULTILINESTRING ((16.71887 49.17968, 16.71869 ...",105.0,


In [7]:
model.to_csv('model.csv', sep=';')


In [45]:
import utils

counters = counters_df['geometry'].unique()
for i, item in enumerate(counters):
    # within <- exact match fails (needs threshold), better to use distance (TODO: find optimal threshold)
    model[f'counter{i}'] = model['geometry'].apply(lambda x: utils.is_point_on_line(item, x))


In [46]:
for i in range(len(counters)):
    print(model[f'counter{i}'].unique())


[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
