In [5]:
import pyrosm
import geopandas as gpd
import pandas as pd
#import numpy as np
#import seaborn as sns
#from matplotlib import pyplot as plt
#import contextily as cx
import gc
import utils

In [6]:
# Load the OpenStreetMap base map, clipped to a bounding box.
# Bounding-box coordinates can be picked at https://www.openstreetmap.org/export
bbox = [16.3837, 49.0946, 16.9227, 49.2884]  # [minx, miny, maxx, maxy]
pbf_reader = pyrosm.OSM('datasets/czech_republic-latest.osm.pbf', bounding_box=bbox)
cycling_network = pbf_reader.get_network('cycling')
driving_network = pbf_reader.get_network('driving')
# DataFrame.append is deprecated (and removed in pandas 2.0). pd.concat stacks
# the rows the same way: driving rows first, then cycling rows, with the
# original index labels kept.
basemap_network = pd.concat([driving_network, cycling_network])
# The two partial networks are no longer needed once combined; release them.
del cycling_network
del driving_network
gc.collect()

# load strava data
strava_df = pd.read_csv('datasets/strava_edges_2021.csv')
# load bike-to-work data
biketowork_df = gpd.read_file('datasets/do_prace_na_kole.geojson')
# load mechanical counters data
counters_df = gpd.read_file('datasets/cyklodetektory.geojson')
# load driving census data (the first spreadsheet row is skipped)
census_df = pd.read_excel('datasets/rsd_scitanie_dopravy_2020.xlsx', skiprows=[0])

  basemap_network = driving_network.append(cycling_network)


In [7]:
# Trim columns that are not used further on, to keep the frames small and fast.
# Strava: time-of-day, gender and age-group breakdowns plus constant metadata.
strava_df = strava_df.drop(columns=[
    'forward_morning_trip_count', 'reverse_morning_trip_count',
    'forward_evening_trip_count', 'reverse_evening_trip_count',
    'forward_male_people_count', 'reverse_male_people_count',
    'forward_female_people_count', 'reverse_female_people_count',
    'forward_unspecified_people_count', 'reverse_unspecified_people_count',
    'forward_13_19_people_count', 'reverse_13_19_people_count',
    'forward_20_34_people_count', 'reverse_20_34_people_count',
    'forward_35_54_people_count', 'reverse_35_54_people_count',
    'forward_55_64_people_count', 'reverse_55_64_people_count',
    'forward_65_plus_people_count', 'reverse_65_plus_people_count',
    'activity_type',
    'year',
])

# Counters: identifiers, direction names, pedestrian/total counts and the
# separate Latitude/Longitude columns are removed.
counters_df = counters_df.drop(columns=[
    'ID',
    'FirstDirection_Name', 'FirstDirection_Pedestrians', 'FirstDirection_Total',
    'SecondDirection_Name', 'SecondDirection_Pedestrians', 'SecondDirection_Total',
    'LocationId',
    'Latitude', 'Longitude',
])

# Census: keep only the rows whose NÁZEV_MĚSTA value is 'Brno'.
census_df = census_df.loc[census_df['NÁZEV_MĚSTA'] == 'Brno']

In [4]:
# Report (rows, columns) of the base map and of the Strava table, one per line.
print(basemap_network.shape, strava_df.shape, sep='\n')

(59823, 40)
(193812, 12)


In [8]:
# Build the model table by attaching base-map attributes to the Strava edges.
# The base map is indexed by its `id` column and matched against Strava's
# `osm_reference_id`; the inner join keeps only rows that match on both sides.
# A way can be looked up at https://www.openstreetmap.org/way/{id}
model = (
    strava_df
    .join(basemap_network.set_index('id'), on='osm_reference_id', how='inner')
)
# The inputs are not needed after the join; free them before de-duplicating.
del basemap_network, strava_df
gc.collect()
# Remove rows that are exact duplicates of another row.
model = model.drop_duplicates()
print(model.shape)
model.head()

(55058, 51)


Unnamed: 0,edge_uid,forward_trip_count,reverse_trip_count,forward_people_count,reverse_people_count,forward_commute_trip_count,reverse_commute_trip_count,forward_leisure_trip_count,reverse_leisure_trip_count,forward_average_speed,...,tunnel,turn,width,timestamp,version,tags,osm_type,geometry,length,path
20,161016343,140,220,95,135,5,10,135,210,5.48,...,,,,1534974133,2,"{""mtb:scale"":""0"",""source"":""gps;uhul:ortofoto""}",way,"MULTILINESTRING ((16.41994 49.17335, 16.42037 ...",118.0,
21,161019607,40,55,20,25,5,10,35,45,0.9,...,,,,1535317104,2,"{""source"":""cuzk:km""}",way,"MULTILINESTRING ((16.50526 49.15238, 16.50531 ...",84.0,
119579,161019604,45,45,15,10,5,10,40,35,0.51,...,,,,1535317104,2,"{""source"":""cuzk:km""}",way,"MULTILINESTRING ((16.50526 49.15238, 16.50531 ...",84.0,
131142,161019599,35,40,15,20,5,5,30,35,6.91,...,,,,1535317104,2,"{""source"":""cuzk:km""}",way,"MULTILINESTRING ((16.50526 49.15238, 16.50531 ...",84.0,
22,161021372,65,25,45,20,5,0,60,25,5.09,...,,,,1660129784,7,"{""source"":""uhul:ortofoto"",""source:maxspeed"":""C...",way,"MULTILINESTRING ((16.47988 49.20669, 16.47977 ...",488.0,


In [9]:
# For every distinct counter location, add a `distance<k>` column holding the
# distance from each way geometry to that counter.
# An exact containment test (`within`) fails without a tolerance, so the
# distance is stored and the minimum is picked later.
# NOTE(review): the coordinates look like lon/lat, so these distances would be
# in degrees rather than metres; confirm whether a projected CRS is wanted.
counters = counters_df['geometry'].unique()
for idx, counter_point in enumerate(counters):
    model[f'distance{idx}'] = model['geometry'].apply(
        lambda way, point=counter_point: way.distance(point)
    )

# Unit name of each distinct counter location, in the same order as `counters`.
counter_units = [
    counters_df.loc[counters_df['geometry'] == counter_geo, 'UnitName'].unique()[0]
    for counter_geo in counters
]

model.head()

Unnamed: 0,edge_uid,forward_trip_count,reverse_trip_count,forward_people_count,reverse_people_count,forward_commute_trip_count,reverse_commute_trip_count,forward_leisure_trip_count,reverse_leisure_trip_count,forward_average_speed,...,distance9,distance10,distance11,distance12,distance13,distance14,distance15,distance16,distance17,distance18
20,161016343,140,220,95,135,5,10,135,210,5.48,...,0.1878,0.188094,0.204763,0.183556,0.2107,0.266757,0.184477,0.210774,0.260703,0.209083
21,161019607,40,55,20,25,5,10,35,45,0.9,...,0.112711,0.113014,0.119325,0.104054,0.124344,0.194584,0.105237,0.124393,0.188805,0.127482
119579,161019604,45,45,15,10,5,10,40,35,0.51,...,0.112711,0.113014,0.119325,0.104054,0.124344,0.194584,0.105237,0.124393,0.188805,0.127482
131142,161019599,35,40,15,20,5,5,30,35,6.91,...,0.112711,0.113014,0.119325,0.104054,0.124344,0.194584,0.105237,0.124393,0.188805,0.127482
22,161021372,65,25,45,20,5,0,60,25,5.09,...,0.127116,0.127391,0.154237,0.12635,0.1623,0.202624,0.12699,0.162448,0.196541,0.1527


In [7]:
# Optional sanity check: for each counter print its unit name, its coordinates
# and the id(s) of the way(s) at minimum distance from it.
for idx, counter_point in enumerate(counters):
    unit_name = counters_df[counters_df["geometry"] == counter_point]["UnitName"].unique()[0]
    print(f'Counter #{idx}: ({unit_name})')
    print(f'Lon: {counter_point.x}\nLat: {counter_point.y}')
    distances = model[f"distance{idx}"]
    closest_ways = model[distances == distances.min()]["osm_reference_id"].values
    print(f'Closest way: {closest_ways}')

Counter #0: (Renneska 1)
Lon: 16.60242319100007
Lat: 49.18369855600008
Closest way: [76821611]
Counter #1: (Obřany)
Lon: 16.65878176700005
Lat: 49.22628269300003
Closest way: [32355469 32355469 32355469]
Counter #2: (Renneska 2)
Lon: 16.602337360000035
Lat: 49.18364245400005
Closest way: [83202890]
Counter #3: (Královopolská)
Lon: 16.575043201000028
Lat: 49.21865179000008
Closest way: [450098706 450098706 450098706 450098706 450098706 450098706 450098706
 450098706 450098706 450098706 450098706 450098706 450098706 450098706]
Counter #4: (Komín)
Lon: 16.54097914700003
Lat: 49.218532657000026
Closest way: [736313314 736313314]
Counter #5: (Kounicova A)
Lon: 16.601403952000055
Lat: 49.20196340600006
Closest way: [632124039 632124039 632124039]
Counter #6: (Kounicova B)
Lon: 16.601318121000077
Lat: 49.20225082200005
Closest way: [109505674 109505674 109505674 109505674 109505674 109505674 109505674
 109505674 109505674 109505674 109505674]
Counter #7: (Jundrov)
Lon: 16.566503048000072
Lat:

In [10]:
# Match every counter unit to its nearest way and record that way's id on
# counters_df as `way_id`.
# The plotting / GeoJSON export of the matched edges was a visual verification
# aid only; it is disabled, so the matched geometries are no longer serialized
# on every iteration just to be thrown away.
unit_way_dict = {}
for i, unit_name in enumerate(counter_units):
    distances = model[f'distance{i}']
    # All rows at the minimum distance; several rows can share the same way id,
    # so the first distinct id is taken.
    nearest_way_ids = model.loc[distances == distances.min(), 'osm_reference_id']
    unit_way_dict[unit_name] = nearest_way_ids.unique()[0]

# append edges to counters_df (looked up by unit name)
counters_df['way_id'] = counters_df['UnitName'].map(unit_way_dict)

# The helper distance columns have served their purpose; remove them all in a
# single call instead of copying the whole frame once per column.
model = model.drop(columns=[f'distance{i}' for i in range(len(counters))])
counters_df.head()

Unnamed: 0,ObjectId,UnitName,UnitId,EndOfInterval,FirstDirection_Cyclists,SecondDirection_Cyclists,datum,geometry,way_id
0,1,Renneska 1,CAM1,2021-06-22 03:59:59+00:00,6.0,18.0,2021-06-22 02:00:00+00:00,POINT (16.60242 49.18370),76821611
1,2,Obřany,CAM8,2021-06-21 17:00:00+00:00,8.0,5.0,2021-06-21 03:00:00+00:00,POINT (16.65878 49.22628),32355469
2,3,Renneska 2,CAM2,2021-06-22 03:59:59+00:00,13.0,5.0,2021-06-22 02:00:00+00:00,POINT (16.60234 49.18364),83202890
3,4,Královopolská,CAM9,2021-06-21 02:00:00+00:00,0.0,0.0,2021-06-21 03:00:00+00:00,POINT (16.57504 49.21865),450098706
4,5,Komín,CAM3,2021-06-22 03:59:59+00:00,12.0,1.0,2021-06-22 02:00:00+00:00,POINT (16.54098 49.21853),736313314


In [11]:
# Persist the counters table (including the matched `way_id` column) as GeoJSON.
counters_df.to_file('counters_matched.geojson', driver="GeoJSON")

In [10]:
# save final model
# Written twice: as a semicolon-separated CSV and as GeoJSON.
model.to_csv('model.csv', sep=';')
# Wrapped in a GeoDataFrame so that the `to_file` writer is available.
gpd.GeoDataFrame(model).to_file('model.geojson', driver="GeoJSON")

In [11]:
# Cache the model as a pickle so later sessions can reload it with
# pd.read_pickle('model.pkl') instead of repeating the setup.
model.to_pickle('model.pkl')

### From this point on, load the model from the pickle to skip the setup above

In [3]:
# Reload the cached model from 'model.pkl'.
# NOTE: unpickling can execute arbitrary code; only load a pickle file that
# you produced yourself.
model = pd.read_pickle('model.pkl')

In [3]:
# Proof of concept: flag the model ways that overlap one bike-to-work road
# segment. Works with GID_ROAD 224865 and 232768.
# `utils.lines_overlap` is a project helper; presumably it returns True when the
# two line geometries overlap (confirm against utils).
biketowork_df = gpd.read_file('datasets/do_prace_na_kole.geojson')
row = biketowork_df.loc[biketowork_df['GID_ROAD'] == 224865].iloc[0]
model['way_covers'] = model['geometry'].apply(
    lambda way, road=row['geometry']: utils.lines_overlap(road, way)
)
print(model['way_covers'].unique())

[False  True]


In [None]:
# Visual debugging aid (temporary): export one OSM way from the model and the
# bike-to-work segment it is compared against, for inspection in a GIS viewer.
gpd.GeoDataFrame(model[model['osm_reference_id'] == 5606151]).to_file('test1.geojson', driver="GeoJSON")
# `row` is a single record (a Series). Wrapping a Series in GeoDataFrame gives
# a one-column frame with no geometry column, which cannot be written to a
# file. Select the same record from its source frame as a one-row GeoDataFrame
# instead; this keeps the geometry column, dtypes and CRS intact.
biketowork_df.loc[[row.name]].to_file('test2.geojson', driver="GeoJSON")


In [15]:
# List the OSM way ids of all model rows flagged as covering the segment.
print(model.loc[model['way_covers'] == True, 'osm_reference_id'])


5776      5606151
61947     5606151
62898     5606151
188312    5606151
6055      8134289
77782     8134289
136509    8134289
Name: osm_reference_id, dtype: int64


In [4]:
# Preview the first rows of the model.
model.head()


Unnamed: 0,edge_uid,forward_trip_count,reverse_trip_count,forward_people_count,reverse_people_count,forward_commute_trip_count,reverse_commute_trip_count,forward_leisure_trip_count,reverse_leisure_trip_count,forward_average_speed,...,tunnel,turn,width,timestamp,version,tags,osm_type,geometry,length,path
20,161016343,140,220,95,135,5,10,135,210,5.48,...,,,,1534974133,2,"{""mtb:scale"":""0"",""source"":""gps;uhul:ortofoto""}",way,"MULTILINESTRING ((16.41994 49.17335, 16.42037 ...",118.0,
21,161019607,40,55,20,25,5,10,35,45,0.9,...,,,,1535317104,2,"{""source"":""cuzk:km""}",way,"MULTILINESTRING ((16.50526 49.15238, 16.50531 ...",84.0,
119579,161019604,45,45,15,10,5,10,40,35,0.51,...,,,,1535317104,2,"{""source"":""cuzk:km""}",way,"MULTILINESTRING ((16.50526 49.15238, 16.50531 ...",84.0,
131142,161019599,35,40,15,20,5,5,30,35,6.91,...,,,,1535317104,2,"{""source"":""cuzk:km""}",way,"MULTILINESTRING ((16.50526 49.15238, 16.50531 ...",84.0,
22,161021372,65,25,45,20,5,0,60,25,5.09,...,,,,1660129784,7,"{""source"":""uhul:ortofoto"",""source:maxspeed"":""C...",way,"MULTILINESTRING ((16.47988 49.20669, 16.47977 ...",488.0,
