# Bike Safety EPJ model in Washington DC

Should get:
* accident severity
* accident distance from intersection
* road characteristics:
    * max speed
    * number of lanes
    * width
    * curvature
    * type of road
    * bike lane type
    * traffic volume
    * cycling volume

check slack for data inputs and outputs

TO DO:
* deal with directionality

In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from shapely.geometry import Point
import networkx as nx
import igraph as ig

## Make graph and get between-ness

In [2]:
seg = gpd.read_file(r'../seg_nodes.geojson')
node = gpd.read_file(r'..\osm_highway_limited_nodes_cleaned.geojson')
seg

  return ogr_read(


Unnamed: 0,id,SegID,startPoints,endPoints,PointID_start,geometry_start,PointID_end,geometry_end,geometry
0,0,0,POINT (-77.180353 38.967728),POINT (-77.082863 38.902279),0,POINT (-77.180353 38.967728),1,POINT (-77.082863 38.902279),"LINESTRING (-77.18035 38.96773, -77.18014 38.9..."
1,0,1,POINT (-77.117895 38.93696),POINT (-77.121719 38.939401),2,POINT (-77.117895 38.93696),3,POINT (-77.121719 38.939401),"LINESTRING (-77.1179 38.93696, -77.118 38.9370..."
2,0,2,POINT (-77.117725 38.92926),POINT (-77.118124 38.930734),4,POINT (-77.117725 38.92926),5,POINT (-77.118124 38.930734),"LINESTRING (-77.11773 38.92926, -77.11753 38.9..."
3,0,3,POINT (-77.117895 38.93696),POINT (-77.11618 38.935177),2,POINT (-77.117895 38.93696),7,POINT (-77.11618 38.935177),"LINESTRING (-77.1179 38.93696, -77.11792 38.93..."
4,0,4,POINT (-77.115779 38.935197),POINT (-77.117895 38.93696),8,POINT (-77.115779 38.935197),2,POINT (-77.117895 38.93696),"LINESTRING (-77.11578 38.9352, -77.116 38.9354..."
...,...,...,...,...,...,...,...,...,...
62607,0,62607,POINT (-77.030862 38.882691),POINT (-77.030862 38.882691),125214,POINT (-77.030862 38.882691),125214,POINT (-77.030862 38.882691),"LINESTRING (-77.03086 38.88269, -77.03088 38.8..."
62608,0,62608,POINT (-77.023401 38.897861),POINT (-77.023401 38.897861),125216,POINT (-77.023401 38.897861),125216,POINT (-77.023401 38.897861),"LINESTRING (-77.0234 38.89786, -77.02338 38.89..."
62609,0,62609,POINT (-77.022906 38.898133),POINT (-77.022906 38.898133),125218,POINT (-77.022906 38.898133),125218,POINT (-77.022906 38.898133),"LINESTRING (-77.02291 38.89813, -77.02282 38.8..."
62610,0,62610,POINT (-77.022896 38.897678),POINT (-77.022896 38.897678),125220,POINT (-77.022896 38.897678),125220,POINT (-77.022896 38.897678),"LINESTRING (-77.0229 38.89768, -77.02285 38.89..."


In [3]:
seg_edge = seg[['PointID_start', 'PointID_end']]
seg_edge

Unnamed: 0,PointID_start,PointID_end
0,0,1
1,2,3
2,4,5
3,2,7
4,8,2
...,...,...
62607,125214,125214
62608,125216,125216
62609,125218,125218
62610,125220,125220


In [30]:
g = ig.Graph.DataFrame(seg_edge, directed=False)

In [None]:
ig.plot(g)

In [16]:
edge_betweenness1 = g.edge_betweenness(cutoff = 20, normalized = True)
max(edge_betweenness1)

1.507550781464544e-05

In [None]:
import time

start = time.time()
edge_betweenness2 = g.edge_betweenness()
end = time.time()
print(end - start)

698.4850332736969


In [36]:
np.savetxt('edge_betweenness2_notNormalized.csv', np.array(edge_betweenness2), delimiter=',', fmt='%d')

In [4]:
edge_betweenness2 = pd.read_csv('edge_betweenness2_notNormalized.csv')
edge_betweenness2

Unnamed: 0,1
0,42425
1,1
2,42425
3,127269
4,42425
...,...
62606,0
62607,0
62608,0
62609,0


In [5]:
seg['betweenness'] = edge_betweenness2

## Get geometric properties (distance from intersection and curvature) from edges data

In [6]:
#calculate curve or road (source to target length/edge length)
seg = seg.to_crs(seg.estimate_utm_crs())
seg['straight_len'] = gpd.GeoSeries.from_wkt(seg['geometry_start'], crs = '4326').to_crs(seg.crs).distance(gpd.GeoSeries.from_wkt(seg['geometry_end'], crs = '4326').to_crs(seg.crs))
seg['len'] = seg.length
seg['curve'] = seg.apply(lambda x: x['len']/(x['straight_len']) if x['straight_len']!=0 else 100, axis = 1)
seg

Unnamed: 0,id,SegID,startPoints,endPoints,PointID_start,geometry_start,PointID_end,geometry_end,geometry,betweenness,straight_len,len,curve
0,0,0,POINT (-77.180353 38.967728),POINT (-77.082863 38.902279),0,POINT (-77.180353 38.967728),1,POINT (-77.082863 38.902279),"LINESTRING (311104.061 4315456.299, 311123.016...",42425.0,11146.680303,13780.459010,1.236284
1,0,1,POINT (-77.117895 38.93696),POINT (-77.121719 38.939401),2,POINT (-77.117895 38.93696),3,POINT (-77.121719 38.939401),"LINESTRING (316436.259 4311913.75, 316427.223 ...",1.0,428.208835,429.894494,1.003937
2,0,2,POINT (-77.117725 38.92926),POINT (-77.118124 38.930734),4,POINT (-77.117725 38.92926),5,POINT (-77.118124 38.930734),"LINESTRING (316431.109 4311058.739, 316448.01 ...",42425.0,167.254968,264.402407,1.580834
3,0,3,POINT (-77.117895 38.93696),POINT (-77.11618 38.935177),2,POINT (-77.117895 38.93696),7,POINT (-77.11618 38.935177),"LINESTRING (316436.259 4311913.75, 316433.42 4...",127269.0,247.573166,274.374070,1.108254
4,0,4,POINT (-77.115779 38.935197),POINT (-77.117895 38.93696),8,POINT (-77.115779 38.935197),2,POINT (-77.117895 38.93696),"LINESTRING (316615.124 4311713.799, 316596.659...",42425.0,268.267702,268.442587,1.000652
...,...,...,...,...,...,...,...,...,...,...,...,...,...
62607,0,62607,POINT (-77.030862 38.882691),POINT (-77.030862 38.882691),125214,POINT (-77.030862 38.882691),125214,POINT (-77.030862 38.882691),"LINESTRING (323846.141 4305718.997, 323844.559...",0.0,0.000000,26.548273,100.000000
62608,0,62608,POINT (-77.023401 38.897861),POINT (-77.023401 38.897861),125216,POINT (-77.023401 38.897861),125216,POINT (-77.023401 38.897861),"LINESTRING (324530.733 4307388.196, 324532.703...",0.0,0.000000,7.955386,100.000000
62609,0,62609,POINT (-77.022906 38.898133),POINT (-77.022906 38.898133),125218,POINT (-77.022906 38.898133),125218,POINT (-77.022906 38.898133),"LINESTRING (324574.333 4307417.464, 324581.592...",0.0,0.000000,42.952525,100.000000
62610,0,62610,POINT (-77.022896 38.897678),POINT (-77.022896 38.897678),125220,POINT (-77.022896 38.897678),125220,POINT (-77.022896 38.897678),"LINESTRING (324574.106 4307366.936, 324577.836...",0.0,0.000000,15.853464,100.000000


In [7]:
#seg = seg.drop(columns = ['id'])
seg.to_file('seg_len_straight.geojson')

In [8]:
#read in accident data
bc = pd.read_csv(r'../bike_crash.csv')
bc = bc.rename(columns = {'Unnamed: 0': 'crashID'})
bc = gpd.GeoDataFrame(bc, geometry = gpd.points_from_xy(bc['LONGITUDE'], bc['LATITUDE']), crs="EPSG:4326")
bc = bc.to_crs(bc.estimate_utm_crs())
print(bc.crs)
bc.columns

EPSG:32618


Index(['crashID', 'X', 'Y', 'CRIMEID', 'CCN', 'REPORTDATE', 'ROUTEID',
       'MEASURE', 'OFFSET', 'STREETSEGID', 'ROADWAYSEGID', 'FROMDATE',
       'TODATE', 'ADDRESS', 'LATITUDE', 'LONGITUDE', 'XCOORD', 'YCOORD',
       'WARD', 'EVENTID', 'MAR_ADDRESS', 'MAR_SCORE',
       'MAJORINJURIES_BICYCLIST', 'MINORINJURIES_BICYCLIST',
       'UNKNOWNINJURIES_BICYCLIST', 'FATAL_BICYCLIST', 'MAJORINJURIES_DRIVER',
       'MINORINJURIES_DRIVER', 'UNKNOWNINJURIES_DRIVER', 'FATAL_DRIVER',
       'MAJORINJURIES_PEDESTRIAN', 'MINORINJURIES_PEDESTRIAN',
       'UNKNOWNINJURIES_PEDESTRIAN', 'FATAL_PEDESTRIAN', 'TOTAL_VEHICLES',
       'TOTAL_BICYCLES', 'TOTAL_PEDESTRIANS', 'PEDESTRIANSIMPAIRED',
       'BICYCLISTSIMPAIRED', 'DRIVERSIMPAIRED', 'TOTAL_TAXIS',
       'TOTAL_GOVERNMENT', 'SPEEDING_INVOLVED', 'NEARESTINTROUTEID',
       'NEARESTINTSTREETNAME', 'OFFINTERSECTION', 'INTAPPROACHDIRECTION',
       'LOCATIONERROR', 'LASTUPDATEDATE', 'MPDLATITUDE', 'MPDLONGITUDE',
       'MPDGEOX', 'MPDGEOY', 'FA

In [9]:
bc

Unnamed: 0,crashID,X,Y,CRIMEID,CCN,REPORTDATE,ROUTEID,MEASURE,OFFSET,STREETSEGID,...,BLOCKKEY,SUBBLOCKKEY,CORRIDORID,NEARESTINTKEY,MAJORINJURIESOTHER,MINORINJURIESOTHER,UNKNOWNINJURIESOTHER,FATALOTHER,OBJECTID,geometry
0,797,-8.574897e+06,4.708167e+06,23985330,12060521,2012/05/04 10:28:00+00,11057852,1740.48,1.26,12913.0,...,0763965ce77d9e2e74bb74fe9eda952a,0763965ce77d9e2e74bb74fe9eda952a,11057852_2,310d24ffe7fb95e13481d733ff2e387b,,,,,398453191,POINT (324010.556 4308265.657)
1,826,-8.575452e+06,4.707550e+06,23962926,11164839,2011/11/09 05:00:00+00,11001502,1460.99,0.00,7180.0,...,a0cb2b50563dec5cf3144088008c8f9c,e3432174f207e4a55946d264918d3835,12054472_2,5999548982c6caea51a926e8d2d45d7a,,,,,398453220,POINT (323568.689 4307795.481)
2,863,-8.574910e+06,4.712549e+06,23950462,12046793,2012/04/06 12:59:00+00,11001302,4411.76,0.59,1562.0,...,071c70f7960690181b8f2b342bae46ae,6eeb765f9d7ab2439116159ac2769440,11001302_2,5c7b7887dad0bb26baf4627ef0b78d01,,,,,398453257,POINT (324077.203 4311664.2)
3,1023,-8.574270e+06,4.708395e+06,24004749,12014404,2012/01/30 00:00:00+00,11063182,1250.67,0.16,7719.0,...,1892bfe4ffce6a3d6a8b017e63eeba05,8569fe7902405f35d32ba60631ee7cfc,11063182_1,12381b95b91bf587b88bfab3392ac9f2,,,,,398453417,POINT (324503.725 4308430.693)
4,1043,-8.579157e+06,4.710900e+06,24066694,12089083,2012/06/27 04:00:00+00,11059602,5578.49,0.03,5787.0,...,ade938f45d032b80c63f51092b735a08,7385e575bafe75a89b452aa225435014,11059602_5,f3c9dbd3fe2f2a146cfb2b640b9546a5,,,,,398453437,POINT (320740.858 4310459.23)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5043,338157,-8.577033e+06,4.709004e+06,66781477267,25158602,2025/10/18 05:58:00+00,11059602,3376.08,48.06,,...,99c4a74f5d3e546aa3f2154888a3c043,e3c66f122056b4954af64f9b2e9bab05,11059602_4,937ae69e3cb072d0145f9e3f49f7a826,0.0,0.0,0.0,0.0,398790551,POINT (322394.492 4308982.358)
5044,338168,-8.574494e+06,4.709817e+06,66783341759,25158810,2025/10/18 18:59:00+00,11001002,2524.72,25.96,,...,5e1dd81ecf76fc98a76d079dcf83e5f7,78edc316b2559bbda65092160de53343,11001002_4,f3292e567c431abdcaf382ec5bfe480b,0.0,0.0,0.0,0.0,398790562,POINT (324327.758 4309538.55)
5045,338207,-8.571040e+06,4.707871e+06,66751529117,25157863,2025/10/16 20:10:00+00,12000802,1529.38,19.62,,...,670a1fcc4ecdd5e6fc4880d03ceed114,670a1fcc4ecdd5e6fc4880d03ceed114,12000802_1,7a9d428ad79633cc21b7415597784e66,0.0,0.0,0.0,0.0,398790601,POINT (327046.28 4307967.941)
5046,338223,-8.570615e+06,4.702151e+06,66756674497,25157849,2025/10/16 20:40:00+00,13023142,153.52,13.06,,...,d0e6fdf4125257725d4d186a6fa1ba7d,6115c1598ee8818b3f7ba210ea2a0c67,13023142_1,3ab0e1e02855fb561b1f38c0e811c3f7,0.0,0.0,0.0,0.0,398790617,POINT (327245.387 4303522.811)


In [10]:
bc['sev'] = bc.apply(lambda x: 1 if x['MAJORINJURIES_BICYCLIST'] > 0 or x['MAJORINJURIES_BICYCLIST'] > 0 else 0, axis = 1)
bc_basic = bc[['crashID', 'REPORTDATE', 'LATITUDE', 'LONGITUDE','MAJORINJURIES_BICYCLIST', 'MINORINJURIES_BICYCLIST', 'UNKNOWNINJURIES_BICYCLIST','FATAL_BICYCLIST', 'BICYCLISTSIMPAIRED', 'sev', 'geometry']]
bc_basic

Unnamed: 0,crashID,REPORTDATE,LATITUDE,LONGITUDE,MAJORINJURIES_BICYCLIST,MINORINJURIES_BICYCLIST,UNKNOWNINJURIES_BICYCLIST,FATAL_BICYCLIST,BICYCLISTSIMPAIRED,sev,geometry
0,797,2012/05/04 10:28:00+00,38.905659,-77.029621,1,0,0,0,0,1,POINT (324010.556 4308265.657)
1,826,2011/11/09 05:00:00+00,38.901336,-77.034593,1,0,0,0,0,1,POINT (323568.689 4307795.481)
2,863,2012/04/06 12:59:00+00,38.936279,-77.029725,1,0,0,0,0,1,POINT (324077.203 4311664.2)
3,1023,2012/01/30 00:00:00+00,38.907244,-77.023979,0,1,0,0,0,0,POINT (324503.725 4308430.693)
4,1043,2012/06/27 04:00:00+00,38.924752,-77.067880,1,0,0,0,0,1,POINT (320740.858 4310459.23)
...,...,...,...,...,...,...,...,...,...,...,...
5043,338157,2025/10/18 05:58:00+00,38.911788,-77.048433,0,1,0,0,0,0,POINT (322394.492 4308982.358)
5044,338168,2025/10/18 18:59:00+00,38.917186,-77.026291,0,1,0,0,0,0,POINT (324327.758 4309538.55)
5045,338207,2025/10/16 20:10:00+00,38.903581,-76.994556,0,1,0,0,0,0,POINT (327046.28 4307967.941)
5046,338223,2025/10/16 20:40:00+00,38.863587,-76.991143,0,1,0,0,0,0,POINT (327245.387 4303522.811)


In [11]:
seg['seg_geom'] = seg['geometry']
bc_seg = gpd.sjoin_nearest(bc_basic, seg, how="left", distance_col="dist_to_road")
bc_seg['dist_end1'] = bc_seg.apply(lambda x: x['seg_geom'].project(x['geometry']), axis = 1)
bc_seg['dist_end2'] = bc_seg['len'] - bc_seg['dist_end1']
bc_seg['intersection_dist'] = bc_seg[['dist_end1', 'dist_end2']].min(axis=1)
bc_seg

Unnamed: 0,crashID,REPORTDATE,LATITUDE,LONGITUDE,MAJORINJURIES_BICYCLIST,MINORINJURIES_BICYCLIST,UNKNOWNINJURIES_BICYCLIST,FATAL_BICYCLIST,BICYCLISTSIMPAIRED,sev,...,geometry_end,betweenness,straight_len,len,curve,seg_geom,dist_to_road,dist_end1,dist_end2,intersection_dist
0,797,2012/05/04 10:28:00+00,38.905659,-77.029621,1,0,0,0,0,1,...,POINT (-77.029628 38.905282),681534.0,42.739471,42.761692,1.000520,"LINESTRING (324009.952 4308266.57, 324009.774 ...",0.622744,0.900429,41.861262,0.900429
1,826,2011/11/09 05:00:00+00,38.901336,-77.034593,1,0,0,0,0,1,...,POINT (-77.034593 38.901338),20323514.0,130.877235,132.081974,1.009205,"LINESTRING (323520.978 4307673.88, 323525.532 ...",0.008936,131.860131,0.221844,0.221844
2,863,2012/04/06 12:59:00+00,38.936279,-77.029725,1,0,0,0,0,1,...,POINT (-77.029725 38.936285),9582795.0,190.274909,190.277957,1.000016,"LINESTRING (324072.807 4311474.642, 324072.985...",0.014837,189.612050,0.665908,0.665908
3,1023,2012/01/30 00:00:00+00,38.907244,-77.023979,0,1,0,0,0,0,...,POINT (-77.024723 38.907253),93480.0,65.403541,65.413156,1.000147,"LINESTRING (324504.599 4308431.006, 324492.181...",0.332260,0.867673,64.545483,0.867673
4,1043,2012/06/27 04:00:00+00,38.924752,-77.067880,1,0,0,0,0,1,...,POINT (-77.068178 38.924968),30951.0,42.836291,42.809640,0.999378,"LINESTRING (320746.555 4310454.182, 320739.869...",0.174258,7.610027,35.199613,7.610027
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5043,338157,2025/10/18 05:58:00+00,38.911788,-77.048433,0,1,0,0,0,0,...,POINT (-77.04833 38.911847),17822.0,17.705917,17.833857,1.007226,"LINESTRING (322391.7 4308975.492, 322397.125 4...",3.063281,6.749387,11.084470,6.749387
5044,338168,2025/10/18 18:59:00+00,38.917186,-77.026291,0,1,0,0,0,0,...,POINT (-77.0265 38.917706),115332.0,62.333279,84.661151,1.358201,"LINESTRING (324353.871 4309551.539, 324346.374...",13.940403,25.619051,59.042100,25.619051
5045,338207,2025/10/16 20:10:00+00,38.903581,-76.994556,0,1,0,0,0,0,...,POINT (-76.994557 38.903725),1888.0,69.575137,71.352618,1.025548,"LINESTRING (327051.498 4307914.562, 327047.369...",0.088838,55.367103,15.985515,15.985515
5046,338223,2025/10/16 20:40:00+00,38.863587,-76.991143,0,1,0,0,0,0,...,POINT (-76.991016 38.863429),42425.0,66.555136,66.559685,1.000068,"LINESTRING (327222.643 4303562.614, 327236.464...",0.079268,45.844529,20.715156,20.715156


In [12]:
bc_seg['seg_geom'] = bc_seg['seg_geom'].to_wkt()
#bc_seg.to_file('bike_crash_seg_join.geojson')

## Get road characterization

In [58]:
highways = gpd.read_file(r'..\osm_highway_limited_wCols.geojson')
highways

Unnamed: 0,osm_id,name,highway,waterway,aerialway,barrier,man_made,z_order,other_tags,"""HFCS""",...,"""cycleway:right:lane""","""cycleway:both""","""parking:lane:right""","""cycleway:both:lane""","""cycleway:left:lane""","""cycleway:left:oneway""","""cycleway:oneway""","""cycleway:separation""","""surface""",geometry
0,4681186,,service,,,,,0,,,...,,,,,,,,,,"LINESTRING (-77.10826 38.93574, -77.10826 38.9..."
1,4725152,Texas Avenue Southeast,secondary,,,,,6,"""HFCS""=>""Collector"",""lanes""=>""1"",""name:etymolo...","""Collector""",...,,,,,,,,,,"LINESTRING (-76.93866 38.88917, -76.93838 38.8..."
2,5967442,Allan Road,residential,,,,,3,"""lcn""=>""yes"",""maxspeed""=>""20 mph"",""sidewalk:bo...",,...,,,,,,,,,"""asphalt""","LINESTRING (-77.10231 38.95533, -77.10227 38.9..."
3,5968135,Summerfield Road,residential,,,,,3,"""lane_markings""=>""no"",""surface""=>""asphalt""",,...,,,,,,,,,"""asphalt""","LINESTRING (-77.07191 38.97399, -77.07156 38.9..."
4,5968393,Albemarle Street,residential,,,,,3,"""maxspeed""=>""20 mph"",""sidewalk""=>""no"",""surface...",,...,,,,,,,,,"""asphalt""","LINESTRING (-77.10253 38.9479, -77.10264 38.94..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35839,1445836671,,service,,,,,0,"""access""=>""private""",,...,,,,,,,,,,"LINESTRING (-77.01518 38.90502, -77.01525 38.9..."
35840,1445836672,New York Avenue Northwest,trunk,,,,,8,"""HFCS""=>""Principal Arterial"",""NHS""=>""yes"",""des...","""Principal Arterial""",...,,,,,,,,,"""asphalt""","LINESTRING (-77.01586 38.905, -77.01602 38.904..."
35841,1445943511,,service,,,,,0,,,...,,,,,,,,,,"LINESTRING (-77.0239 38.91957, -77.02382 38.91..."
35842,1445943512,,service,,,,,0,,,...,,,,,,,,,,"LINESTRING (-77.02371 38.91957, -77.02371 38.9..."


In [14]:
highways.columns

Index(['osm_id', 'name', 'highway', 'waterway', 'aerialway', 'barrier',
       'man_made', 'z_order', 'other_tags', '"HFCS"', '"lanes"', '"oneway"',
       '"maxspeed"', '"bicycle"', '"NHS"', '"expressway"',
       '"oneway:conditional"', '"parking:left"', '"parking:right"',
       '"parking:right:orientation"', '"source:width"', '"cycleway"',
       '"shoulder"', '"cycleway:right"', '"cycleway:right:buffer"',
       '"parking:both"', '"parking:both:orientation"', '"access"',
       '"motor_vehicle"', '"foot"', '"turn:lanes"', '"bridge"', '"tunnel"',
       '"bridge:name"', '"lanes:backward"', '"lanes:forward"',
       '"turn:lanes:backward"', '"turn:lanes:forward"', '"cycleway:left"',
       '"cycleway:right:lane"', '"cycleway:both"', '"parking:lane:right"',
       '"cycleway:both:lane"', '"cycleway:left:lane"',
       '"cycleway:left:oneway"', '"cycleway:oneway"', '"cycleway:separation"',
       '"surface"', 'geometry'],
      dtype='object')

In [59]:
#get max speedlimit
highways['"maxspeed"'].value_counts()
highways['maxspeed'] = highways['"maxspeed"'].str.replace(' mph', '')
highways['maxspeed'] = highways['maxspeed'].str.replace('"', '')
highways['maxspeed'] = highways['maxspeed'].astype(float)
highways['maxspeed'] = highways['maxspeed'].apply(lambda x: x if x%5 == 0 else np.nan) #going to assume 
highways['maxspeed'].value_counts()

maxspeed
25.0    2147
15.0     666
30.0     647
20.0     344
40.0     222
35.0     174
50.0     122
45.0     103
10.0      84
5.0       55
55.0      28
65.0       9
Name: count, dtype: int64

In [60]:
#lanes
highways = highways.rename(columns = {'"lanes"':'lanes'})
highways['lanes'] = highways['lanes'].str.replace('"', '').astype('Int64')
highways['lanes'].value_counts()

lanes
2    4129
3    1898
1    1854
4    1366
5     263
6     257
7      51
8      49
Name: count, dtype: Int64

In [61]:
highways['highway'].value_counts()

highway
service           17454
residential        5211
primary            2971
secondary          2162
cycleway           2054
tertiary           1979
footway             813
path                600
motorway            527
motorway_link       468
unclassified        449
trunk               418
primary_link        330
secondary_link       83
pedestrian           59
trunk_link           53
tertiary_link        52
living_street        51
track                44
corridor             33
construction         26
elevator              6
services              1
Name: count, dtype: int64

In [62]:
#simplify road types
types_categories = {
'service':'service',
'residential':'residential',
'primary':'primary',
'secondary':'secondary',
'cycleway':'noMotors',
'tertiary':'tertiary',
'footway':'noMotors',
'path':'noMotors',
'motorway':'motorway',
'motorway_link':'motorway',
'unclassified':'other',
'trunk':'motorway',
'primary_link':'primary',
'secondary_link':'secondary',
'pedestrian':'noMotors',
'trunk_link':'motorway',
'tertiary_link':'tertiary',
'living_street':'residential',
'track':'other',
'corridor':'other',
'construction':'other',
'elevator':'other',
'services':'service'}
highways['types'] = highways['highway'].apply(lambda x: types_categories[x])
highways['types'].value_counts()

types
service        17455
residential     5262
noMotors        3526
primary         3301
secondary       2245
tertiary        2031
motorway        1466
other            558
Name: count, dtype: int64

In [63]:
highways['"bicycle"'].value_counts()

"bicycle"
"yes"                  1824
"designated"           1076
"no"                    485
"use_sidepath"           17
"optional_sidepath"      15
"permissive"             13
"private"                11
"dismount"                8
"destination"             3
Name: count, dtype: int64

In [64]:
highways['"cycleway"'].value_counts()

"cycleway"
"crossing"          715
"separate"          302
"shared_lane"       282
"lane"              104
"no"                 52
"share_busway"       21
"track"              20
"traffic_island"     12
"sidewalk"            2
"shoulder"            1
"link"                1
"buffered_lane"       1
Name: count, dtype: int64

In [65]:
#bicycle -> yes ("yes", "designated", "use_sidepath", 'Optional_sidepath", "permissive"), no ("no", "dismount"), anything else- unclear
#cycleway -> yes ("crossing", "separate", "shared_lane", "lane", "share_busway", "track", "traffic_island"), no ("no"), anything else- unclear
#if either yes and unclear then yes, if yes and no then unclear, if both unclear then unclear
highways['bike_intermediate'] = 'unclear'
group = highways['"bicycle"'].isin(['"yes"', '"designated"', '"use_sidepath"', '"Optional_sidepath"', '"permissive"'])
highways.loc[group, 'bike_intermediate'] = 'yes'
group = highways['"bicycle"'].isin(['"no"', '"dismount"'])
highways.loc[group, 'bike_intermediate'] = 'no'

highways['cycleway_intermediate'] = 'unclear'
group = highways['"cycleway"'].isin(['"crossing"', '"separate"', '"shared_lane"', '"lane"', '"share_busway"', '"track"', '"traffic_island"'])
highways.loc[group, 'bike_intermediate'] = 'yes'
group = highways['"cycleway"'].isin(['"no"'])
highways.loc[group, 'cycleway_intermediate'] = 'no'
highways['cycle_summary'] = 'unclear'
group = ((highways['bike_intermediate'] == 'yes') & (highways['cycleway_intermediate'] == 'yes')) | ((highways['bike_intermediate'] == 'yes') & (highways['cycleway_intermediate'] == 'unclear')) | ((highways['bike_intermediate'] == 'unclear') & (highways['cycleway_intermediate'] == 'yes'))
highways.loc[group, 'cycle_summary'] = 'yes'
group = ((highways['bike_intermediate'] == 'no') & (highways['cycleway_intermediate'] == 'no')) | ((highways['bike_intermediate'] == 'no') & (highways['cycleway_intermediate'] == 'unclear')) | ((highways['bike_intermediate'] == 'unclear') & (highways['cycleway_intermediate'] == 'no'))
highways.loc[group, 'cycle_summary'] = 'no'
sum(group)

503

In [66]:
highways

Unnamed: 0,osm_id,name,highway,waterway,aerialway,barrier,man_made,z_order,other_tags,"""HFCS""",...,"""cycleway:left:oneway""","""cycleway:oneway""","""cycleway:separation""","""surface""",geometry,maxspeed,types,bike_intermediate,cycleway_intermediate,cycle_summary
0,4681186,,service,,,,,0,,,...,,,,,"LINESTRING (-77.10826 38.93574, -77.10826 38.9...",,service,unclear,unclear,unclear
1,4725152,Texas Avenue Southeast,secondary,,,,,6,"""HFCS""=>""Collector"",""lanes""=>""1"",""name:etymolo...","""Collector""",...,,,,,"LINESTRING (-76.93866 38.88917, -76.93838 38.8...",,secondary,unclear,unclear,unclear
2,5967442,Allan Road,residential,,,,,3,"""lcn""=>""yes"",""maxspeed""=>""20 mph"",""sidewalk:bo...",,...,,,,"""asphalt""","LINESTRING (-77.10231 38.95533, -77.10227 38.9...",20.0,residential,unclear,unclear,unclear
3,5968135,Summerfield Road,residential,,,,,3,"""lane_markings""=>""no"",""surface""=>""asphalt""",,...,,,,"""asphalt""","LINESTRING (-77.07191 38.97399, -77.07156 38.9...",,residential,unclear,unclear,unclear
4,5968393,Albemarle Street,residential,,,,,3,"""maxspeed""=>""20 mph"",""sidewalk""=>""no"",""surface...",,...,,,,"""asphalt""","LINESTRING (-77.10253 38.9479, -77.10264 38.94...",20.0,residential,unclear,unclear,unclear
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35839,1445836671,,service,,,,,0,"""access""=>""private""",,...,,,,,"LINESTRING (-77.01518 38.90502, -77.01525 38.9...",,service,unclear,unclear,unclear
35840,1445836672,New York Avenue Northwest,trunk,,,,,8,"""HFCS""=>""Principal Arterial"",""NHS""=>""yes"",""des...","""Principal Arterial""",...,,,,"""asphalt""","LINESTRING (-77.01586 38.905, -77.01602 38.904...",,motorway,unclear,unclear,unclear
35841,1445943511,,service,,,,,0,,,...,,,,,"LINESTRING (-77.0239 38.91957, -77.02382 38.91...",,service,unclear,unclear,unclear
35842,1445943512,,service,,,,,0,,,...,,,,,"LINESTRING (-77.02371 38.91957, -77.02371 38.9...",,service,unclear,unclear,unclear


In [67]:
highways['cycle_summary'].value_counts()

cycle_summary
unclear    31312
yes         4029
no           503
Name: count, dtype: int64

In [68]:
highways.to_file('../highways_wInfo.geojson')

In [101]:
cols_toCheck = [ '"expressway"',
       '"oneway:conditional"', '"parking:left"', '"parking:right"',
       '"parking:right:orientation"', '"source:width"', '"cycleway"',
       '"shoulder"', '"cycleway:right"', '"cycleway:right:buffer"',
       '"parking:both"', '"parking:both:orientation"', '"access"',
       '"motor_vehicle"', '"foot"', '"turn:lanes"', '"bridge"', '"tunnel"',
       '"bridge:name"', '"lanes:backward"', '"lanes:forward"',
       '"turn:lanes:backward"', '"turn:lanes:forward"', '"cycleway:left"',
       '"cycleway:right:lane"', '"cycleway:both"', '"parking:lane:right"',
       '"cycleway:both:lane"', '"cycleway:left:lane"',
       '"cycleway:left:oneway"', '"cycleway:oneway"', '"cycleway:separation"',
       '"surface"']
for c in cols_toCheck:
    print('----'+c+'------')
    print(highways[c].value_counts())

----"expressway"------
"expressway"
"yes"    104
Name: count, dtype: int64
----"oneway:conditional"------
"oneway:conditional"
"-1 @ (Mo-Fr 06:45-09:30)"                                               25
"-1 @ (Mo-Fr 15:45-18:30)"                                               23
"-1 @ (Mo-Fr 06:45-09:30); yes @ (Mo-Fr 15:45-18:30)"                    18
"-1 @ (Mo-Fr 06:45-09:30; PH off)"                                        4
"-1 @ (Mo-Fr 06:15-10:00); yes @ (Mo-Fr 14:45-19:15)"                     3
"yes @ (Mo-Fr 07:00-09:00                                                 3
"yes @ (Mo-Fr 14:45-19:15; Ph off); -1 @ (Mo-Fr 06:00-10:15; PH off)"     3
"-1 @ (Mo-Fr 15:45-18:30; PH off)"                                        3
"-1 @ (Mo-Fr 06:45-09:30; PH off); yes @ (Mo-Fr 15:45-18:30; PH off)"     2
"yes @ (Mo-Fr 07:00-09:30; PH off)"                                       2
"yes@(Mo-Fr 07:00-09:30; PH off)"                                         2
"yes@(Mo-Fr 06:00-09:30)"            

In [78]:
highways = highways.to_crs(highways.estimate_utm_crs())
highways.crs

<Projected CRS: EPSG:32618>
Name: WGS 84 / UTM zone 18N
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: Between 78°W and 72°W, northern hemisphere between equator and 84°N, onshore and offshore. Bahamas. Canada - Nunavut; Ontario; Quebec. Colombia. Cuba. Ecuador. Greenland. Haiti. Jamaica. Panama. Turks and Caicos Islands. United States (USA). Venezuela.
- bounds: (-78.0, 0.0, -72.0, 84.0)
Coordinate Operation:
- name: UTM zone 18N
- method: Transverse Mercator
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [79]:
bc_seg = bc_seg.rename(columns = {'index_right':'index_right_prev'})
bc_seg.geometry

0       POINT (324010.556 4308265.657)
1       POINT (323568.689 4307795.481)
2         POINT (324077.203 4311664.2)
3       POINT (324503.725 4308430.693)
4        POINT (320740.858 4310459.23)
                     ...              
5043    POINT (322394.492 4308982.358)
5044     POINT (324327.758 4309538.55)
5045     POINT (327046.28 4307967.941)
5046    POINT (327245.387 4303522.811)
5047     POINT (324007.714 4310888.47)
Name: geometry, Length: 5257, dtype: geometry

In [80]:
highways.geometry

0        LINESTRING (317268.241 4311758.715, 317268.877...
1        LINESTRING (331859.455 4306264.199, 331884.42 ...
2        LINESTRING (317834.217 4313920.79, 317837.339 ...
3        LINESTRING (320516.127 4315931.437, 320545.243...
4        LINESTRING (317796.198 4313096.991, 317786.872...
                               ...                        
35839    LINESTRING (325261.101 4308166.848, 325255.041...
35840    LINESTRING (325202.122 4308165.708, 325188.225...
35841    LINESTRING (324541.275 4309798.713, 324548.266...
35842    LINESTRING (324557.225 4309798.592, 324556.255...
35843    LINESTRING (319209.033 4308610.227, 319208.873...
Name: geometry, Length: 35844, dtype: geometry

In [81]:
#connect to accident data
bc_seg_osm = gpd.sjoin_nearest(bc_seg, highways, how="left", distance_col="dist_to_osm")
bc_seg_osm

Unnamed: 0,crashID,REPORTDATE,LATITUDE,LONGITUDE,MAJORINJURIES_BICYCLIST,MINORINJURIES_BICYCLIST,UNKNOWNINJURIES_BICYCLIST,FATAL_BICYCLIST,BICYCLISTSIMPAIRED,sev,...,"""cycleway:left:oneway""","""cycleway:oneway""","""cycleway:separation""","""surface""",maxspeed,types,bike_intermediate,cycleway_intermediate,cycle_summary,dist_to_osm
0,797,2012/05/04 10:28:00+00,38.905659,-77.029621,1,0,0,0,0,1,...,,,,"""asphalt""",,secondary,unclear,unclear,unclear,0.622744
1,826,2011/11/09 05:00:00+00,38.901336,-77.034593,1,0,0,0,0,1,...,,,,"""asphalt""",,tertiary,yes,unclear,yes,0.008936
2,863,2012/04/06 12:59:00+00,38.936279,-77.029725,1,0,0,0,0,1,...,,,,"""asphalt""",25.0,tertiary,unclear,unclear,unclear,0.014837
3,1023,2012/01/30 00:00:00+00,38.907244,-77.023979,0,1,0,0,0,0,...,,,,"""asphalt""",,residential,unclear,unclear,unclear,0.332260
4,1043,2012/06/27 04:00:00+00,38.924752,-77.067880,1,0,0,0,0,1,...,,,,"""asphalt""",30.0,primary,unclear,unclear,unclear,0.174258
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5043,338157,2025/10/18 05:58:00+00,38.911788,-77.048433,0,1,0,0,0,0,...,,,,"""asphalt""",25.0,primary,unclear,unclear,unclear,3.063281
5044,338168,2025/10/18 18:59:00+00,38.917186,-77.026291,0,1,0,0,0,0,...,,,,,,service,unclear,unclear,unclear,13.940403
5045,338207,2025/10/16 20:10:00+00,38.903581,-76.994556,0,1,0,0,0,0,...,,,,"""asphalt""",,service,unclear,unclear,unclear,0.088838
5046,338223,2025/10/16 20:40:00+00,38.863587,-76.991143,0,1,0,0,0,0,...,,,,"""asphalt""",,residential,unclear,unclear,unclear,0.079268


In [82]:
bc_seg_osm[['dist_to_road', 'dist_to_osm']]

Unnamed: 0,dist_to_road,dist_to_osm
0,0.622744,0.622744
1,0.008936,0.008936
2,0.014837,0.014837
3,0.332260,0.332260
4,0.174258,0.174258
...,...,...
5043,3.063281,3.063281
5044,13.940403,13.940403
5045,0.088838,0.088838
5046,0.079268,0.079268


In [83]:
bc_seg_osm.to_file('../accidents_segments_osm.geojson')

## Finally, merge the segment and osm data together again cause that seems like something that would be useful
I think there are some osm roads that end 'mid-segment' but I'm just going to ignore that problem for now.

In [85]:
seg = gpd.read_file('seg_len_straight.geojson')
seg

  return ogr_read(


Unnamed: 0,id,SegID,startPoints,endPoints,PointID_start,geometry_start,PointID_end,geometry_end,betweenness,straight_len,len,curve,geometry
0,0,0,POINT (-77.180353 38.967728),POINT (-77.082863 38.902279),0,POINT (-77.180353 38.967728),1,POINT (-77.082863 38.902279),42425.0,11146.680303,13780.459010,1.236284,"LINESTRING (311104.061 4315456.299, 311123.016..."
1,0,1,POINT (-77.117895 38.93696),POINT (-77.121719 38.939401),2,POINT (-77.117895 38.93696),3,POINT (-77.121719 38.939401),1.0,428.208835,429.894494,1.003937,"LINESTRING (316436.259 4311913.75, 316427.223 ..."
2,0,2,POINT (-77.117725 38.92926),POINT (-77.118124 38.930734),4,POINT (-77.117725 38.92926),5,POINT (-77.118124 38.930734),42425.0,167.254968,264.402407,1.580834,"LINESTRING (316431.109 4311058.739, 316448.01 ..."
3,0,3,POINT (-77.117895 38.93696),POINT (-77.11618 38.935177),2,POINT (-77.117895 38.93696),7,POINT (-77.11618 38.935177),127269.0,247.573166,274.374070,1.108254,"LINESTRING (316436.259 4311913.75, 316433.42 4..."
4,0,4,POINT (-77.115779 38.935197),POINT (-77.117895 38.93696),8,POINT (-77.115779 38.935197),2,POINT (-77.117895 38.93696),42425.0,268.267702,268.442587,1.000652,"LINESTRING (316615.124 4311713.799, 316596.659..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
62607,0,62607,POINT (-77.030862 38.882691),POINT (-77.030862 38.882691),125214,POINT (-77.030862 38.882691),125214,POINT (-77.030862 38.882691),0.0,0.000000,26.548273,100.000000,"LINESTRING (323846.141 4305718.997, 323844.559..."
62608,0,62608,POINT (-77.023401 38.897861),POINT (-77.023401 38.897861),125216,POINT (-77.023401 38.897861),125216,POINT (-77.023401 38.897861),0.0,0.000000,7.955386,100.000000,"LINESTRING (324530.733 4307388.196, 324532.703..."
62609,0,62609,POINT (-77.022906 38.898133),POINT (-77.022906 38.898133),125218,POINT (-77.022906 38.898133),125218,POINT (-77.022906 38.898133),0.0,0.000000,42.952525,100.000000,"LINESTRING (324574.333 4307417.464, 324581.592..."
62610,0,62610,POINT (-77.022896 38.897678),POINT (-77.022896 38.897678),125220,POINT (-77.022896 38.897678),125220,POINT (-77.022896 38.897678),0.0,0.000000,15.853464,100.000000,"LINESTRING (324574.106 4307366.936, 324577.836..."


In [94]:
highways['ind'] = highways.index
highways['geoms'] = highways.geometry
highways

Unnamed: 0,osm_id,name,highway,waterway,aerialway,barrier,man_made,z_order,other_tags,"""HFCS""",...,"""cycleway:separation""","""surface""",geometry,maxspeed,types,bike_intermediate,cycleway_intermediate,cycle_summary,ind,geoms
0,4681186,,service,,,,,0,,,...,,,"LINESTRING (317268.241 4311758.715, 317268.877...",,service,unclear,unclear,unclear,0,"LINESTRING (317268.241 4311758.715, 317268.877..."
1,4725152,Texas Avenue Southeast,secondary,,,,,6,"""HFCS""=>""Collector"",""lanes""=>""1"",""name:etymolo...","""Collector""",...,,,"LINESTRING (331859.455 4306264.199, 331884.42 ...",,secondary,unclear,unclear,unclear,1,"LINESTRING (331859.455 4306264.199, 331884.42 ..."
2,5967442,Allan Road,residential,,,,,3,"""lcn""=>""yes"",""maxspeed""=>""20 mph"",""sidewalk:bo...",,...,,"""asphalt""","LINESTRING (317834.217 4313920.79, 317837.339 ...",20.0,residential,unclear,unclear,unclear,2,"LINESTRING (317834.217 4313920.79, 317837.339 ..."
3,5968135,Summerfield Road,residential,,,,,3,"""lane_markings""=>""no"",""surface""=>""asphalt""",,...,,"""asphalt""","LINESTRING (320516.127 4315931.437, 320545.243...",,residential,unclear,unclear,unclear,3,"LINESTRING (320516.127 4315931.437, 320545.243..."
4,5968393,Albemarle Street,residential,,,,,3,"""maxspeed""=>""20 mph"",""sidewalk""=>""no"",""surface...",,...,,"""asphalt""","LINESTRING (317796.198 4313096.991, 317786.872...",20.0,residential,unclear,unclear,unclear,4,"LINESTRING (317796.198 4313096.991, 317786.872..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35839,1445836671,,service,,,,,0,"""access""=>""private""",,...,,,"LINESTRING (325261.101 4308166.848, 325255.041...",,service,unclear,unclear,unclear,35839,"LINESTRING (325261.101 4308166.848, 325255.041..."
35840,1445836672,New York Avenue Northwest,trunk,,,,,8,"""HFCS""=>""Principal Arterial"",""NHS""=>""yes"",""des...","""Principal Arterial""",...,,"""asphalt""","LINESTRING (325202.122 4308165.708, 325188.225...",,motorway,unclear,unclear,unclear,35840,"LINESTRING (325202.122 4308165.708, 325188.225..."
35841,1445943511,,service,,,,,0,,,...,,,"LINESTRING (324541.275 4309798.713, 324548.266...",,service,unclear,unclear,unclear,35841,"LINESTRING (324541.275 4309798.713, 324548.266..."
35842,1445943512,,service,,,,,0,,,...,,,"LINESTRING (324557.225 4309798.592, 324556.255...",,service,unclear,unclear,unclear,35842,"LINESTRING (324557.225 4309798.592, 324556.255..."


In [101]:
seg_osm_within = gpd.sjoin(seg, highways, how="inner", predicate = 'within')
seg_osm_within

Unnamed: 0,id,SegID,startPoints,endPoints,PointID_start,geometry_start,PointID_end,geometry_end,betweenness,straight_len,...,"""cycleway:oneway""","""cycleway:separation""","""surface""",maxspeed,types,bike_intermediate,cycleway_intermediate,cycle_summary,ind,geoms
0,0,0,POINT (-77.180353 38.967728),POINT (-77.082863 38.902279),0,POINT (-77.180353 38.967728),1,POINT (-77.082863 38.902279),42425.0,11146.680303,...,,,"""ground""",,noMotors,no,unclear,no,8633,"LINESTRING (311104.061 4315456.299, 311123.016..."
1,0,1,POINT (-77.117895 38.93696),POINT (-77.121719 38.939401),2,POINT (-77.117895 38.93696),3,POINT (-77.121719 38.939401),1.0,428.208835,...,,,"""dirt""",,noMotors,unclear,unclear,unclear,8961,"LINESTRING (316495.623 4311848.33, 316440.295 ..."
3,0,3,POINT (-77.117895 38.93696),POINT (-77.11618 38.935177),2,POINT (-77.117895 38.93696),7,POINT (-77.11618 38.935177),127269.0,247.573166,...,,,"""dirt""",,noMotors,yes,unclear,yes,21620,"LINESTRING (316436.259 4311913.75, 316433.42 4..."
5,0,5,POINT (-77.117824 38.929153),POINT (-77.116895 38.929534),10,POINT (-77.117824 38.929153),11,POINT (-77.116895 38.929534),42425.0,90.986172,...,,,"""concrete""",,noMotors,yes,unclear,yes,4933,"LINESTRING (316422.322 4311047.139, 316433.923..."
8,0,8,POINT (-77.115779 38.935197),POINT (-77.117277 38.933006),8,POINT (-77.115779 38.935197),17,POINT (-77.117277 38.933006),35.0,275.743755,...,,,"""unpaved""",,other,yes,unclear,yes,21621,"LINESTRING (316615.124 4311713.799, 316616.606..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62607,0,62607,POINT (-77.030862 38.882691),POINT (-77.030862 38.882691),125214,POINT (-77.030862 38.882691),125214,POINT (-77.030862 38.882691),0.0,0.000000,...,,,"""concrete""",,other,unclear,unclear,unclear,33782,"LINESTRING (323846.141 4305718.997, 323844.559..."
62608,0,62608,POINT (-77.023401 38.897861),POINT (-77.023401 38.897861),125216,POINT (-77.023401 38.897861),125216,POINT (-77.023401 38.897861),0.0,0.000000,...,,,,,other,unclear,unclear,unclear,31220,"LINESTRING (324530.733 4307388.196, 324532.703..."
62609,0,62609,POINT (-77.022906 38.898133),POINT (-77.022906 38.898133),125218,POINT (-77.022906 38.898133),125218,POINT (-77.022906 38.898133),0.0,0.000000,...,,,,,other,unclear,unclear,unclear,31219,"LINESTRING (324574.333 4307417.464, 324581.592..."
62610,0,62610,POINT (-77.022896 38.897678),POINT (-77.022896 38.897678),125220,POINT (-77.022896 38.897678),125220,POINT (-77.022896 38.897678),0.0,0.000000,...,,,,,other,unclear,unclear,unclear,31221,"LINESTRING (324574.106 4307366.936, 324577.836..."


In [102]:
seg_osm_contains = gpd.sjoin(seg, highways, how="inner", predicate = 'contains')
seg_osm_contains

Unnamed: 0,id,SegID,startPoints,endPoints,PointID_start,geometry_start,PointID_end,geometry_end,betweenness,straight_len,...,"""cycleway:oneway""","""cycleway:separation""","""surface""",maxspeed,types,bike_intermediate,cycleway_intermediate,cycle_summary,ind,geoms
0,0,0,POINT (-77.180353 38.967728),POINT (-77.082863 38.902279),0,POINT (-77.180353 38.967728),1,POINT (-77.082863 38.902279),42425.0,11146.680303,...,,,"""ground""",,noMotors,no,unclear,no,8633,"LINESTRING (311104.061 4315456.299, 311123.016..."
2,0,2,POINT (-77.117725 38.92926),POINT (-77.118124 38.930734),4,POINT (-77.117725 38.92926),5,POINT (-77.118124 38.930734),42425.0,167.254968,...,,,,,service,unclear,unclear,unclear,22263,"LINESTRING (316431.109 4311058.739, 316448.01 ..."
2,0,2,POINT (-77.117725 38.92926),POINT (-77.118124 38.930734),4,POINT (-77.117725 38.92926),5,POINT (-77.118124 38.930734),42425.0,167.254968,...,,,,,service,unclear,unclear,unclear,22478,"LINESTRING (316445.094 4311151.081, 316444.642..."
3,0,3,POINT (-77.117895 38.93696),POINT (-77.11618 38.935177),2,POINT (-77.117895 38.93696),7,POINT (-77.11618 38.935177),127269.0,247.573166,...,,,"""dirt""",,noMotors,yes,unclear,yes,21620,"LINESTRING (316436.259 4311913.75, 316433.42 4..."
6,0,6,POINT (-77.115183 38.935118),POINT (-77.117686 38.937527),12,POINT (-77.115183 38.935118),13,POINT (-77.117686 38.937527),42425.0,344.415068,...,,,,35.0,primary,no,unclear,no,4777,"LINESTRING (316540.695 4311900.207, 316518.421..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62607,0,62607,POINT (-77.030862 38.882691),POINT (-77.030862 38.882691),125214,POINT (-77.030862 38.882691),125214,POINT (-77.030862 38.882691),0.0,0.000000,...,,,"""concrete""",,other,unclear,unclear,unclear,33782,"LINESTRING (323846.141 4305718.997, 323844.559..."
62608,0,62608,POINT (-77.023401 38.897861),POINT (-77.023401 38.897861),125216,POINT (-77.023401 38.897861),125216,POINT (-77.023401 38.897861),0.0,0.000000,...,,,,,other,unclear,unclear,unclear,31220,"LINESTRING (324530.733 4307388.196, 324532.703..."
62609,0,62609,POINT (-77.022906 38.898133),POINT (-77.022906 38.898133),125218,POINT (-77.022906 38.898133),125218,POINT (-77.022906 38.898133),0.0,0.000000,...,,,,,other,unclear,unclear,unclear,31219,"LINESTRING (324574.333 4307417.464, 324581.592..."
62610,0,62610,POINT (-77.022896 38.897678),POINT (-77.022896 38.897678),125220,POINT (-77.022896 38.897678),125220,POINT (-77.022896 38.897678),0.0,0.000000,...,,,,,other,unclear,unclear,unclear,31221,"LINESTRING (324574.106 4307366.936, 324577.836..."


In [103]:
seg_osm_overlaps = gpd.sjoin(seg, highways, how="inner", predicate = 'overlaps')
seg_osm_overlaps

Unnamed: 0,id,SegID,startPoints,endPoints,PointID_start,geometry_start,PointID_end,geometry_end,betweenness,straight_len,...,"""cycleway:oneway""","""cycleway:separation""","""surface""",maxspeed,types,bike_intermediate,cycleway_intermediate,cycle_summary,ind,geoms
4,0,4,POINT (-77.115779 38.935197),POINT (-77.117895 38.93696),8,POINT (-77.115779 38.935197),2,POINT (-77.117895 38.93696),42425.0,268.267702,...,,,"""dirt""",,noMotors,yes,unclear,yes,7915,"LINESTRING (317037.765 4310854.601, 317025.576..."
4,0,4,POINT (-77.115779 38.935197),POINT (-77.117895 38.93696),8,POINT (-77.115779 38.935197),2,POINT (-77.117895 38.93696),42425.0,268.267702,...,,,"""dirt""",,noMotors,unclear,unclear,unclear,8961,"LINESTRING (316495.623 4311848.33, 316440.295 ..."
6,0,6,POINT (-77.115183 38.935118),POINT (-77.117686 38.937527),12,POINT (-77.115183 38.935118),13,POINT (-77.117686 38.937527),42425.0,344.415068,...,,,,35.0,primary,no,unclear,no,11,"LINESTRING (316905.755 4311245.64, 316898.769 ..."
7,0,7,POINT (-77.116872 38.929478),POINT (-77.117647 38.929133),14,POINT (-77.116872 38.929478),15,POINT (-77.117647 38.929133),42425.0,77.351118,...,,,"""concrete""",25.0,primary,unclear,unclear,unclear,30848,"LINESTRING (316523.319 4311086.357, 316518.106..."
9,0,9,POINT (-77.116895 38.929534),POINT (-77.112435 38.930298),11,POINT (-77.116895 38.929534),19,POINT (-77.112435 38.930298),42425.0,395.931209,...,,,"""concrete""",,noMotors,yes,unclear,yes,4933,"LINESTRING (316422.322 4311047.139, 316433.923..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62468,0,62468,POINT (-76.915032 38.891887),POINT (-76.91503 38.892883),124745,POINT (-76.915032 38.891887),124813,POINT (-76.91503 38.892883),42425.0,110.562840,...,,,,,noMotors,unclear,unclear,unclear,8808,"LINESTRING (333915.319 4306551.162, 333915.203..."
62468,0,62468,POINT (-76.915032 38.891887),POINT (-76.91503 38.892883),124745,POINT (-76.915032 38.891887),124813,POINT (-76.91503 38.892883),42425.0,110.562840,...,,,"""asphalt""",,residential,unclear,unclear,unclear,1234,"LINESTRING (333915.319 4306551.162, 333916.386..."
62582,0,62582,POINT (-76.910936 38.891962),POINT (-76.910931 38.89243),125115,POINT (-76.910936 38.891962),125119,POINT (-76.910931 38.89243),10885.0,51.952884,...,,,"""asphalt""",,primary,unclear,unclear,unclear,6289,"LINESTRING (334272.114 4306569.727, 334272.321..."
62600,0,62600,POINT (-76.910557 38.891886),POINT (-76.910524 38.891959),125149,POINT (-76.910557 38.891886),125162,POINT (-76.910524 38.891959),127360.0,8.594312,...,,,"""asphalt""",,residential,unclear,unclear,unclear,9394,"LINESTRING (334270.801 4306523.211, 334280.515..."


In [121]:
#bad heuristic
combined_gdf = pd.concat([seg_osm_within, seg_osm_contains, seg_osm_overlaps], ignore_index=True)
combined_gdf['geometry'] = combined_gdf['geometry'].normalize()
final_gdf = combined_gdf.drop_duplicates(subset = ['SegID'])
final_gdf

Unnamed: 0,id,SegID,startPoints,endPoints,PointID_start,geometry_start,PointID_end,geometry_end,betweenness,straight_len,...,"""cycleway:oneway""","""cycleway:separation""","""surface""",maxspeed,types,bike_intermediate,cycleway_intermediate,cycle_summary,ind,geoms
0,0,0,POINT (-77.180353 38.967728),POINT (-77.082863 38.902279),0,POINT (-77.180353 38.967728),1,POINT (-77.082863 38.902279),42425.0,11146.680303,...,,,"""ground""",,noMotors,no,unclear,no,8633,"LINESTRING (311104.061 4315456.299, 311123.016..."
1,0,1,POINT (-77.117895 38.93696),POINT (-77.121719 38.939401),2,POINT (-77.117895 38.93696),3,POINT (-77.121719 38.939401),1.0,428.208835,...,,,"""dirt""",,noMotors,unclear,unclear,unclear,8961,"LINESTRING (316495.623 4311848.33, 316440.295 ..."
2,0,3,POINT (-77.117895 38.93696),POINT (-77.11618 38.935177),2,POINT (-77.117895 38.93696),7,POINT (-77.11618 38.935177),127269.0,247.573166,...,,,"""dirt""",,noMotors,yes,unclear,yes,21620,"LINESTRING (316436.259 4311913.75, 316433.42 4..."
3,0,5,POINT (-77.117824 38.929153),POINT (-77.116895 38.929534),10,POINT (-77.117824 38.929153),11,POINT (-77.116895 38.929534),42425.0,90.986172,...,,,"""concrete""",,noMotors,yes,unclear,yes,4933,"LINESTRING (316422.322 4311047.139, 316433.923..."
4,0,8,POINT (-77.115779 38.935197),POINT (-77.117277 38.933006),8,POINT (-77.115779 38.935197),17,POINT (-77.117277 38.933006),35.0,275.743755,...,,,"""unpaved""",,other,yes,unclear,yes,21621,"LINESTRING (316615.124 4311713.799, 316616.606..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82508,0,62063,POINT (-76.92189 38.895014),POINT (-76.920576 38.894659),123867,POINT (-76.92189 38.895014),124127,POINT (-76.920576 38.894659),2944763.0,120.608417,...,,,,,noMotors,unclear,unclear,unclear,13143,"LINESTRING (333336.403 4306881.875, 333343.804..."
82514,0,62143,POINT (-76.920576 38.894659),POINT (-76.917783 38.893335),124127,POINT (-76.920576 38.894659),124287,POINT (-76.917783 38.893335),845.0,283.385663,...,,,,,noMotors,yes,unclear,yes,33949,"LINESTRING (333676.345 4306688.343, 333680.156..."
82516,0,62145,POINT (-76.920533 38.898959),POINT (-76.920518 38.898754),124187,POINT (-76.920533 38.898959),124185,POINT (-76.920518 38.898754),43969.0,22.793617,...,,,"""asphalt""",,residential,unclear,unclear,unclear,1344,"LINESTRING (333446.229 4306846.596, 333447.731..."
82524,0,62351,POINT (-76.917054 38.892882),POINT (-76.916405 38.892427),124601,POINT (-76.917054 38.892882),124703,POINT (-76.916405 38.892427),292282.0,75.637197,...,,,,,noMotors,unclear,unclear,unclear,33948,"LINESTRING (333742.23 4306634.273, 333742.276 ..."


In [122]:
bc_seg_simple = bc_seg[['crashID', 'SegID']]
bc_seg_simple = bc_seg_simple.groupby('SegID').count()
bc_seg_simple

Unnamed: 0_level_0,crashID
SegID,Unnamed: 1_level_1
49,1
122,1
146,1
253,1
275,2
...,...
62505,1
62534,2
62557,1
62586,1


In [123]:
final_gdf = pd.merge(left=final_gdf, right=bc_seg_simple,
               left_on='SegID',
               right_index=True,
               how='left')
final_gdf = final_gdf.rename(columns = {'crashID': 'crashCount'})
final_gdf

Unnamed: 0,id,SegID,startPoints,endPoints,PointID_start,geometry_start,PointID_end,geometry_end,betweenness,straight_len,...,"""cycleway:separation""","""surface""",maxspeed,types,bike_intermediate,cycleway_intermediate,cycle_summary,ind,geoms,crashCount
0,0,0,POINT (-77.180353 38.967728),POINT (-77.082863 38.902279),0,POINT (-77.180353 38.967728),1,POINT (-77.082863 38.902279),42425.0,11146.680303,...,,"""ground""",,noMotors,no,unclear,no,8633,"LINESTRING (311104.061 4315456.299, 311123.016...",
1,0,1,POINT (-77.117895 38.93696),POINT (-77.121719 38.939401),2,POINT (-77.117895 38.93696),3,POINT (-77.121719 38.939401),1.0,428.208835,...,,"""dirt""",,noMotors,unclear,unclear,unclear,8961,"LINESTRING (316495.623 4311848.33, 316440.295 ...",
2,0,3,POINT (-77.117895 38.93696),POINT (-77.11618 38.935177),2,POINT (-77.117895 38.93696),7,POINT (-77.11618 38.935177),127269.0,247.573166,...,,"""dirt""",,noMotors,yes,unclear,yes,21620,"LINESTRING (316436.259 4311913.75, 316433.42 4...",
3,0,5,POINT (-77.117824 38.929153),POINT (-77.116895 38.929534),10,POINT (-77.117824 38.929153),11,POINT (-77.116895 38.929534),42425.0,90.986172,...,,"""concrete""",,noMotors,yes,unclear,yes,4933,"LINESTRING (316422.322 4311047.139, 316433.923...",
4,0,8,POINT (-77.115779 38.935197),POINT (-77.117277 38.933006),8,POINT (-77.115779 38.935197),17,POINT (-77.117277 38.933006),35.0,275.743755,...,,"""unpaved""",,other,yes,unclear,yes,21621,"LINESTRING (316615.124 4311713.799, 316616.606...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82508,0,62063,POINT (-76.92189 38.895014),POINT (-76.920576 38.894659),123867,POINT (-76.92189 38.895014),124127,POINT (-76.920576 38.894659),2944763.0,120.608417,...,,,,noMotors,unclear,unclear,unclear,13143,"LINESTRING (333336.403 4306881.875, 333343.804...",
82514,0,62143,POINT (-76.920576 38.894659),POINT (-76.917783 38.893335),124127,POINT (-76.920576 38.894659),124287,POINT (-76.917783 38.893335),845.0,283.385663,...,,,,noMotors,yes,unclear,yes,33949,"LINESTRING (333676.345 4306688.343, 333680.156...",
82516,0,62145,POINT (-76.920533 38.898959),POINT (-76.920518 38.898754),124187,POINT (-76.920533 38.898959),124185,POINT (-76.920518 38.898754),43969.0,22.793617,...,,"""asphalt""",,residential,unclear,unclear,unclear,1344,"LINESTRING (333446.229 4306846.596, 333447.731...",
82524,0,62351,POINT (-76.917054 38.892882),POINT (-76.916405 38.892427),124601,POINT (-76.917054 38.892882),124703,POINT (-76.916405 38.892427),292282.0,75.637197,...,,,,noMotors,unclear,unclear,unclear,33948,"LINESTRING (333742.23 4306634.273, 333742.276 ...",


In [125]:
final_gdf[final_gdf['SegID'] == 62534]

Unnamed: 0,id,SegID,startPoints,endPoints,PointID_start,geometry_start,PointID_end,geometry_end,betweenness,straight_len,...,"""cycleway:separation""","""surface""",maxspeed,types,bike_intermediate,cycleway_intermediate,cycle_summary,ind,geoms,crashCount
54694,0,62534,POINT (-76.913651 38.889807),POINT (-76.913658 38.890514),124958,POINT (-76.913651 38.889807),125056,POINT (-76.913658 38.890514),42425.0,78.484042,...,,"""asphalt""",,residential,unclear,unclear,unclear,7123,"LINESTRING (334030.31 4306288.969, 334030.433 ...",2.0


In [126]:
final_gdf['geoms'] = final_gdf['geoms'].to_wkt()
final_gdf.to_file('../seg_osm.geojson')