We need to process the streets into links and nodes, where the nodes are intersections.

In [1]:
import os
import pandas as pd
import json

from mapboxgl.utils import create_color_stops, df_to_geojson
from mapboxgl.viz import CircleViz,MapViz

# Read the street table from CSV and show its (rows, columns) shape
streets = pd.read_csv(filepath_or_buffer='nyc_streets.csv')
streets.shape

(30731, 7)

In [2]:
# Peek at the first five rows of the street table
streets.head(5)

Unnamed: 0,street,name,type,oneway,node,lat,lon
0,5668966,West 106th Street,secondary,,4205830390,40.798208,-73.960425
1,5668966,West 106th Street,secondary,,3602678205,40.798324,-73.960705
2,5668966,West 106th Street,secondary,,4205830391,40.798485,-73.96109
3,5668968,West 80th Street,residential,yes,42421778,40.785593,-73.982011
4,5668968,West 80th Street,residential,yes,8727756094,40.785083,-73.980801


An intersection is a node that is shared between streets.

### Useful info
The nodes defining the geometry of the way are enumerated in the correct order, and indicated only by reference using their unique identifier. These nodes must have been already defined separately with their coordinates. 

### Tasks
- find nodes that are intersections - some are just streets broken up into many ways
- join streets to leave legit intersections
- remove intersections at the same geographical locations

In [3]:
# Nodes listed more than once in the street table are assumed to be
# intersections; `inters` maps each such node id to its occurrence count.
node_counts = streets['node'].value_counts()
inters = node_counts.loc[node_counts.gt(1)].rename('count')

# Attach coordinates to the intersection nodes and index the result by node id.
# NOTE(review): drop_duplicates runs after set_index('node'), so rows are
# compared on lat/lon/count only and the node id is ignored -- confirm that
# collapsing distinct nodes with identical values is intended.
inter_df = (
    streets.loc[:, ['node', 'lat', 'lon']]
    .join(inters, on=['node'], how='inner')
    .set_index('node')
    .drop_duplicates()
)
print(inter_df.shape)
inter_df.head()

(5476, 3)


Unnamed: 0_level_0,lat,lon,count
node,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4205830390,40.798208,-73.960425,2
4205830391,40.798485,-73.96109,2
42421778,40.785593,-73.982011,2
42421776,40.785031,-73.980678,3
42421775,40.784569,-73.979583,2


In [4]:
# Inner-joining the intersection counts onto the street table keeps only the
# rows whose node is an intersection. The original row index is kept as
# "order", and sorting by street and then "order" lists each street's
# intersections in their original row order.
street_links = (
    streets
    .join(inters.rename("intersection"), on=["node"], how="inner")
    .rename_axis("order")
    .sort_values(["street", "order"])
)

print(street_links.shape[0])
street_links.head(10)

14248


Unnamed: 0_level_0,street,name,type,oneway,node,lat,lon,intersection
order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,5668966,West 106th Street,secondary,,4205830390,40.798208,-73.960425,2
2,5668966,West 106th Street,secondary,,4205830391,40.798485,-73.96109,2
3,5668968,West 80th Street,residential,yes,42421778,40.785593,-73.982011,2
5,5668968,West 80th Street,residential,yes,42421776,40.785031,-73.980678,3
8,5668968,West 80th Street,residential,yes,42421775,40.784569,-73.979583,2
9,5668968,West 80th Street,residential,yes,1061531525,40.784494,-73.979405,2
12,5668968,West 80th Street,residential,yes,42421772,40.783828,-73.977825,2
13,5668968,West 80th Street,residential,yes,42421769,40.782638,-73.975007,2
14,5668973,Hillside Avenue,tertiary,yes,8904913030,40.859078,-73.931443,3
19,5668973,Hillside Avenue,tertiary,yes,42421785,40.858744,-73.930122,2


In [5]:
# Convert the intersection table to GeoJSON, keeping `count` as a property
data = df_to_geojson(
    inter_df,
    lat='lat',
    lon='lon',
    properties=['count'],
)

# Generate data breaks and color stops from colorBrewer ('YlGnBu')
color_breaks = list(range(1, 7))
color_stops = create_color_stops(color_breaks, colors='YlGnBu')

# The Mapbox access token is read from the MAPBOX_TOKEN environment variable
token = os.getenv('MAPBOX_TOKEN')

# Circle map of the intersections, coloured by `count`
viz = CircleViz(
    data,
    access_token=token,
    center=(-73.97, 40.77),
    zoom=12,
    below_layer='waterway-label',
    color_property='count',
    color_stops=color_stops,
    radius=1,
    stroke_width=0.2,
    stroke_color='black',
)
viz.show()
viz.create_html("raw_intersections.html")



Now let's try to keep only the intersections that intersect with a different street.

In [6]:
# First create edges from the street links: every pair of consecutive
# rows of a street becomes one link.
def create_links(df):
    """
    Turn the ordered rows of one street into start/end links.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows belonging to a single street, already in the desired order.
        Must contain a ``node`` column. The ``lat``, ``lon`` and
        ``intersection`` columns are dropped when present.

    Returns
    -------
    pandas.DataFrame
        One row per consecutive pair of rows in ``df``. It keeps the
        remaining columns of the first row of each pair, with ``node``
        renamed to ``start`` and the next row's ``node`` stored in ``end``.
        A ``df`` with fewer than two rows yields an empty frame.

    Notes
    -----
    Columns are picked by name rather than by position, so the result does
    not depend on the column order of ``df`` (for example when
    ``groupby(...).apply`` passes the group without its key column).

    The original author flagged this step as erroneous because it throws
    away 64 intersection nodes. A street with a single row produces no
    links at all, which is presumably where they are lost -- TODO confirm.
    """
    # node ids of rows 2..n, i.e. the end point of each link
    link_end = df['node'].iloc[1:].to_numpy()

    # rows 1..n-1 are the link starts; geometry and count columns are not
    # part of an edge record
    link_start = (
        df.iloc[:-1]
        .drop(columns=['lat', 'lon', 'intersection'], errors='ignore')
        .rename(columns={'node': 'start'})
    )
    link_start['end'] = link_end
    return link_start

# Build the links street by street and stack them into one edge table
# with a fresh 0..n-1 index
edges = (
    street_links
    .groupby(['street'])
    .apply(create_links)
    .reset_index(drop=True)
)
edges.head()

Unnamed: 0,street,name,type,oneway,start,end
0,5668966,West 106th Street,secondary,,4205830390,4205830391
1,5668968,West 80th Street,residential,yes,42421778,42421776
2,5668968,West 80th Street,residential,yes,42421776,42421775
3,5668968,West 80th Street,residential,yes,42421775,1061531525
4,5668968,West 80th Street,residential,yes,1061531525,42421772


In [7]:
# Give every edge an id (its row position in `edges`) and keep only the
# id plus the two end points.
edge_ids = (
    edges
    .reset_index()
    .rename(columns={'index': 'edge_id'})
    .loc[:, ['edge_id', 'start', 'end']]
)

# Edges arriving at an intersection: match each edge's `end` against the
# node index of inter_df.
in_edges = (
    edge_ids[['edge_id', 'end']]
    .join(inter_df, on=['end'], how='inner')
    .rename(columns={'edge_id': 'in_edge', 'end': 'node_id'})
    .reset_index(drop=True)
)

# Edges leaving an intersection: same join, keyed on `start`.
out_edges = (
    edge_ids[['edge_id', 'start']]
    .join(inter_df, on=['start'], how='inner')
    .rename(columns={'edge_id': 'out_edge', 'start': 'node_id'})
    .reset_index(drop=True)
)
in_edges.head()

Unnamed: 0,in_edge,node_id,lat,lon,count
0,0,4205830391,40.798485,-73.96109,2
1,1,42421776,40.785031,-73.980678,3
2,8046,42421776,40.785031,-73.980678,3
3,2,42421775,40.784569,-73.979583,2
4,4583,42421775,40.784569,-73.979583,2


In [None]:
# Look at the nodes that appear exactly twice (count == 2) and sort them
# into two buckets:
#   odds   - nodes with more than one incoming or more than one outgoing edge
#   errors - nodes that have no incoming edge or no outgoing edge at all
# The stated goal is to later join such nodes only with a street of the same
# name; that join is not done here.
#
# Grouping by the scalar key (not a one-element list) lets get_group() take
# a plain node id.
in_bynode = in_edges.groupby('node_id')
out_bynode = out_edges.groupby('node_id')
odds = []
errors = []
for node in inter_df[inter_df['count']==2].index.values:
    try:
        # get the in/out edges for this node
        ins = in_bynode.get_group(node)
        outs = out_bynode.get_group(node)
    except KeyError:
        # The node is missing from the in-edges or the out-edges. Record it
        # and skip the check below: without the `continue`, `ins`/`outs`
        # would still hold the previous node's groups (or be undefined on
        # the first pass).
        errors.append(node)
        continue

    if len(ins) > 1 or len(outs) > 1:
        odds.append(node)

len(odds),len(errors)

In [None]:
# Show every street_links row that references this node id
street_links.loc[street_links['node'] == 6791166002]

In [None]:
# Show every street_links row that belongs to this street id
street_links.loc[street_links['street'].eq(809013919)]

In [None]:
# First two node ids collected in `errors`
errors[0:2]

In [None]:
# let's plot the missing nodes, i.e. the node ids collected in `errors`
# Convert those rows of inter_df to GeoJSON, with the node id and count
# carried along as properties
data = df_to_geojson(
    inter_df.loc[errors].reset_index(),
    lat='lat',
    lon='lon',
    properties=['node', 'count'],
)

# Generate data breaks and color stops from colorBrewer ('YlGnBu')
color_breaks = list(range(1, 7))
color_stops = create_color_stops(color_breaks, colors='YlGnBu')

# The Mapbox access token is read from the MAPBOX_TOKEN environment variable
token = os.getenv('MAPBOX_TOKEN')

# Circle map of the selected nodes, coloured by `count`
viz = CircleViz(
    data,
    access_token=token,
    center=(-73.97, 40.77),
    zoom=12,
    below_layer='waterway-label',
    color_property='count',
    color_stops=color_stops,
    radius=1,
    stroke_width=0.2,
    stroke_color='black',
)
viz.show()
viz.create_html("errors.html")

In [None]:
# let's plot the odd nodes, i.e. the node ids collected in `odds`
# Convert those rows of inter_df to GeoJSON, with the node id and count
# carried along as properties
data = df_to_geojson(
    inter_df.loc[odds].reset_index(),
    lat='lat',
    lon='lon',
    properties=['node', 'count'],
)

# Generate data breaks and color stops from colorBrewer ('YlGnBu')
color_breaks = list(range(1, 7))
color_stops = create_color_stops(color_breaks, colors='YlGnBu')

# The Mapbox access token is read from the MAPBOX_TOKEN environment variable
token = os.getenv('MAPBOX_TOKEN')

# Circle map of the selected nodes, coloured by `count`
viz = CircleViz(
    data,
    access_token=token,
    center=(-73.97, 40.77),
    zoom=12,
    below_layer='waterway-label',
    color_property='count',
    color_stops=color_stops,
    radius=1,
    stroke_width=0.2,
    stroke_color='black',
)
viz.show()
viz.create_html("odds.html")