Need to process streets into links and nodes, with nodes as intersections

In [29]:
import os
import pandas as pd
import json

from mapboxgl.utils import create_color_stops, df_to_geojson
from mapboxgl.viz import CircleViz,MapViz
token = os.getenv('MAPBOX_TOKEN')

# let's plot the missing nodes
# Generate data breaks and color stops from colorBrewer
def show_points(to_show,data):
    nodes = data.set_index('node')
    data = df_to_geojson(nodes.loc[to_show].reset_index(),
                         properties=['node','name','street'],
                         lat='lat', lon='lon')

    # Create the viz from the dataframe
    viz = CircleViz(data,
                    access_token=token,
                    radius=1,
                    stroke_color='black',
                    stroke_width=0.2,
                    center=(-73.97, 40.77),
                    zoom=12,
                    below_layer='waterway-label')
    viz.show()

streets = pd.read_csv('nyc_streets.csv')

In [30]:
streets.head()

Unnamed: 0,street,name,type,oneway,node,lat,lon
0,5668966,West 106th Street,secondary,,4205830390,40.798208,-73.960425
1,5668966,West 106th Street,secondary,,3602678205,40.798324,-73.960705
2,5668966,West 106th Street,secondary,,4205830391,40.798485,-73.96109
3,5668968,West 80th Street,residential,yes,42421778,40.785593,-73.982011
4,5668968,West 80th Street,residential,yes,8727756094,40.785083,-73.980801


In [36]:
# are there any lonely ways?
streets['street'].value_counts()[::-1]

# no

962829745      2
584971404      2
420877078      2
156994969      2
584971388      2
            ... 
404253364     67
68674962      71
664893485     80
5669386       94
828561963    143
Name: street, Length: 5481, dtype: int64

An intersecion is a node with shared streets

### Useful info
The nodes defining the geometry of the way are enumerated in the correct order, and indicated only by reference using their unique identifier. These nodes must have been already defined separately with their coordinates. 

### Tasks
- crawl nodes and create edges
- join edges that are part of the same street
- find nodes that are intersections - some are just streets broken up into many ways
- join streets to leave legit intersections
- removed intersections at the same geographical locations

In [31]:
# first create a links data set
def create_links(df):
    link_start = df.iloc[:-1,:5].rename(columns=dict(node='start'))
    link_end = df.iloc[1:,4].rename('end')
    link_start['end'] = link_end.values
    return link_start

edges = streets.groupby(['street']).apply(create_links).reset_index(drop=True)
edges = edges.reset_index().rename(columns=dict(index='edge_id'))
edges.head()

Unnamed: 0,edge_id,street,name,type,oneway,start,end
0,0,5668966,West 106th Street,secondary,,4205830390,3602678205
1,1,5668966,West 106th Street,secondary,,3602678205,4205830391
2,2,5668968,West 80th Street,residential,yes,42421778,8727756094
3,3,5668968,West 80th Street,residential,yes,8727756094,42421776
4,4,5668968,West 80th Street,residential,yes,42421776,8727756090


Now let's try to only keep the intersections, that intersect with a different street.

In [34]:
# so now find nodes with two street intersections
# and join with only a street of the same name
in_bynode = edges.groupby(['end'])
out_bynode = edges.groupby(['start'])

# loop and join edges until
done = set()
errors = set()
def join_edges(start,node):
        
    # get the next edge - if it fails, assume a dead-end
    try:
        next_edges = out_bynode.get_group(node)
    except KeyError as ke:
        errors.add(ke.args[0])
        return False     
    
    # assume if there is more than one connected edge,
    # its a legit intersection
    if next_edges.shape[0] > 1:        
        return start,node
    
    next_start = next_edges['start'].values[0]
    end = next_edges['end'].values[0]
    
    # if we've been here, stop
    # this should stop recursion errors
    if (next_start,end) in done:
        return start,node
    
    done.add((next_start,end))
    join_edges(start,end)

new_edges = []
stuck = []
for idx, row in edges.iterrows():
    
    start = row['start']
    end = row['end']
    
    try:        
        new = join_edges(start,end)
    except RecursionError as re:
        stuck.append((start,end))
        print("Stuck:",start,end)
        continue
    
    if new:
        new_edges.append(new)
    
print(len(errors))
print(len(new_edges))

92
19450


In [35]:
intersections = set([e[0] for e in new_edges]+[e[1] for e in new_edges])
show_points(intersections,streets)

In [7]:
streets[streets['node']==42424905]

Unnamed: 0,street,name,type,oneway,node,lat,lon
131,5669355,Main Street,residential,,42424905,40.769349,-73.943832
3720,5671637,Main Street,residential,,42424905,40.769349,-73.943832


In [10]:
streets[streets['street']==5671637]

Unnamed: 0,street,name,type,oneway,node,lat,lon
3714,5671637,Main Street,residential,,42448857,40.768578,-73.942663
3715,5671637,Main Street,residential,,8265114831,40.768639,-73.942727
3716,5671637,Main Street,residential,,1241986599,40.768809,-73.942905
3717,5671637,Main Street,residential,,593428194,40.769105,-73.943157
3718,5671637,Main Street,residential,,7799540769,40.769141,-73.943239
3719,5671637,Main Street,residential,,3262581661,40.769292,-73.943585
3720,5671637,Main Street,residential,,42424905,40.769349,-73.943832


In [None]:
errors[:2]

In [None]:
# let's plot the missing nodes
# Generate data breaks and color stops from colorBrewer
data = df_to_geojson(inter_df.loc[errors].reset_index(),
                     properties=['node','count'],
                     lat='lat', lon='lon')

# create the color stops
color_breaks = [1,2,3,4,5,6]
color_stops = create_color_stops(color_breaks,colors='YlGnBu')

# Create the viz from the dataframe
token = os.getenv('MAPBOX_TOKEN')
viz = CircleViz(data,
                access_token=token,
                color_property='count',
                color_stops=color_stops,
                radius=1,
                stroke_color='black',
                stroke_width=0.2,
                center=(-73.97, 40.77),
                zoom=12,
                below_layer='waterway-label')
viz.show()
viz.create_html("errors.html")

In [None]:
# let's plot the missing nodes
# Generate data breaks and color stops from colorBrewer
data = df_to_geojson(inter_df.loc[odds].reset_index(),
                     properties=['node','count'],
                     lat='lat', lon='lon')

# create the color stops
color_breaks = [1,2,3,4,5,6]
color_stops = create_color_stops(color_breaks,colors='YlGnBu')

# Create the viz from the dataframe
token = os.getenv('MAPBOX_TOKEN')
viz = CircleViz(data,
                access_token=token,
                color_property='count',
                color_stops=color_stops,
                radius=1,
                stroke_color='black',
                stroke_width=0.2,
                center=(-73.97, 40.77),
                zoom=12,
                below_layer='waterway-label')
viz.show()
viz.create_html("odds.html")