In [30]:
import geopandas as gpd
import pandas as pd
import numpy as np
from shapely.geometry import Point

In [31]:
# Read shapefile
# Set filepath 
fp = "/Users/joe/Documents/Coding/Simplification/comp_model_cl_w_u.shp"
edges_df = gpd.read_file(fp,  encoding="utf-8")
# Plot shapefile
#edges_df.plot()

In [32]:
# add start and end point coordinates to data frame 
def getstartnode(row):
    return row.coords[0]

def getendnode(row):
    return row.coords[-1]

edges_df = edges_df.assign(startcoords = map(lambda x: getstartnode(x), edges_df['geometry'].values.tolist()), 
                           endcoords = map(lambda x: getendnode(x), edges_df['geometry'].values.tolist()))

In [33]:
# get all unique coordinates
coords_pds = pd.concat([edges_df.loc[:, 'startcoords'], edges_df.loc[:, 'endcoords']])
coords_pds = pd.unique(coords_pds)
# TODO: df = pd.DataFrame(data, index = ['Cochice', 'Pima', 'Santa Cruz', 'Maricopa', 'Yuma'])
nodes_df = pd.DataFrame({'coords': coords_pds, 'id': range(0, len(coords_pds))})
nodes_df.set_index(['coords'], inplace=True)
# del coords_pds
print nodes_df.head()

                                id
coords                            
(415623.0, 439681.0)             0
(420759.0, 445060.0)             1
(420745.253186, 445141.572253)   2
(440861.06732, 425396.6059)      3
(429961.973, 439686.461)         4


In [34]:
edges_df = pd.merge(edges_df, nodes_df, left_on='startcoords', right_index=True, how='left', suffixes=('_raw', '_startnode'))
edges_df = pd.merge(edges_df, nodes_df, left_on='endcoords', right_index=True, how='left')
edges_df['sid'] = edges_df.index
edges_df['stype'] = None
edges_df['audit'] = None
edges_df = edges_df.rename(columns={'id':'endnodeid', 'id_startnode':'startnodeid' })               

In [35]:
# exclude points
print edges_df.shape
edges_df = edges_df[edges_df.geometry.length != 0]
print edges_df.shape

(98637, 18)
(97866, 18)


In [36]:
adjnodes_df = pd.DataFrame({'n1': pd.concat([edges_df['startnodeid'], edges_df['endnodeid']]), 'n2': pd.concat([edges_df['endnodeid'], edges_df['startnodeid']])})
print adjnodes_df.shape
print adjnodes_df.head()

(195732, 2)
   n1     n2
0   0  58338
1   1  11144
2   2      1
3   3  42424
4   4  37161


In [37]:
adjedges_df = pd.DataFrame({'n': pd.concat([edges_df['startnodeid'], edges_df['endnodeid']]), 'sid': pd.concat([edges_df['sid'], edges_df['sid']])})
print adjedges_df.shape
print adjedges_df.tail()

(195732, 2)
           n    sid
98632  63132  98632
98633  11275  98633
98634  41600  98634
98635  44007  98635
98636  20342  98636


In [38]:
adjnodes_df = adjnodes_df.groupby('n1')['n2'].apply(list).to_frame()
print adjnodes_df.shape
# print nodes_df.shape
print adjnodes_df.head()

adjedges_df = adjedges_df.groupby('n')['sid'].apply(list).to_frame()
print adjedges_df.shape
# print nodes_df.shape
print adjedges_df.head()
# nodenode_df not needed from edges based on index 

(75820, 1)
                              n2
n1                              
0           [58338, 62077, 2526]
1       [11144, 33849, 2, 11144]
2               [1, 8997, 11165]
3            [42424, 42422, 885]
4   [37161, 52627, 52624, 72865]
(75820, 1)
                    sid
n                      
0     [0, 88173, 35412]
1  [1, 49507, 2, 13009]
2     [2, 10409, 13031]
3       [3, 88206, 917]
4    [4, 13, 18, 77969]


In [39]:
# update nodes_df
nodes_df = pd.merge(nodes_df, adjedges_df, right_index = True, left_on='id', how='left', suffixes=('_raw', '_join'))
nodes_df = pd.merge(nodes_df, adjnodes_df, left_on='id', right_index=True, how='left', suffixes=('', '_join'))

print nodes_df.head()

                                id                   sid  \
coords                                                     
(415623.0, 439681.0)             0     [0, 88173, 35412]   
(420759.0, 445060.0)             1  [1, 49507, 2, 13009]   
(420745.253186, 445141.572253)   2     [2, 10409, 13031]   
(440861.06732, 425396.6059)      3       [3, 88206, 917]   
(429961.973, 439686.461)         4    [4, 13, 18, 77969]   

                                                          n2  
coords                                                        
(415623.0, 439681.0)                    [58338, 62077, 2526]  
(420759.0, 445060.0)                [11144, 33849, 2, 11144]  
(420745.253186, 445141.572253)              [1, 8997, 11165]  
(440861.06732, 425396.6059)              [42424, 42422, 885]  
(429961.973, 439686.461)        [37161, 52627, 52624, 72865]  


In [40]:
print type(nodes_df)

# to geodataframe
print nodes_df.head()
nodes_df['geometry'] = map(lambda x: Point(x), nodes_df.index.values)
nodes_df = nodes_df.reset_index(drop = True)
nodes_gdf = gpd.GeoDataFrame(nodes_df)
print type(nodes_gdf)
del nodes_df

<class 'pandas.core.frame.DataFrame'>
                                id                   sid  \
coords                                                     
(415623.0, 439681.0)             0     [0, 88173, 35412]   
(420759.0, 445060.0)             1  [1, 49507, 2, 13009]   
(420745.253186, 445141.572253)   2     [2, 10409, 13031]   
(440861.06732, 425396.6059)      3       [3, 88206, 917]   
(429961.973, 439686.461)         4    [4, 13, 18, 77969]   

                                                          n2  
coords                                                        
(415623.0, 439681.0)                    [58338, 62077, 2526]  
(420759.0, 445060.0)                [11144, 33849, 2, 11144]  
(420745.253186, 445141.572253)              [1, 8997, 11165]  
(440861.06732, 425396.6059)              [42424, 42422, 885]  
(429961.973, 439686.461)        [37161, 52627, 52624, 72865]  
<class 'geopandas.geodataframe.GeoDataFrame'>


In [41]:
print nodes_gdf.head()


   id                   sid                            n2  \
0   0     [0, 88173, 35412]          [58338, 62077, 2526]   
1   1  [1, 49507, 2, 13009]      [11144, 33849, 2, 11144]   
2   2     [2, 10409, 13031]              [1, 8997, 11165]   
3   3       [3, 88206, 917]           [42424, 42422, 885]   
4   4    [4, 13, 18, 77969]  [37161, 52627, 52624, 72865]   

                                     geometry  
0                       POINT (415623 439681)  
1                       POINT (420759 445060)  
2  POINT (420745.253186412 445141.5722528308)  
3            POINT (440861.06732 425396.6059)  
4               POINT (429961.973 439686.461)  


In [42]:
edges_df['n_n'] = map(lambda x: tuple(sorted(x)), edges_df[['startnodeid', 'endnodeid']].values.tolist())
# group by n_n and length geom
nodenode_df = edges_df.groupby('n_n')['sid'].apply(list).to_frame()
nodenode_df = nodenode_df.reset_index()
nodenode_df = nodenode_df.apply(lambda row: len(row.sid),axis=1)
print nodenode_df.head()
# TODO: return n_n - sid - count 
parallels =  nodenode_df[nodenode_df > 1]
print parallels.head()

0    1
1    1
2    1
3    1
4    2
dtype: int64
4      2
41     2
214    2
288    2
323    2
dtype: int64


In [54]:
#selfloops_df = edges_df.map(lambda x: , edges_df['n_n'])
#edges_df['n_n'] = map(lambda x: tuple(sorted(x)), edges_df[['n_n']].values.tolist())
#print edges_df.head()
print edges_df['n_n'].loc[lambda x: len(set(x)) == 1]


#print edges_df[~(edges_df.n_n.size == 1)]


KeyError: False

In [None]:
# TODO: clean parallels and duplicate geometry 
edges_df = edges_df.drop_duplicates(subset='geometry', keep='first', inplace=False)

# TODO: clean selfloops 
selfloops_df = edges_df.query('n_n[0] = n_n[1]')

# TODO: roundabouts & short 
roundabouts_df = edges_df.query('formofway = "Roundabout"')
# connected comp -> groups (degree 0), nodes -> midpoint, conlines (degree 1) - endpoint to change


In [None]:
# clean dual car and parallel 
# find outer ring from unions 


In [None]:
# apply route hierarchy -> axial -> segment 