In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
from shapely.geometry import Point, LineString
from shapely.ops import split

#Set debugging flag: when True, the intermediate "to"/"from" node layers
#are also written to the scratch folder as shapefiles for inspection
DEBUG = False

In [2]:
#Read in routes feature class as shapefile
#(forward slashes are used, as for every other path in this notebook, so the
# relative path resolves on Windows, macOS and Linux alike)
gdfRoutes = gpd.read_file('../data/processed/Routes.shp')

In [3]:
#Attribute columns removed from the start/end point layers to keep them tidy
drop_cols = [
    'Facility N',
    'Address',
    'City',
    'County Nam',
    'Zip',
    'Latitude',
    'Longitude',
    'Regulated',
    'Allowable',
    'Total Wast',
]

In [4]:
#Start-point layer: a trimmed copy of the routes whose geometry is the
#first vertex of each route line
gdfStart = gdfRoutes.copy(deep=True).drop(columns=drop_cols)
gdfStart['geometry'] = gdfRoutes['geometry'].apply(
    lambda route: Point(route.coords[0])
)

In [5]:
#Copy routes geodataframe and update geometry to end points
#(the last vertex of each route line)
gdfEnd = gdfRoutes.copy(deep=True)
gdfEnd['geometry'] = gdfRoutes['geometry'].apply(lambda x: Point(x.coords[-1]))
#Drop the untidy attribute columns (columns= already implies axis=1)
gdfEnd = gdfEnd.drop(columns=drop_cols)

Create junctions along route segments where upstream routes join them - done by iterating through each route feature and splitting it by the set of end points. 

In [6]:
#Union every route end point into one geometry to use as the splitter
ends = gdfEnd['geometry'].unary_union

In [7]:
#Split every route at the combined end points; each result is a
#geometry collection holding that route's pieces
def _split_at_ends(route):
    """Return `route` split by the combined end-point geometry `ends`."""
    return split(route, ends)

theSplits = gdfRoutes['geometry'].apply(_split_at_ends)

* Create a feature class of all segments (routes split at junctions)

In [8]:
#Accumulators: one route id and one geometry per split segment
links, geom = [], []
#Walk the routes' 'index' attribute, looking up each route's split result by row label
for row_label, route_key in gdfRoutes['index'].items():
    #Unpack the pieces held in this route's geometry collection
    pieces = list(theSplits[row_label].geoms)
    #Every piece inherits the id of the route it came from
    links.extend([str(route_key)] * len(pieces))
    geom.extend(pieces)

#Assemble the segments layer, reusing the routes' coordinate reference system
segment_attrs = pd.DataFrame({'route_id': links})
gdfSegments = gpd.GeoDataFrame(segment_attrs, geometry=geom, crs=gdfRoutes.crs)

#The row index (as text) doubles as the unique segment ID
gdfSegments['edge_ID'] = gdfSegments.index.astype(str)

##### Create geodataframes from "from" and "to" nodes
Here the "from" nodes are actually the last point in each segment, and the "to" nodes are the first. This seems backwards, but these are both intermediate datasets used to determine "from-to" pairs, done by spatially joining them. When joined, the "from" node carries the attribute of the upstream segment and the "to" node carries the downstream one...

In [10]:
#Write the split segments to the scratch folder as a shapefile
#NOTE(review): unlike the DEBUG-guarded node exports, this write always runs — confirm intent
gdfSegments.to_file('../scratch/segs.shp')

In [11]:
#Construct point layers from the segment start and end vertices
def _segment_vertex(segment, position):
    """Return the vertex of `segment` at `position` (0 = first, -1 = last) as a Point."""
    return Point(segment.coords[position])

#"To" nodes: first vertex of each segment, labelled with the segment's row index
gdfToNodes = gdfSegments.copy(deep=True)
gdfToNodes['geometry'] = gdfToNodes['geometry'].apply(lambda seg: _segment_vertex(seg, 0))
gdfToNodes['to_id'] = gdfToNodes.index.astype(str)

#"From" nodes: last vertex of each segment, labelled with the segment's row index
gdfFromNodes = gdfSegments.copy(deep=True)
gdfFromNodes['geometry'] = gdfFromNodes['geometry'].apply(lambda seg: _segment_vertex(seg, -1))
gdfFromNodes['from_id'] = gdfFromNodes.index.astype(str)

In [12]:
#When debugging, dump both node layers to the scratch folder
if DEBUG:
    for scratch_path, node_layer in (('../scratch/ToNodes.shp', gdfToNodes),
                                     ('../scratch/FromNodes.shp', gdfFromNodes)):
        node_layer.to_file(scratch_path)

* Overlap the to and from nodes; this provides a table linking the two (the start of an edge list). However, in doing this, because there is no "outer" join option, the "to" nodes with no corresponding "from" nodes are dropped. We need to re-add them, assigning their value to the appropriate "from" value.

In [13]:
#Spatially join the above geodataframes: every "from" node is kept (left join)
#and picks up the to_id of any coincident "to" node
gdfNodes = gpd.sjoin(left_df=gdfFromNodes, right_df=gdfToNodes, how='left')
#Set terminal nodes (no matching "to" node, so to_id is missing) to the index + "_T"
gdfNodes['idx'] = gdfNodes.index.astype(str) + "_T"
#Assign the filled column back: an inplace fillna on a column selection is
#chained assignment, which does not update the frame under pandas Copy-on-Write
gdfNodes['to_id'] = gdfNodes['to_id'].fillna(gdfNodes['idx'])
#Drop extraneous fields (columns= already implies axis=1)
gdfNodes = gdfNodes.drop(columns=['index_right', 'route_id_right', 'edge_ID_right', 'idx'])

In [14]:
#Write the joined from/to node table to the scratch folder as a shapefile
#NOTE(review): this write is not guarded by DEBUG, unlike the ToNodes/FromNodes exports — confirm intent
gdfNodes.to_file('../scratch/nodes.shp')

##### Append the `to` and `from` node IDs to the segments
The `edge_ID_left` column in the spatially joined dataframe above contains the value corresponding to the edge from which the end points were extracted. We thus join the end node data back to the segments via this value.

In [48]:
#Bring each segment's from/to node IDs onto the segments layer,
#matching the segment's edge_ID against the node table's from_id
node_links = gdfNodes[['from_id', 'to_id']]
gdfOut = gdfSegments.merge(
    node_links,
    how='left',
    left_on='edge_ID',
    right_on='from_id',
)

In [49]:
#Attach biogas potential (held on gdfStart) to the segments
#Step 1: spatially join the route start points onto the segment start ("to") nodes,
#keeping every node whether or not a start point coincides with it
gdfTo = gpd.sjoin(gdfToNodes, gdfStart, how='left')
#Step 2: carry the biogas value over to the output segments by matching edge_ID to to_id
#(gdfOut already has a to_id column, so the merge suffixes the pair as to_id_x / to_id_y)
biogas_lookup = gdfTo[['to_id', 'Biogas P_1']]
gdfOut = gdfOut.merge(biogas_lookup, how='inner', left_on='edge_ID', right_on='to_id')

In [59]:
#Fix biogas column: store the potential as an integer, with 0 where no value was joined.
#Missing values are filled BEFORE the cast, because converting NaN to int64 is
#undefined (it yields an arbitrary integer and a RuntimeWarning in recent NumPy).
gdfOut['BG_potential'] = gdfOut['Biogas P_1'].fillna(0).astype(np.int64)

In [61]:
#Restore the plain 'to_id' name that the merge suffixed to 'to_id_x'
gdfOut = gdfOut.rename(columns={'to_id_x':'to_id'})
#Preview the first rows of the result
gdfOut.head()

Unnamed: 0,route_id,geometry,edge_ID,from_id,to_id,to_id_y,Biogas P_1,BG_potential
0,179,"LINESTRING (1582814.308 -311794.717, 1582814.3...",0,0,1,0,245692300.0,245692262
1,179,"LINESTRING (1582309.906 -310281.512, 1582309.9...",1,1,2,1,,0
2,179,"LINESTRING (1582309.906 -308768.307, 1582309.9...",2,2,3,2,,0
3,179,"LINESTRING (1582309.906 -308263.906, 1582309.9...",3,3,4,3,,0
4,179,"LINESTRING (1582309.906 -307255.102, 1582814.3...",4,4,5,4,,0


In [62]:
#Export the pipeline segments (IDs, route, biogas potential and geometry) as a shapefile
pipeline_fields = ['edge_ID','from_id','to_id','route_id','BG_potential','geometry']
gdfOut[pipeline_fields].to_file('../data/processed/BasePipeline.shp')

In [63]:
#Write out edge list (from node, to node, biogas value) as a CSV without the row index
#NOTE(review): this exports the raw 'Biogas P_1' column rather than the cleaned
#'BG_potential' column written to the shapefile — confirm which is intended
gdfOut[['from_id','to_id','Biogas P_1']].to_csv('../data/processed/BaseEdgeList.csv',index=False)