In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
from shapely.geometry import Point, LineString
from shapely.ops import split

In [2]:
# Read in the routes feature class from its shapefile.
# Forward slashes are used so the relative path resolves on every OS: a
# backslash is only a path separator on Windows, and the other paths in this
# notebook already use '/'.
gdfRoutes = gpd.read_file('../data/processed/Routes.shp')

In [3]:
# Attribute columns removed from the start/end point layers to keep them tidy
drop_cols = [
    'Facility N',
    'Address',
    'City',
    'County Nam',
    'Zip',
    'Latitude',
    'Longitude',
    'Regulated',
    'Allowable',
    'Total Wast',
]

In [4]:
# Start-point layer: a deep copy of the routes whose geometry is replaced by
# the first vertex of each route line, with the tidy-up columns removed
gdfStart = gdfRoutes.copy(deep=True)
startPoints = gdfRoutes['geometry'].apply(lambda route: Point(route.coords[0]))
gdfStart['geometry'] = startPoints
gdfStart = gdfStart.drop(columns=drop_cols)

In [5]:
# End-point layer: a deep copy of the routes whose geometry is replaced by
# the last vertex of each route line, with the tidy-up columns removed
gdfEnd = gdfRoutes.copy(deep=True)
endPoints = gdfRoutes['geometry'].apply(lambda route: Point(route.coords[-1]))
gdfEnd['geometry'] = endPoints
gdfEnd = gdfEnd.drop(columns=drop_cols)

Create junctions along route segments where upstream routes join them - done by iterating through each route feature and splitting it by the set of end points. 

In [6]:
# Union all route end points into one geometry; it is the splitter used on
# the route lines in the next step
ends = gdfEnd['geometry'].unary_union

In [7]:
# Split every route line at the combined end points; each entry of the
# resulting geoseries is a geometry collection of that route's pieces
theSplits = gdfRoutes['geometry'].apply(lambda route: split(route, ends))

* Create a feature class of all segments (routes split at junctions)

In [8]:
# Pair every split piece with the 'index' attribute of the route it came from.
# theSplits is looked up by the same row label that iterrows() yields.
pieces = [
    (route['index'], part)
    for label, route in gdfRoutes.iterrows()
    for part in theSplits[label].geoms
]

# Unzip the pairs into the two parallel lists used to build the output
links = [route_id for route_id, _ in pieces]
geom = [part for _, part in pieces]

# Build the segments geodataframe in the same CRS as the routes
segmentAttrs = pd.DataFrame({'route_id': links})
gdfSegments = gpd.GeoDataFrame(segmentAttrs, geometry=geom, crs=gdfRoutes.crs)

# The row index doubles as a unique segment (edge) ID
gdfSegments['edge_ID'] = gdfSegments.index

* Create geodataframes of the from- and to-nodes of each segment

In [9]:
# Build point layers from the two ends of every segment.
# "To" nodes: first vertex of each segment, tagged with the segment's index
gdfToNodes = gdfSegments.copy(deep=True)
firstVertices = gdfToNodes['geometry'].apply(lambda seg: Point(seg.coords[0]))
gdfToNodes['geometry'] = firstVertices
gdfToNodes['to_id'] = gdfToNodes.index

# "From" nodes: last vertex of each segment, tagged with the segment's index
gdfFromNodes = gdfSegments.copy(deep=True)
lastVertices = gdfFromNodes['geometry'].apply(lambda seg: Point(seg.coords[-1]))
gdfFromNodes['geometry'] = lastVertices
gdfFromNodes['from_id'] = gdfFromNodes.index

In [16]:
# Save both node layers to the scratch folder for inspection
for nodeLayer, shpPath in ((gdfToNodes, '../scratch/ToNodes.shp'),
                           (gdfFromNodes, '../scratch/FromNodes.shp')):
    nodeLayer.to_file(shpPath)

* Spatially overlay the to and from nodes; this provides a table linking the two (the start of an edge list)

In [17]:
# Spatially join the to-nodes onto the from-nodes; how='right' keeps every
# from-node row, matched or not
gdfNodes = gpd.sjoin(gdfToNodes, gdfFromNodes, how='right')

# Save the joined node table to the scratch folder
gdfNodes.to_file('../scratch/nodes.shp')

* Append the to and from node IDs to the segments

In [11]:
# Attach the from/to IDs to each segment by matching the segment's edge_ID
# against the from_id in the node link table
nodeLinks = gdfNodes[['from_id', 'to_id']]
gdfOut = gdfSegments.merge(nodeLinks, left_on='edge_ID', right_on='from_id')

In [12]:
# Join the biogas data to the segments (from gdfStart); how='left' keeps
# every segment, leaving NaN where no start point matches
gdfOut = gpd.sjoin(left_df=gdfOut,right_df=gdfStart[['geometry','Biogas P_1']],how='left')

# Segments without a matching start point get a biogas value of 0.
# Assign the filled column back instead of calling fillna(inplace=True) on
# the column selection: the chained in-place form acts on a temporary under
# pandas Copy-on-Write and would leave the NaNs in gdfOut.
gdfOut['Biogas P_1'] = gdfOut['Biogas P_1'].fillna(0.0)

In [13]:
# Store the biogas values as 64-bit integers under a cleaner column name
biogasValues = gdfOut['Biogas P_1'].to_numpy()
gdfOut['BG_potential'] = biogasValues.astype(np.int64)

In [14]:
# Write the pipeline segments, restricted to the columns of interest, to a shapefile
pipelineCols = ['edge_ID', 'from_id', 'to_id', 'route_id', 'BG_potential', 'geometry']
gdfOut[pipelineCols].to_file('../data/processed/BasePipeline.shp')

In [15]:
# Write the edge list (from/to IDs plus the biogas value) to CSV without the row index
edgeListCols = ['from_id', 'to_id', 'Biogas P_1']
gdfOut[edgeListCols].to_csv('../data/processed/BaseEdgeList.csv', index=False)

In [None]:
# Write out edges and nodes for osmnx
# NOTE: this binds a second name to the same object as gdfOut (no copy is
# made), so in-place changes made through either name are seen by both.
gdfEdges = gdfOut