# Network Analysis
This notebook computes cumulative biogas potential for the pipeline segments. 
1. Read in the edge list computed from the pipeline segments. This includes the edge ID, the downstream edge ID, and the amount of biogas potential at the edge source. 
2. Build a graph, then iterate through each node on the graph, trace its upstream nodes, and tally their cumulative values at that node.
3. Export as a csv and shapefile

In [6]:
#Import packages
import networkx as nx
import numpy as np
import pandas as pd
import geopandas as gpd

In [7]:
#Load the pipeline segments from the BasePipelines shapefile into a GeoDataFrame
gdf_Pipelines = gpd.read_file(filename='../data/processed/BasePipelines.shp')
#Preview the first five records
gdf_Pipelines.head(n=5)

Unnamed: 0,route_id,edge_ID,downstream,Fac_ID,Waste,Biogas,Type,Connection,geometry
0,291,0,1,291,490225.577124,13726320.0,Source,,"LINESTRING (1618626.822 -297167.070, 1618626.8..."
1,291,1,2,-1,0.0,0.0,Route,,"LINESTRING (1619131.223 -296158.267, 1619635.6..."
2,291,2,3,-1,0.0,0.0,Route,,"LINESTRING (1621148.830 -293674.376, 1621148.8..."
3,291,3,4,-1,0.0,0.0,Route,,"LINESTRING (1621148.830 -293131.857, 1621148.8..."
4,291,4,5,-1,0.0,0.0,Route,,"LINESTRING (1621653.231 -288087.841, 1622157.6..."


In [8]:
#Names of the columns used to build the graph and to accumulate values:
# source_fld -> ID of each edge, target_fld -> ID of the edge it flows into,
# weight_fld -> the value that is summed over upstream edges
source_fld, target_fld, weight_fld = 'edge_ID', 'downstream', 'Waste'

#### Create and populate the graph
* Create a directed multigraph (`nx.MultiDiGraph`)
* Iterate through the edge list and add edges from the source/downstream nodes & weights

In [9]:
#Build a directed multigraph with one edge per record, pointing from the
# record's source ID to its downstream ID; edge_attr=True attaches the
# remaining columns of the record to the edge as attributes
graph_options = dict(source=source_fld,
                     target=target_fld,
                     edge_attr=True,
                     create_using=nx.MultiDiGraph)
G = nx.from_pandas_edgelist(gdf_Pipelines, **graph_options)

In [13]:
#Keep only the records whose downstream ID contains the terminal marker "T"
is_terminal = gdf_Pipelines['downstream'].str.contains("T")
gdf_terminal = gdf_Pipelines[is_terminal]
gdf_terminal.head()

Unnamed: 0,route_id,edge_ID,downstream,Fac_ID,Waste,Biogas,Type,Connection,geometry
5,291,5,5T,-1,0.0,0.0,Output,,"LINESTRING (1622662.034 -287079.038, 1623166.4..."
18,154,18,18T,-1,0.0,0.0,Output,,"LINESTRING (1581301.103 -298175.874, 1581301.1..."
21,1930,21,21T,-1,0.0,0.0,Output,,"LINESTRING (1624175.239 -226550.845, 1624679.6..."
24,1925,24,24T,1925,226652.386212,6346267.0,Output,Transmission,"LINESTRING (1737161.199 -158961.030, 1737665.6..."
26,1932,26,26T,-1,0.0,0.0,Output,,"LINESTRING (1614591.609 -259336.950, 1615096.0..."


In [25]:
#Map every node to the terminal node it leads to.
# nx.ancestors returns all nodes that have a path to the given node,
# i.e. everything upstream of the terminal.
network_dict = {}
for outlet_id in gdf_terminal['downstream'].values:
    #The terminal node is a member of its own network...
    network_dict[outlet_id] = outlet_id
    #...as is every node upstream of it
    network_dict.update(dict.fromkeys(nx.ancestors(G, outlet_id), outlet_id))

In [27]:
#Label each segment with the terminal node recorded for its downstream ID
# (a downstream ID missing from network_dict raises a KeyError)
gdf_Pipelines['Network'] = gdf_Pipelines['downstream'].map(network_dict.__getitem__)

Unnamed: 0,route_id,edge_ID,downstream,Fac_ID,Waste,Biogas,Type,Connection,geometry,Network
2813,1770,2813,2813T,1770,554.064031,15513.792862,Output,,"LINESTRING (1597162.785 -299334.101, 1597124.8...",2813T
2814,2040,2814,418,2040,514.902437,14417.268236,Source,,"LINESTRING (1630732.460 -241682.894, 1630732.4...",419T
2815,1794,2815,1401,1794,494.30634,13840.577507,Source,,"LINESTRING (1606521.183 -297167.070, 1606521.1...",563T
2816,2041,2816,899,2041,294.229964,8238.438992,Source,,"LINESTRING (1629219.255 -255301.737, 1629219.2...",241T
2817,299,2817,1730,299,192.078293,5378.192215,Source,,"LINESTRING (1630267.006 -287818.560, 1632245.6...",1731T


#### Compute total upstream biogas potential 
Iterate through each "from" node, find all of its upstream nodes, and sum their values for the field named in `weight_fld` (set to `Waste` above).

In [28]:
#Create function that finds all upstream nodes and compute total upstream waste
def getUpstream(node_id):
    """Return the total of `weight_fld` for a node and everything upstream of it.

    Relies on the notebook-level names `G`, `gdf_Pipelines`, `source_fld`
    and `weight_fld`.

    Parameters
    ----------
    node_id : a node of `G`

    Returns
    -------
    int
        Sum of `weight_fld` over the records whose `source_fld` value is
        `node_id` or one of its ancestors in `G`, truncated by `int()`.
    """
    #Ancestors are the nodes with a path leading to node_id (i.e. upstream);
    # the node itself is included in the tally
    contributing_ids = nx.ancestors(G, node_id) | {node_id}
    #Select the records that originate at any of those nodes
    is_contributor = gdf_Pipelines[source_fld].isin(list(contributing_ids))
    total = gdf_Pipelines.loc[is_contributor, weight_fld].sum()
    return int(total)

In [29]:
#Compute the accumulated upstream total for every segment's source node
gdf_Pipelines['AccumWaste'] = gdf_Pipelines[source_fld].map(getUpstream)

In [30]:
#Write the segments, including the new Network and AccumWaste columns, to a shapefile
gdf_Pipelines.to_file(filename='../data/processed/AccumWaste.shp')

### Create node file
Here we want to extract the vertices from the `gdf_Pipelines` features into their own feature class. 

We start by pulling out the start point of each LineString feature to a new dataframe, but this still lacks the end point of the most downstream point of each network (i.e. where they connect to the pipeline). So we then extract the end points and append those to the start points, removing duplicates (i.e. nodes that occur at the end of one feature and the start of another). 

Finally, we append the edge attribute data to each node feature. 

In [31]:
#Reproject to WGS84 (EPSG:4326) so vertex coordinates are longitude/latitude
gdfWGS84 = gdf_Pipelines.to_crs(epsg=4326)
gdfWGS84.head()

Unnamed: 0,route_id,edge_ID,downstream,Fac_ID,Waste,Biogas,Type,Connection,geometry,Network,AccumWaste
0,291,0,1,291,490225.577124,13726320.0,Source,,"LINESTRING (-78.12924 34.88877, -78.12816 34.8...",5T,2722067
1,291,1,2,-1,0.0,0.0,Route,,"LINESTRING (-78.12166 34.89680, -78.11515 34.9...",5T,3005386
2,291,2,3,-1,0.0,0.0,Route,,"LINESTRING (-78.09463 34.91522, -78.09455 34.9...",5T,3106569
3,291,3,4,-1,0.0,0.0,Route,,"LINESTRING (-78.09347 34.92001, -78.09239 34.9...",5T,3114801
4,291,4,5,-1,0.0,0.0,Route,,"LINESTRING (-78.07723 34.96372, -78.07072 34.9...",5T,3163657


In [32]:
#Record the coordinates of the first vertex of each segment
start_vertices = [line.coords[0] for line in gdfWGS84['geometry']]
gdfWGS84['longitude'] = [vertex[0] for vertex in start_vertices]
gdfWGS84['latitude'] = [vertex[1] for vertex in start_vertices]
#Keep just the segment ID and its start coordinates, with a fresh index
df_First = gdfWGS84[['edge_ID','latitude','longitude']].reset_index(drop=True)
df_First.head(2)

Unnamed: 0,edge_ID,latitude,longitude
0,0,34.888768,-78.129238
1,1,34.896802,-78.121655


In [33]:
#Record the coordinates of the last vertex of each segment
# (this overwrites the longitude/latitude columns set from the first vertex)
end_vertices = [line.coords[-1] for line in gdfWGS84['geometry']]
gdfWGS84['longitude'] = [vertex[0] for vertex in end_vertices]
gdfWGS84['latitude'] = [vertex[1] for vertex in end_vertices]
#The last vertex is labelled with the downstream ID, stored under 'edge_ID'
# so the table lines up with df_First
df_Last = (gdfWGS84[['downstream','latitude','longitude']]
            .rename(columns={'downstream':'edge_ID'})
            .reset_index(drop=True))
df_Last.head(2)

Unnamed: 0,edge_ID,latitude,longitude
0,1,34.896802,-78.121655
1,2,34.915217,-78.094629


In [35]:
#Stack the start-point and end-point tables and drop rows that are exact
# duplicates (a node that ends one segment and starts the next).
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
df_Nodes = pd.concat([df_First, df_Last], ignore_index=True).drop_duplicates()

In [38]:
#Attach the node coordinates to the edge attributes, matching on 'edge_ID'
# NOTE(review): merge defaults to an inner join, so node IDs in df_Nodes that
# never occur in gdf_Pipelines['edge_ID'] (e.g. terminal IDs such as '5T' taken
# from 'downstream') are not carried into the result - confirm this is intended.
node_attributes = gdf_Pipelines.merge(df_Nodes, on='edge_ID')
#Drop the columns that are not wanted in the node table
gdf_Out = node_attributes.drop(columns=['route_id','geometry'])

Unnamed: 0,edge_ID,downstream,Fac_ID,Waste,Biogas,Type,Connection,Network,AccumWaste,latitude,longitude
0,0,1,291,490225.577124,13726320.0,Source,,5T,2722067,34.888768,-78.129238
1,1,2,-1,0.0,0.0,Route,,5T,3005386,34.896802,-78.121655
2,2,3,-1,0.0,0.0,Route,,5T,3106569,34.915217,-78.094629
3,3,4,-1,0.0,0.0,Route,,5T,3114801,34.920013,-78.093468
4,4,5,-1,0.0,0.0,Route,,5T,3163657,34.963718,-78.07723


In [39]:
#Switch the ID columns to node terminology, then write the node table to CSV
# without the dataframe index
csv_column_names = {'edge_ID':'node_ID','downstream':'downstream_ID'}
gdf_Out.rename(columns=csv_column_names).to_csv("../data/processed/Nodes.csv",index=False)

In [43]:
#Write a metadata file describing the columns of Nodes.csv.
# Column names are spelled exactly as they appear in the CSV header
# (node_ID / downstream_ID, as renamed when the CSV is exported).
with open("../data/processed/Nodes_readme.txt","w") as metadata:
    metadata.write('''
node_ID\t\tThe internal ID of the node

downstream_ID\tThe internal ID of the node immediately downstream of the node

\t\t[A "T" indicates it's a terminal node, i.e. connects to existing pipeline]

Fac_ID\t\tThe biogas facility ID, if the node originates from it ("-1" indicates it's a connector)

Waste\t\tWaste originating at the node (if a biogas source)

Biogas\t\tBiogas potential at the node (if a biogas source)

Type\t\t"Source" = node occurs at a biogas source
\t\t"Route" = node is a junction along the route from sources to existing pipeline
\t\t"Output"= node occurs where the route meets an existing pipeline

Connection\t"Transmission" = output node connects to an existing transmission pipeline
\t\t"Distribution" = output node connects to an existing distribution pipeline
\t\t"NA"/None = node is not an output node

Network\t\t[NEED VERIFICATION]ID of the network to which the node belongs. 
\t\t(Each network has its one connection to existing pipeline)

AccumWaste\tAccumulated upstream Waste at the node

latitude/longitude Coordinates of the node
    ''')