# Network Analysis
This notebook computes cumulative biogas potential for the pipeline segments. 
1. Read in the edge list computed from the pipeline segments. This includes the edge ID, the downstream edge ID, and the amount of biogas potential at the edge source. 
2. Build a graph and then iterate through each node in the graph, trace upstream, and tally the cumulative upstream biogas potential values.
3. Export as a csv and shapefile

In [1]:
#Import packages
import networkx as nx
import numpy as np
import pandas as pd
import geopandas as gpd

In [2]:
#Load the base pipeline segments from the processed shapefile into a GeoDataFrame
pipelines_path = '../data/processed/BasePipelines.shp'
gdf_Pipelines = gpd.read_file(pipelines_path)
#Preview the first rows
gdf_Pipelines.head()

Unnamed: 0,route_id,edge_ID,downstream,Fac_ID,Waste,Biogas,Type,Connection,geometry
0,291,0,1,291,490225.577124,13726320.0,Source,,"LINESTRING (1618626.822 -297167.070, 1618626.8..."
1,291,1,2,-1,0.0,0.0,Route,,"LINESTRING (1619131.223 -296158.267, 1619635.6..."
2,291,2,3,-1,0.0,0.0,Route,,"LINESTRING (1621148.830 -293674.376, 1621148.8..."
3,291,3,4,-1,0.0,0.0,Route,,"LINESTRING (1621148.830 -293131.857, 1621148.8..."
4,291,4,5,-1,0.0,0.0,Route,,"LINESTRING (1621653.231 -288087.841, 1622157.6..."


In [3]:
#Name the columns that define the network and the quantity to accumulate
source_fld = "edge_ID"     #ID of each edge; used as the graph's source node
target_fld = "downstream"  #ID of the downstream edge; used as the graph's target node
weight_fld = "Waste"       #column that is summed over upstream nodes

#### Create and populate the graph
* Create a directed multigraph (`nx.MultiDiGraph`)
* Iterate through the edge list and add edges from the source/downstream nodes & weights

In [4]:
#Build a directed multigraph with one edge per row, running from the
# source field to the target (downstream) field
G = nx.from_pandas_edgelist(
    gdf_Pipelines,
    source_fld,
    target_fld,
    edge_attr=True,               #attach all remaining columns as edge attributes
    create_using=nx.MultiDiGraph,
)

#### Compute total upstream biogas potential 
Iterate through each "from" node, find all of its upstream nodes, and sum the values of the weight field set above (`weight_fld`, currently `Waste`) over the node itself and all of its upstream nodes.

In [5]:
def getUpstream(node_id):
    """Return the summed weight of a node and every node upstream of it.

    Looks up all ancestors of `node_id` in the module-level graph `G`
    (i.e. every node with a path leading to `node_id`), adds `node_id`
    itself, and sums the `weight_fld` column of `gdf_Pipelines` over the
    rows whose `source_fld` value is in that set.

    Parameters
    ----------
    node_id : node label present in `G`

    Returns
    -------
    int
        The summed weight, truncated to an integer.
    """
    #Every node that can reach this one, plus the node itself
    contributing = nx.ancestors(G, node_id) | {node_id}
    #Select the rows whose source ID is one of the contributing nodes
    is_contributing = gdf_Pipelines[source_fld].isin(contributing)
    #Total the weight field over the selected rows
    return int(gdf_Pipelines.loc[is_contributing, weight_fld].sum())

In [6]:
#Compute the accumulated upstream total for every edge ID...
accum_waste = gdf_Pipelines[source_fld].apply(getUpstream)
#...and store it on the pipelines table as a new column
gdf_Pipelines['AccumWaste'] = accum_waste

In [7]:
#Write the pipelines, now carrying the AccumWaste column, to a new shapefile
accum_waste_path = '../data/processed/AccumWaste.shp'
gdf_Pipelines.to_file(accum_waste_path)

### Create node file

In [8]:
#Reproject the pipelines to WGS84 (EPSG:4326) so coordinates are longitude/latitude
gdfWGS84 = gdf_Pipelines.to_crs(epsg=4326)
#Preview the reprojected rows
gdfWGS84.head()

Unnamed: 0,route_id,edge_ID,downstream,Fac_ID,Waste,Biogas,Type,Connection,geometry,AccumWaste
0,291,0,1,291,490225.577124,13726320.0,Source,,"LINESTRING (-78.12924 34.88877, -78.12816 34.8...",2722067
1,291,1,2,-1,0.0,0.0,Route,,"LINESTRING (-78.12166 34.89680, -78.11515 34.9...",3005386
2,291,2,3,-1,0.0,0.0,Route,,"LINESTRING (-78.09463 34.91522, -78.09455 34.9...",3106569
3,291,3,4,-1,0.0,0.0,Route,,"LINESTRING (-78.09347 34.92001, -78.09239 34.9...",3114801
4,291,4,5,-1,0.0,0.0,Route,,"LINESTRING (-78.07723 34.96372, -78.07072 34.9...",3163657


In [9]:
#Extract the first point in each segment (coords are (x, y) = (longitude, latitude))
#NOTE: these two columns are written onto gdfWGS84 itself and are overwritten by
# the last-point extraction further down, so the cells must be run in order
gdfWGS84['longitude'] = gdfWGS84['geometry'].apply(lambda geom: geom.coords[0][0])
gdfWGS84['latitude'] = gdfWGS84['geometry'].apply(lambda geom: geom.coords[0][1])
#reset_index(drop=True) discards the old index directly instead of turning it into
# an 'index' column that then has to be dropped by name
df_First = (gdfWGS84[['edge_ID','latitude','longitude']]
             .reset_index(drop=True))
df_First.head(2)

Unnamed: 0,edge_ID,latitude,longitude
0,0,34.888768,-78.129238
1,1,34.896802,-78.121655


In [10]:
#Extract the last point in each segment (coords are (x, y) = (longitude, latitude))
#NOTE: this overwrites the 'longitude'/'latitude' columns previously set on gdfWGS84
# from the first points, so the cells must be run in order
gdfWGS84['longitude'] = gdfWGS84['geometry'].apply(lambda geom: geom.coords[-1][0])
gdfWGS84['latitude'] = gdfWGS84['geometry'].apply(lambda geom: geom.coords[-1][1])
#The end of a segment is keyed by its downstream ID, so relabel that column as
# 'edge_ID' to line up with the first-point table; reset_index(drop=True) discards
# the old index directly instead of creating and then dropping an 'index' column
df_Last = (gdfWGS84[['downstream','latitude','longitude']]
            .rename(columns={'downstream':'edge_ID'})
            .reset_index(drop=True))
df_Last.head(2)

Unnamed: 0,edge_ID,latitude,longitude
0,1,34.896802,-78.121655
1,2,34.915217,-78.094629


In [12]:
#Stack the first-point and last-point tables and keep one row per distinct
# (edge_ID, latitude, longitude) combination.
#pd.concat replaces DataFrame.append, which was removed in pandas 2.0
df_Nodes = pd.concat([df_First, df_Last], ignore_index=True).drop_duplicates()

In [14]:
#Join the node coordinates onto the pipeline attributes by edge_ID, drop the
# columns not wanted in the output, and write the result to CSV without the index
nodes_out = (gdf_Pipelines
             .merge(df_Nodes, on='edge_ID')
             .drop(columns=['route_id','geometry']))
nodes_out.to_csv("../data/processed/Nodes.csv", index=False)