# Network Analysis
This notebook computes the cumulative biogas potential for the pipeline segments. 
1. Read in the edge list computed from the pipeline segments. This includes the edge ID, the downstream edge ID, and the amount of biogas potential at the edge source. 
2. Build a graph, then iterate through each node on the graph, trace its upstream nodes, and tally the cumulative upstream biogas potential values.
3. Export as a csv and shapefile

In [1]:
#Import packages
import networkx as nx
import numpy as np
import pandas as pd
import geopandas as gpd

In [102]:
#Load the pipeline segments from the BasePipelines shapefile
pipelines_shp = '../data/processed/BasePipelines.shp'
gdf_Pipelines = gpd.read_file(pipelines_shp)
#Preview the first five records
gdf_Pipelines.head(5)

Unnamed: 0,route_id,edge_ID,downstream,SiteNo,Waste,Biogas,Type,geometry
0,179,0,1,179,248472.480342,245692300.0,Source,"LINESTRING (1582814.308 -311794.717, 1582814.3..."
1,179,1,2,-1,0.0,0.0,Route,"LINESTRING (1582309.906 -310281.512, 1582309.9..."
2,179,2,3,-1,0.0,0.0,Route,"LINESTRING (1582309.906 -308768.307, 1582309.9..."
3,179,3,4,-1,0.0,0.0,Route,"LINESTRING (1582309.906 -308263.906, 1582309.9..."
4,179,4,5,-1,0.0,0.0,Route,"LINESTRING (1582309.906 -307255.102, 1582814.3..."


In [3]:
#Names of the columns holding each edge's ID, the ID of the edge it
#flows into, and the value to accumulate
source_fld, target_fld, weight_fld = 'edge_ID', 'downstream', 'Waste'

#### Create and populate the graph
* Create a directed multigraph (`nx.MultiDiGraph`)
* Iterate through the edge list and add edges from the source/downstream nodes & weights

In [6]:
#Build a directed multigraph with one edge per record, running from the
#source_fld value to the target_fld value; edge_attr=True stores the
#remaining columns as edge attributes
G = nx.from_pandas_edgelist(
    gdf_Pipelines,
    source_fld,
    target_fld,
    edge_attr=True,
    create_using=nx.MultiDiGraph,
)

#### Compute total upstream biogas potential 
Iterate through each "from" node and find all its upstream nodes and sum their collective biogas potential values

In [130]:
def getUpstream(node_id):
    """Return the total weight found at, or upstream of, a node.

    Collects every node of graph `G` that has a path leading to `node_id`
    (via `nx.ancestors`), adds `node_id` itself, and sums the `weight_fld`
    column of `gdf_Pipelines` over the records whose `source_fld` value is
    in that set.

    Args:
        node_id: A node of `G`.

    Returns:
        int: The summed weight, truncated to an integer by `int()`.
    """
    #All nodes that can reach this node, plus the node itself
    contributing = nx.ancestors(G, node_id) | {node_id}
    #Flag the pipeline records whose source node belongs to that set
    is_contributing = gdf_Pipelines[source_fld].isin(list(contributing))
    #Total the weight field over the flagged records
    total = gdf_Pipelines.loc[is_contributing, weight_fld].sum()
    return int(total)

In [None]:
#Run getUpstream on every source node and keep the totals as a new column
#NOTE(review): the saved gdfWGS84 preview later in this notebook shows a
#column named 'UpWaste' instead, so those outputs appear to come from an
#earlier run - confirm
accum_waste = gdf_Pipelines[source_fld].apply(getUpstream)
gdf_Pipelines['AccumWaste'] = accum_waste

In [131]:
#Write the pipelines, including the accumulated column, to a new shapefile
accum_shp = '../data/processed/AccumWaste.shp'
gdf_Pipelines.to_file(accum_shp)

### Create node file

In [165]:
#Reproject the pipelines to WGS84 (EPSG:4326)
gdfWGS84 = gdf_Pipelines.to_crs(epsg=4326)
#Preview the first five reprojected records
gdfWGS84.head(5)

Unnamed: 0,route_id,edge_ID,downstream,SiteNo,Waste,Biogas,Type,geometry,UpWaste
0,179,0,1,179,248472.480342,245692300.0,Source,"LINESTRING (-78.54546 34.82150, -78.54441 34.8...",306955
1,179,1,2,-1,0.0,0.0,Route,"LINESTRING (-78.54773 34.83576, -78.54668 34.8...",504646
2,179,2,3,-1,0.0,0.0,Route,"LINESTRING (-78.54458 34.84915, -78.54353 34.8...",522565
3,179,3,4,-1,0.0,0.0,Route,"LINESTRING (-78.54353 34.85361, -78.54247 34.8...",630884
4,179,4,5,-1,0.0,0.0,Route,"LINESTRING (-78.54142 34.86254, -78.53494 34.8...",635032


In [195]:
#Store the x (longitude) and y (latitude) of each segment's first vertex
gdfWGS84['longitude'] = gdfWGS84['geometry'].apply(lambda line: line.coords[0][0])
gdfWGS84['latitude'] = gdfWGS84['geometry'].apply(lambda line: line.coords[0][1])
#Keep only the segment ID together with its start-point coordinates
first_cols = ['edge_ID', 'latitude', 'longitude']
df_First = gdfWGS84[first_cols].reset_index().drop(columns='index')
df_First.head(2)

Unnamed: 0,edge_ID,latitude,longitude
0,0,34.821503,-78.545459
1,1,34.835759,-78.547733


In [196]:
#Overwrite the coordinate columns with each segment's last vertex
gdfWGS84['longitude'] = gdfWGS84['geometry'].apply(lambda line: line.coords[-1][0])
gdfWGS84['latitude'] = gdfWGS84['geometry'].apply(lambda line: line.coords[-1][1])
#Label each end point with the ID of the downstream segment, stored under
#the 'edge_ID' column name
last_cols = ['downstream', 'latitude', 'longitude']
df_Last = (
    gdfWGS84[last_cols]
    .reset_index()
    .rename(columns={'downstream': 'edge_ID'})
    .drop(columns='index')
)
df_Last.head(2)

Unnamed: 0,edge_ID,latitude,longitude
0,1,34.835759,-78.547733
1,2,34.84915,-78.544577


In [197]:
#Stack the start-point and end-point tables into one node table and drop
#rows that are exact duplicates.
#The frames built in the cells above are df_First/df_Last (the original
#gdf_First/gdf_Last names are never defined, so a fresh kernel raised a
#NameError), and DataFrame.append was removed in pandas 2.0, so pd.concat
#is used instead.
df_Nodes = pd.concat([df_First, df_Last], ignore_index=True).drop_duplicates()

In [204]:
#Join the node coordinates onto the pipeline attributes by edge_ID, drop
#the route_id column, and write the result to csv without the index
nodes_out = (
    gdf_Pipelines
    .merge(df_Nodes, on='edge_ID')
    .drop(columns='route_id')
)
nodes_out.to_csv("../data/processed/Nodes.csv", index=False)