# Network Analysis
This notebook computes the cumulative biogas potential for the pipeline segments. 
1. Read in the edge list computed from the pipeline segments. This includes the edge ID, the downstream edge ID, and the amount of biogas potential at the edge source. 
2. Build a graph, then iterate through each node in the graph, trace its upstream nodes, and tally their cumulative biogas potential values.
3. Export as a csv and shapefile

In [None]:
#Import packages
import networkx as nx
import numpy as np
import pandas as pd
import geopandas as gpd

In [None]:
#Load the base pipeline segments from the processed-data folder into a GeoDataFrame
gdf_Pipelines = gpd.read_file(filename='../data/processed/BasePipelines.shp')
#Preview the first few records
gdf_Pipelines.head()

In [None]:
#Column names in the pipeline table that drive the network analysis:
# source_fld -> ID of each edge
# target_fld -> ID of the edge immediately downstream of it
# weight_fld -> value that gets summed over the upstream network
source_fld, target_fld, weight_fld = 'edge_ID', 'downstream', 'Waste'

#### Create and populate the graph
* Create a directed multigraph (`MultiDiGraph`)
* Iterate through the edge list and add edges from the source/downstream nodes & weights

In [None]:
#Build a directed multigraph from the pipeline table: one edge per record,
# pointing from the record's edge ID to its downstream ID.
# edge_attr=True stores the remaining columns of each record on its edge.
G = nx.from_pandas_edgelist(
    gdf_Pipelines, source_fld, target_fld,
    edge_attr=True, create_using=nx.MultiDiGraph,
)

In [None]:
#Keep only the edges that flow into a terminal node, i.e. those whose
# downstream ID contains a "T"
gdf_terminal = gdf_Pipelines.loc[lambda edges: edges['downstream'].str.contains("T")]
gdf_terminal.head()

In [None]:
#Label every node with the terminal node it ultimately flows into.
# For each terminal node, its network is the node itself plus all of its
# ancestors in the graph (every node with a directed path leading to it).
network_dict = {}
for terminal_node in gdf_terminal['downstream'].values:
    members = nx.ancestors(G, terminal_node) | {terminal_node}
    network_dict.update(dict.fromkeys(members, terminal_node))

In [None]:
#Tag each pipeline record with the network (terminal node ID) of its downstream node.
# A downstream ID missing from network_dict raises a KeyError.
gdf_Pipelines['Network'] = [network_dict[node] for node in gdf_Pipelines['downstream']]

#### Compute total upstream biogas potential 
Iterate through each "from" node and find all its upstream nodes and sum their collective biogas potential values

In [None]:
#Function that totals the weight field over a node and everything upstream of it
def getUpstream(node_id):
    """Return the accumulated weight at `node_id`.

    Collects `node_id` together with all of its ancestors in the graph `G`
    (the nodes that have a directed path leading to it), then sums the
    `weight_fld` column of `gdf_Pipelines` over the records whose
    `source_fld` value is one of those nodes.

    Parameters
    ----------
    node_id : hashable
        ID of a node present in `G`.

    Returns
    -------
    int
        The summed weight, truncated to an integer.
    """
    #The node itself plus every node upstream of it
    contributing = nx.ancestors(G, node_id) | {node_id}
    #Flag the records that originate at one of those nodes
    is_contributor = gdf_Pipelines[source_fld].isin(list(contributing))
    return int(gdf_Pipelines.loc[is_contributor, weight_fld].sum())

In [None]:
#Compute the accumulated upstream total for every record, keyed on its own edge ID
gdf_Pipelines['AccumWaste'] = gdf_Pipelines[source_fld].map(getUpstream)

In [None]:
#Save the pipelines, now carrying the Network and AccumWaste fields, as a shapefile
gdf_Pipelines.to_file(filename='../data/processed/AccumWaste.shp')

### Create node file
Here we want to extract the vertices from the `gdf_Pipelines` features into their own feature class. 

We start by pulling out the start point of each LineString feature to a new dataframe, but this still lacks the end point of the most downstream point of each network (i.e. where they connect to the pipeline). So we then extract the end points and append those to the start points, removing duplicates (i.e. nodes that occur at the end of one feature and the start of another). 

Finally, we append the edge attribute data to each node feature. 

In [None]:
#Reproject the pipelines to WGS84 (EPSG:4326) so that vertex coordinates
# can be read off as longitude/latitude
gdfWGS84 = gdf_Pipelines.to_crs(epsg=4326)
gdfWGS84.head()

In [None]:
#Record the first vertex of each segment as that edge's node location
# (coords[0] is the first (x, y) pair of the line: x = longitude, y = latitude)
gdfWGS84['longitude'] = [line.coords[0][0] for line in gdfWGS84['geometry']]
gdfWGS84['latitude'] = [line.coords[0][1] for line in gdfWGS84['geometry']]
#Keep just the ID and coordinates, renumbering the rows from zero
df_First = gdfWGS84[['edge_ID','latitude','longitude']].reset_index(drop=True)
df_First.head(2)

In [None]:
#Record the last vertex of each segment; this overwrites the start-point
# coordinates stored on gdfWGS84 by the previous cell
gdfWGS84['longitude'] = [line.coords[-1][0] for line in gdfWGS84['geometry']]
gdfWGS84['latitude'] = [line.coords[-1][1] for line in gdfWGS84['geometry']]
#The end point is where the downstream node sits, so label it with the
# downstream ID (renamed to edge_ID to line up with df_First)
df_Last = (gdfWGS84[['downstream','latitude','longitude']]
            .rename(columns={'downstream':'edge_ID'})
            .reset_index(drop=True))
df_Last.head(2)

In [None]:
#Stack the start points and end points into a single node table and drop exact
# duplicates (a node that ends one segment and starts the next appears in both).
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
df_Nodes = pd.concat([df_First, df_Last], ignore_index=True).drop_duplicates()

In [None]:
#Attach the edge attributes to the node coordinates, matching on the shared ID.
# NOTE(review): this is an inner join (the pandas default), so node IDs that never
# occur as an edge_ID in gdf_Pipelines are dropped -- confirm that is intended.
gdf_Out = gdf_Pipelines.merge(df_Nodes, how='inner', on='edge_ID')

In [None]:
#Edge length in km. Dividing by 1000 assumes the layer's CRS units are metres
# -- TODO confirm against the CRS of BasePipelines.shp
gdf_Out['length_km'] = gdf_Out['geometry'].length.div(1000)

In [None]:
#Remove the geometry column in place, leaving only the tabular attributes
gdf_Out.drop('geometry', axis='columns', inplace=True)
gdf_Out.head()

In [None]:
#Give the ID columns node-oriented names, then write the table to CSV
# (the row index is left out of the file)
(gdf_Out
 .rename(columns={'edge_ID':'node_ID', 'downstream':'downstream_ID'})
 .to_csv("../data/processed/Nodes.csv", index=False))

In [None]:
#Write a metadata file describing the columns of Nodes.csv.
# Column names are spelled exactly as they appear in the CSV header
# (node_ID / downstream_ID), and the exported length_km column is documented.
with open("../data/processed/Nodes_readme.txt","w") as metadata:
    metadata.write('''
node_ID\t\tThe internal ID of the node

downstream_ID\tThe internal ID of the node immediately downstream of the node
\t\t[A "T" indicates it's a terminal node, i.e. connects to existing pipeline]

Fac_ID\t\tThe biogas facility ID, if the node originates from it ("NA" indicates it's a connector)

Waste\t\tWaste originating at the node (if a biogas source)

Biogas\t\tBiogas potential at the node (if a biogas source)

Type\t\t"Source" = node occurs at a biogas source
\t\t"Route" = node is a junction along the route from sources to existing pipeline
\t\t"Output"= node occurs where the route meets an existing pipeline

Connection\t"Transmission" = output node connects to an existing transmission pipeline
\t\t"Distribution" = output node connects to an existing distribution pipeline
\t\t"NA"/None = node is not an output node

Network\t\t[NEED VERIFICATION]ID of the network to which the node belongs. 
\t\t(Each network has its own connection to existing pipeline)

AccumWaste\tAccumulated upstream Waste at the node

length_km\tLength of the pipeline segment that starts at the node, in km

latitude/longitude Coordinates of the node (WGS84)
    ''')