# Converting the Edges to Indexed Node Pairs

## converts coal edge_df into a tab separated .txt file (graph_coal_uw.txt) containing node pairs (edges), using the indexing from node_df, to index nodes

### this notebook has been converted into a script, Aaron_edge_conversion_coal_uw.py, due to the length of its run time; more extensive documentation is written there

In [15]:
import os, pickle, sys
import json
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection

from shapely import geometry

import geopandas as gpd

In [16]:
import pandas as pd
import numpy as np
import networkx as nx

In [17]:
root = os.path.abspath(os.path.join(os.getcwd(),'..'))

In [18]:
sys.path.append(root)

In [19]:
from ffsc.flow import *

In [20]:
# dictionary assigning colors to different features of the network
colors = {
    'coal_mines':           '#89163e',
    'oil_fields':           '#001c49',
    'well_pads':            '#6c0093',
    'processing_plants':    '#be46a6',
    'refineries':           '#be46a6',
    'ports':                '#7f5636',
    'lng':                  '#46beb1',
    'shipping':             '#7f5636',
    'pipelines':            '#006400',
    'railways':             '#646464',
    'cities':               '#96C3E4',
    'power_stations':       '#8ac27e',
}

In [21]:
# dictionary assigning paths to .csv files
all_data_dirs = {
            'cities-N':              os.path.join('.','..','results_backup','simplify','cities_nodes_dataframe.csv'),
            'pipelines-cities':      os.path.join('.','..','results_backup','simplify','cities_pipelines_edge_dataframe.csv'),
            'ports-cities':          os.path.join('.','..','results_backup','output','cities_ports_edge_dataframe.csv'),
            'railways-cities':       os.path.join('.','..','results_backup','simplify','cities_railways_edge_dataframe_alt.csv'),
            'coalmines-railways':    os.path.join('.','..','results_backup','simplify','coal_mine_railway_edge_dataframe.csv'),
            'coalmines-N':           os.path.join('.','..','results_backup','output','coal_mines_nodes_dataframe.csv'),
            'lng-N':                 os.path.join('.','..','results_backup','output','lng_nodes_dataframe.csv',  ),
            'lng-pipelines':         os.path.join('.','..','results_backup','simplify','lng_pipeline_edge_dataframe.csv'),
            'lng-shipping':          os.path.join('.','..','results_backup','output','lng_shipping_route_edge_dataframe.csv'),
            'oilfields-pipelines':   os.path.join('.','..','results_backup','simplify','oil_field_edge_dataframe.csv'),
            'oilfields-N':           os.path.join('.','..','results_backup','oil_field_nodes_fixup.csv'),
            'pipelines-pipelines':   os.path.join('.','..','results_backup','simplify','pipeline_edge_dataframe.csv'),
            'pipelines-N':           os.path.join('.','..','results_backup','simplify','pipeline_node_dataframe.csv'),
            'ports-N':               os.path.join('.','..','results_backup','output','port_node_dataframe.csv',  ),
            'ports-pipelines':       os.path.join('.','..','results_backup','simplify','port_pipeline_edge_dataframe.csv'),
            'ports-shipping':        os.path.join('.','..','results_backup','output','port_ship_edge_dataframe.csv'),
            'ports-railways':        os.path.join('.','..','results_backup','simplify','port_railway_edge_dataframe.csv'),
            'powerstn-N':            os.path.join('.','..','results_backup','output','power_station_nodes_dataframe.csv'),
            'powerstn-pipelines':    os.path.join('.','..','results_backup','simplify','power_station_pipeline_edge_dataframe.csv'),
            'powerstn-railways':     os.path.join('.','..','results_backup','simplify','power_station_railway_edge_dataframe.csv'),
            'procplant-N':           os.path.join('.','..','results_backup','output','processing_plant_nodes_dataframe.csv'),
            'procplant-pipelines':   os.path.join('.','..','results_backup','simplify','processing_plant_pipeline_edge_dataframe.csv'),
            'railways-railways':     os.path.join('.','..','results_backup','simplify','railway_edge_dataframe.csv'),
            'railways-N':            os.path.join('.','..','results_backup','simplify','railway_nodes_dataframe.csv'),
            'refineries-N':          os.path.join('.','..','results_backup','output','refinery_nodes_dataframe.csv'),
            'refineries-pipelines':  os.path.join('.','..','results_backup','simplify','refinery_pipeline_edge_dataframe.csv'),
            'shipping-shipping':     os.path.join('.','..','results_backup','output','shipping_edge_dataframe.csv'),
            'shipping-N':            os.path.join('.','..','results_backup','output','shipping_node_dataframe.csv'),
            'wellpads-N':            os.path.join('.','..','results_backup','output','well_pad_nodes_dataframe.csv'),
            'wellpads-pipelines':    os.path.join('.','..','results_backup','simplify','well_pad_pipeline_edge_dataframe.csv'),
        }

In [22]:
gen = make_nx('coal', 'coal')

carrier: coal
recipes used: coal


In [23]:
gen.all_data_dirs = all_data_dirs

In [24]:
gen._load_dfs()

In [25]:
gen._fill_graph()

INFO:ffsc.flow.network_flow:doing step add coalmines -> railways...
INFO:ffsc.flow.network_flow:doing step add railways -> railways...
INFO:ffsc.flow.network_flow:doing step add railways <- railways...
INFO:ffsc.flow.network_flow:doing step add railways -> ports...
INFO:ffsc.flow.network_flow:ports-railways missing distance
INFO:ffsc.flow.network_flow:['Unnamed: 0', 'Unnamed: 0.1', 'PortNode:START_ID(PortNode)', 'RailwayNodeID:END_ID(PipelineNode)', ':TYPE']
INFO:ffsc.flow.network_flow:doing step add railways <- ports...
INFO:ffsc.flow.network_flow:doing step add ports -> shipping_lanes...
INFO:ffsc.flow.network_flow:ports-shipping missing distance
INFO:ffsc.flow.network_flow:['PortNode:START_ID(PortNode)', 'ShipNode:END_ID(ShippingNode)', ':TYPE', 'impedance']
INFO:ffsc.flow.network_flow:doing step add ports <- shipping_lanes...
INFO:ffsc.flow.network_flow:doing step add shipping_lanes -> shipping_lanes...
INFO:ffsc.flow.network_flow:doing step add shipping_lanes <- shipping_lanes...


In [26]:
gen._prep_flow()

INFO:numexpr.utils:Note: NumExpr detected 32 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO:numexpr.utils:NumExpr defaulting to 8 threads.
scope power stations) 17876 28664
INFO:ffsc.flow.network_flow:checking powerstation paths...
INFO:ffsc.flow.network_flow:ii_p 0, p_count 0
INFO:ffsc.flow.network_flow:ii_p 1000, p_count 304
INFO:ffsc.flow.network_flow:ii_p 2000, p_count 567
INFO:ffsc.flow.network_flow:ii_p 3000, p_count 582
INFO:ffsc.flow.network_flow:ii_p 4000, p_count 604
INFO:ffsc.flow.network_flow:ii_p 5000, p_count 604
INFO:ffsc.flow.network_flow:ii_p 6000, p_count 608
INFO:ffsc.flow.network_flow:ii_p 7000, p_count 652
INFO:ffsc.flow.network_flow:ii_p 8000, p_count 705
INFO:ffsc.flow.network_flow:ii_p 9000, p_count 751
INFO:ffsc.flow.network_flow:ii_p 10000, p_count 757
INFO:ffsc.flow.network_flow:ii_p 11000, p_count 759
INFO:ffsc.flow.network_flow:ii_p 12000, p_count 780
INFO:ffsc.flow.network_flow:ii_p 13000, p_count 796
INFO:ffsc.flow.network_flo

**approach:**
- get all the data into nice geopandas dfs
- filter the gdfs on the network edges
- plot the gdfs

In [27]:
ne = gpd.read_file('./../data/ne/ne_10m_countries.gpkg')

In [28]:
all_data = {}
for kk,vv in all_data_dirs.items():
    all_data[kk]= pd.read_csv(vv)

In [29]:
#pickle.load(open(os.path.join('.','..','results_backup','primary','prm_shipping_routes_data.pkl'),'rb'))

In [30]:
# join geometris back to shipping nodes
all_data['shipping-N'] = all_data['shipping-N'].merge(all_data['shipping-shipping'][['StartNodeId:START_ID(ShippingNode)','starting_point']], how='left',left_on='ShippingNodeID:ID(ShippingNode)', right_on='StartNodeId:START_ID(ShippingNode)')
all_data['shipping-N'] = all_data['shipping-N'].merge(all_data['shipping-shipping'][['EndNodeId:END_ID(ShippingNode)','end_point']], how='left',left_on='ShippingNodeID:ID(ShippingNode)', right_on='EndNodeId:END_ID(ShippingNode)')
all_data['shipping-N']['coordinates']=all_data['shipping-N']['starting_point']
all_data['shipping-N']['coordinates'] = all_data['shipping-N']['coordinates'].fillna(all_data['shipping-N']['end_point'])

In [31]:
# replace parentheses with sq brackets
all_data['pipelines-N']['coordinates'] = all_data['pipelines-N']['coordinates'].str.replace('(','[').str.replace(')',']')
all_data['railways-N']['coordinates'] = all_data['railways-N']['coordinates'].str.replace('(','[').str.replace(')',']')

In [32]:
all_nodes = pd.DataFrame({'node_ID':[],'coordinates':[]})

In [33]:
for kk, vv in all_data.items():
    if kk.split('-')[-1]=='N':
        print (kk)
        id_col = [cc for cc in vv.columns if 'ID' in cc][0]
        tmp = vv[[id_col, 'coordinates']]
        tmp = tmp.rename(columns={id_col:'node_ID'})
        # print (tmp.head(5))
        all_nodes = all_nodes.append(tmp)

cities-N
coalmines-N
lng-N
oilfields-N
pipelines-N
ports-N
powerstn-N
procplant-N
railways-N
refineries-N
shipping-N
wellpads-N


**do edges_df**

In [34]:
edge_df = pd.DataFrame.from_records([{'start':e[0],'end':e[1],'z':e[2]['z']} for e in gen.G.edges(data=True)])

In [35]:
edge_df = edge_df[edge_df.start!='supersource']

In [36]:
edge_df['start_type'] = edge_df.start.str.split('_').str[0]
edge_df['end_type'] = edge_df.end.str.split('_').str[0]

In [37]:
edge_df = edge_df.merge(all_nodes.drop_duplicates(subset='node_ID'), how='left',left_on='start', right_on='node_ID').rename(columns={'node_ID':'snode_ID', 'coordinate':'start_coordinates'})

In [38]:
edge_df = edge_df.merge(all_nodes.drop_duplicates(subset='node_ID'), how='left',left_on='end', right_on='node_ID').rename(columns={'node_ID':'enode_ID', 'coordinate':'end_coordinates'})

In [39]:
edge_df = edge_df.drop(columns=['snode_ID','enode_ID'])

In [40]:
edge_df.rename(columns={'coordinates_x':'start_coordinates', 'coordinates_y':'end_coordinates'}, inplace=True)

In [41]:
edge_df['start_coordinates'] = edge_df['start_coordinates'].apply(json.loads)
edge_df['end_coordinates'] = edge_df['end_coordinates'].apply(json.loads)

In [42]:
edge_df['geometry'] = edge_df.apply(lambda row: geometry.LineString([row['start_coordinates'], row['end_coordinates']]), axis=1)

In [43]:
edge_df

Unnamed: 0,start,end,z,start_type,end_type,start_coordinates,end_coordinates,geometry
0,coal_mine_1920,railway_node_25.9047571-24.6667781,0,coal,railway,"[25.917000000186725, -24.666999999954218]","[25.9047571, -24.6667781]",LINESTRING (25.91700000018673 -24.666999999954...
1,railway_node_25.9047571-24.6667781,railway_node_25.9047571-24.6664075,0,railway,railway,"[25.9047571, -24.6667781]","[25.9047571, -24.6664075]","LINESTRING (25.9047571 -24.6667781, 25.9047571..."
2,railway_node_25.9047571-24.6667781,railway_node_25.9047254-24.6670633,0,railway,railway,"[25.9047571, -24.6667781]","[25.9047254, -24.6670633]","LINESTRING (25.9047571 -24.6667781, 25.9047254..."
3,coal_mine_1922,railway_node_53.3958054211536635.55213928019969,0,coal,railway,"[53.394000000101364, 35.57499999971285]","[53.39580542115366, 35.55213928019969]",LINESTRING (53.39400000010136 35.5749999997128...
4,railway_node_53.3958054211536635.55213928019969,railway_node_53.38116831558798535.542777921444795,0,railway,railway,"[53.39580542115366, 35.55213928019969]","[53.381168315587985, 35.542777921444795]",LINESTRING (53.39580542115366 35.5521392801996...
...,...,...,...,...,...,...,...,...
2258601,shipping_node_34564,shipping_node_34768,1212,shipping,shipping,"[-70.69139099, 43.13928986]","[-70.55523682, 43.12379837]","LINESTRING (-70.69139099 43.13928986, -70.5552..."
2258602,shipping_node_34564,shipping_node_33992,508,shipping,shipping,"[-70.69139099, 43.13928986]","[-70.71349335, 43.17829895]","LINESTRING (-70.69139099 43.13928986, -70.7134..."
2258603,shipping_node_33992,shipping_node_34564,508,shipping,shipping,"[-70.71349335, 43.17829895]","[-70.69139099, 43.13928986]","LINESTRING (-70.71349334999999 43.17829895, -7..."
2258604,shipping_node_33992,shipping_node_34768,49,shipping,shipping,"[-70.71349335, 43.17829895]","[-70.55523682, 43.12379837]","LINESTRING (-70.71349334999999 43.17829895, -7..."


**do nodes df**

In [44]:
node_df = pd.DataFrame.from_records([{'node':n[0],'D':n[1]['D']} for n in gen.G.nodes(data=True)])

In [45]:
node_df = node_df.merge(all_nodes, how='left',left_on='node',right_on='node_ID')

In [46]:
node_df = node_df[node_df.node!='supersource']

In [47]:
node_df['geometry'] = node_df['coordinates'].apply(json.loads).apply(geometry.Point)

# Node Conversion

In [48]:
node_coordinates_dict = node_df.to_dict()['coordinates']
node_coordinates_dict

{0: '[25.917000000186725, -24.666999999954218]',
 1: '[25.9047571, -24.6667781]',
 2: '[53.394000000101364, 35.57499999971285]',
 3: '[53.39580542115366, 35.55213928019969]',
 4: '[52.98099999974161, 36.17099999993142]',
 5: '[52.975193103458786, 36.17194361255421]',
 6: '[-2.159999999710697, 34.31000000012751]',
 7: '[-2.183004, 34.3093436]',
 8: '[33.72899999977782, -16.115000000096515]',
 9: '[33.7301248, -16.120079]',
 10: '[7.482999999593258, 6.433000000523869]',
 11: '[7.4893002, 6.4352075]',
 12: '[29.1670000008292, -25.817000000379867]',
 13: '[29.1655643, -25.848461]',
 14: '[30.04999999971292, -28.00000000043711]',
 15: '[30.048072, -28.0005906]',
 16: '[29.233000000221832, -25.899999999797217]',
 17: '[29.2035176, -25.883162]',
 18: '[29.599999999738106, -25.916999999739232]',
 19: '[29.6221431, -25.9400583]',
 20: '[27.500000000086608, -23.667000000114562]',
 21: '[27.548099, -23.6539232]',
 22: '[28.817000000190863, -26.26700000012224]',
 23: '[28.8272418, -26.2690014]',
 

In [49]:
coordinates_node_dict = {value:key for key, value in node_coordinates_dict.items()}
coordinates_node_dict

{'[25.917000000186725, -24.666999999954218]': 0,
 '[25.9047571, -24.6667781]': 1,
 '[53.394000000101364, 35.57499999971285]': 2,
 '[53.39580542115366, 35.55213928019969]': 3,
 '[52.98099999974161, 36.17099999993142]': 4,
 '[52.975193103458786, 36.17194361255421]': 5,
 '[-2.159999999710697, 34.31000000012751]': 6,
 '[-2.183004, 34.3093436]': 7,
 '[33.72899999977782, -16.115000000096515]': 8,
 '[33.7301248, -16.120079]': 9,
 '[7.482999999593258, 6.433000000523869]': 10,
 '[7.4893002, 6.4352075]': 11,
 '[29.1670000008292, -25.817000000379867]': 12,
 '[29.1655643, -25.848461]': 13,
 '[30.04999999971292, -28.00000000043711]': 14,
 '[30.048072, -28.0005906]': 15,
 '[29.233000000221832, -25.899999999797217]': 16,
 '[29.2035176, -25.883162]': 17,
 '[29.599999999738106, -25.916999999739232]': 18,
 '[29.6221431, -25.9400583]': 19,
 '[27.500000000086608, -23.667000000114562]': 20,
 '[27.548099, -23.6539232]': 21,
 '[28.817000000190863, -26.26700000012224]': 22,
 '[28.8272418, -26.2690014]': 23,
 

## Converting Numbered Nodes to Coordinates into a .csv

In [50]:
import csv

In [51]:
with open('node2coordinates.csv', 'w') as f:
    fieldnames = ['Node Number', 'Coordinates']
    writer = csv.DictWriter(f, fieldnames=fieldnames, delimiter='\t')
    writer.writeheader()
    data = [dict(zip(fieldnames, [k, v])) for k, v in node_coordinates_dict.items()]
    writer.writerows(data)

### Creating Edge weights from the impedance
Using formula weight = 1/(1+ln(1+z))

In [70]:
edge_df['z'].max()

581541

In [71]:
z = []

for i in range(len(edge_df['z'])):
    z.append(edge_df['z'][i])

In [72]:
len(z)

2258606

In [73]:
# number of impedances greater than size 1000

count = 0

for i in range(len(x)):
    if z[i] >= 1000:
        count += 1

print(count)

0


In [74]:
# number of impedances equal to zero

count = 0

for i in range(len(x)):
    if z[i] == 0:
        count += 1

print(count)

239338


In [75]:
edge_df

Unnamed: 0,start,end,z,start_type,end_type,start_coordinates,end_coordinates,geometry
0,coal_mine_1920,railway_node_25.9047571-24.6667781,0,coal,railway,"[25.917000000186725, -24.666999999954218]","[25.9047571, -24.6667781]",LINESTRING (25.91700000018673 -24.666999999954...
1,railway_node_25.9047571-24.6667781,railway_node_25.9047571-24.6664075,0,railway,railway,"[25.9047571, -24.6667781]","[25.9047571, -24.6664075]","LINESTRING (25.9047571 -24.6667781, 25.9047571..."
2,railway_node_25.9047571-24.6667781,railway_node_25.9047254-24.6670633,0,railway,railway,"[25.9047571, -24.6667781]","[25.9047254, -24.6670633]","LINESTRING (25.9047571 -24.6667781, 25.9047254..."
3,coal_mine_1922,railway_node_53.3958054211536635.55213928019969,0,coal,railway,"[53.394000000101364, 35.57499999971285]","[53.39580542115366, 35.55213928019969]",LINESTRING (53.39400000010136 35.5749999997128...
4,railway_node_53.3958054211536635.55213928019969,railway_node_53.38116831558798535.542777921444795,0,railway,railway,"[53.39580542115366, 35.55213928019969]","[53.381168315587985, 35.542777921444795]",LINESTRING (53.39580542115366 35.5521392801996...
...,...,...,...,...,...,...,...,...
2258601,shipping_node_34564,shipping_node_34768,1212,shipping,shipping,"[-70.69139099, 43.13928986]","[-70.55523682, 43.12379837]","LINESTRING (-70.69139099 43.13928986, -70.5552..."
2258602,shipping_node_34564,shipping_node_33992,508,shipping,shipping,"[-70.69139099, 43.13928986]","[-70.71349335, 43.17829895]","LINESTRING (-70.69139099 43.13928986, -70.7134..."
2258603,shipping_node_33992,shipping_node_34564,508,shipping,shipping,"[-70.71349335, 43.17829895]","[-70.69139099, 43.13928986]","LINESTRING (-70.71349334999999 43.17829895, -7..."
2258604,shipping_node_33992,shipping_node_34768,49,shipping,shipping,"[-70.71349335, 43.17829895]","[-70.55523682, 43.12379837]","LINESTRING (-70.71349334999999 43.17829895, -7..."


In [76]:
z_numpy = np.array(z)

In [77]:
z_numpy

array([  0,   0,   0, ..., 508,  49, 712])

In [80]:
weights = 1/(1+np.log(1+z_numpy))

In [81]:
weights

array([1.        , 1.        , 1.        , ..., 0.13826577, 0.20358211,
       0.13210945])

In [83]:
np.savetxt('graph_weights_z_coal.txt', weights, delimiter='\t')

### Coordinates to numbers

In [60]:
edge_df_new = edge_df.drop(columns=['start','end','z','start_type','end_type','geometry']).copy(deep = True)

In [61]:
edge_df_new

Unnamed: 0,start_coordinates,end_coordinates
0,"[25.917000000186725, -24.666999999954218]","[25.9047571, -24.6667781]"
1,"[25.9047571, -24.6667781]","[25.9047571, -24.6664075]"
2,"[25.9047571, -24.6667781]","[25.9047254, -24.6670633]"
3,"[53.394000000101364, 35.57499999971285]","[53.39580542115366, 35.55213928019969]"
4,"[53.39580542115366, 35.55213928019969]","[53.381168315587985, 35.542777921444795]"
...,...,...
2258601,"[-70.69139099, 43.13928986]","[-70.55523682, 43.12379837]"
2258602,"[-70.69139099, 43.13928986]","[-70.71349335, 43.17829895]"
2258603,"[-70.71349335, 43.17829895]","[-70.69139099, 43.13928986]"
2258604,"[-70.71349335, 43.17829895]","[-70.55523682, 43.12379837]"


In [None]:
len(edge_df_new)

In [None]:
import time

In [None]:
time_1 = time.time()

for i in range(len(edge_df_new)):
    edge_df_new['start_coordinates'][i] = coordinates_node_dict[str(edge_df_new['start_coordinates'][i])]
    edge_df_new['end_coordinates'][i] = coordinates_node_dict[str(edge_df_new['end_coordinates'][i])]

time_2 = time.time()
print(time_2 - time_1)

In [100]:
### edge_df_new.to_csv('graph_coal_uw.txt', header=False, index = False, sep='\t')