# Adding flow weights to node pairs (edges)

## flow weights are added to the graph_coal_uw.txt file, created using Aaron_edge_conversion_coal.py, to create space separated .txt file graph_coal_flow_weighted.txt

> edges with zero flow/ weight are dropped (this is technically not necessary and could be removed); 127162/ 2258606 have non-zero weight

### this code can be modified to add different weighting to edges, such as impedances

### graph_coal_flow_weighted.txt can be moved to the graph folder of the Directed Louvain repository to analyse via the Directed Louvain algorithm

### the Directed Louvain algorithm can use either tabs or spaces as the delimiter, so the choice between which is chosen as the delimiter is unimportant

In [24]:
import os, pickle, sys
import json
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection

from shapely import geometry

import geopandas as gpd

In [25]:
import pandas as pd
import numpy as np
import networkx as nx

In [26]:
root = os.path.abspath(os.path.join(os.getcwd(),'..'))

In [27]:
sys.path.append(root)

In [28]:
from ffsc.flow import *

In [29]:
# dictionary assigning colors to different features of the network
colors = {
    'coal_mines':           '#89163e',
    'oil_fields':           '#001c49',
    'well_pads':            '#6c0093',
    'processing_plants':    '#be46a6',
    'refineries':           '#be46a6',
    'ports':                '#7f5636',
    'lng':                  '#46beb1',
    'shipping':             '#7f5636',
    'pipelines':            '#006400',
    'railways':             '#646464',
    'cities':               '#96C3E4',
    'power_stations':       '#8ac27e',
}

In [30]:
# dictionary assigning paths to .csv files
all_data_dirs = {
            'cities-N':              os.path.join('.','..','results_backup','simplify','cities_nodes_dataframe.csv'),
            'pipelines-cities':      os.path.join('.','..','results_backup','simplify','cities_pipelines_edge_dataframe.csv'),
            'ports-cities':          os.path.join('.','..','results_backup','output','cities_ports_edge_dataframe.csv'),
            'railways-cities':       os.path.join('.','..','results_backup','simplify','cities_railways_edge_dataframe_alt.csv'),
            'coalmines-railways':    os.path.join('.','..','results_backup','simplify','coal_mine_railway_edge_dataframe.csv'),
            'coalmines-N':           os.path.join('.','..','results_backup','output','coal_mines_nodes_dataframe.csv'),
            'lng-N':                 os.path.join('.','..','results_backup','output','lng_nodes_dataframe.csv',  ),
            'lng-pipelines':         os.path.join('.','..','results_backup','simplify','lng_pipeline_edge_dataframe.csv'),
            'lng-shipping':          os.path.join('.','..','results_backup','output','lng_shipping_route_edge_dataframe.csv'),
            'oilfields-pipelines':   os.path.join('.','..','results_backup','simplify','oil_field_edge_dataframe.csv'),
            'oilfields-N':           os.path.join('.','..','results_backup','oil_field_nodes_fixup.csv'),
            'pipelines-pipelines':   os.path.join('.','..','results_backup','simplify','pipeline_edge_dataframe.csv'),
            'pipelines-N':           os.path.join('.','..','results_backup','simplify','pipeline_node_dataframe.csv'),
            'ports-N':               os.path.join('.','..','results_backup','output','port_node_dataframe.csv',  ),
            'ports-pipelines':       os.path.join('.','..','results_backup','simplify','port_pipeline_edge_dataframe.csv'),
            'ports-shipping':        os.path.join('.','..','results_backup','output','port_ship_edge_dataframe.csv'),
            'ports-railways':        os.path.join('.','..','results_backup','simplify','port_railway_edge_dataframe.csv'),
            'powerstn-N':            os.path.join('.','..','results_backup','output','power_station_nodes_dataframe.csv'),
            'powerstn-pipelines':    os.path.join('.','..','results_backup','simplify','power_station_pipeline_edge_dataframe.csv'),
            'powerstn-railways':     os.path.join('.','..','results_backup','simplify','power_station_railway_edge_dataframe.csv'),
            'procplant-N':           os.path.join('.','..','results_backup','output','processing_plant_nodes_dataframe.csv'),
            'procplant-pipelines':   os.path.join('.','..','results_backup','simplify','processing_plant_pipeline_edge_dataframe.csv'),
            'railways-railways':     os.path.join('.','..','results_backup','simplify','railway_edge_dataframe.csv'),
            'railways-N':            os.path.join('.','..','results_backup','simplify','railway_nodes_dataframe.csv'),
            'refineries-N':          os.path.join('.','..','results_backup','output','refinery_nodes_dataframe.csv'),
            'refineries-pipelines':  os.path.join('.','..','results_backup','simplify','refinery_pipeline_edge_dataframe.csv'),
            'shipping-shipping':     os.path.join('.','..','results_backup','output','shipping_edge_dataframe.csv'),
            'shipping-N':            os.path.join('.','..','results_backup','output','shipping_node_dataframe.csv'),
            'wellpads-N':            os.path.join('.','..','results_backup','output','well_pad_nodes_dataframe.csv'),
            'wellpads-pipelines':    os.path.join('.','..','results_backup','simplify','well_pad_pipeline_edge_dataframe.csv'),
        }

In [31]:
gen = make_nx('coal', 'coal')

carrier: coal
recipes used: coal


In [32]:
gen.all_data_dirs = all_data_dirs

In [33]:
gen._load_dfs()

In [34]:
gen._fill_graph()

INFO:ffsc.flow.network_flow:doing step add coalmines -> railways...
INFO:ffsc.flow.network_flow:doing step add railways -> railways...
INFO:ffsc.flow.network_flow:doing step add railways <- railways...
INFO:ffsc.flow.network_flow:doing step add railways -> ports...
INFO:ffsc.flow.network_flow:ports-railways missing distance
INFO:ffsc.flow.network_flow:['Unnamed: 0', 'Unnamed: 0.1', 'PortNode:START_ID(PortNode)', 'RailwayNodeID:END_ID(PipelineNode)', ':TYPE']
INFO:ffsc.flow.network_flow:doing step add railways <- ports...
INFO:ffsc.flow.network_flow:doing step add ports -> shipping_lanes...
INFO:ffsc.flow.network_flow:ports-shipping missing distance
INFO:ffsc.flow.network_flow:['PortNode:START_ID(PortNode)', 'ShipNode:END_ID(ShippingNode)', ':TYPE', 'impedance']
INFO:ffsc.flow.network_flow:doing step add ports <- shipping_lanes...
INFO:ffsc.flow.network_flow:doing step add shipping_lanes -> shipping_lanes...
INFO:ffsc.flow.network_flow:doing step add shipping_lanes <- shipping_lanes...


In [35]:
gen._prep_flow()

scope power stations) 17876 28664
INFO:ffsc.flow.network_flow:checking powerstation paths...
INFO:ffsc.flow.network_flow:ii_p 0, p_count 0
INFO:ffsc.flow.network_flow:ii_p 1000, p_count 304
INFO:ffsc.flow.network_flow:ii_p 2000, p_count 567
INFO:ffsc.flow.network_flow:ii_p 3000, p_count 582
INFO:ffsc.flow.network_flow:ii_p 4000, p_count 604
INFO:ffsc.flow.network_flow:ii_p 5000, p_count 604
INFO:ffsc.flow.network_flow:ii_p 6000, p_count 608
INFO:ffsc.flow.network_flow:ii_p 7000, p_count 652
INFO:ffsc.flow.network_flow:ii_p 8000, p_count 705
INFO:ffsc.flow.network_flow:ii_p 9000, p_count 751
INFO:ffsc.flow.network_flow:ii_p 10000, p_count 757
INFO:ffsc.flow.network_flow:ii_p 11000, p_count 759
INFO:ffsc.flow.network_flow:ii_p 12000, p_count 780
INFO:ffsc.flow.network_flow:ii_p 13000, p_count 796
INFO:ffsc.flow.network_flow:ii_p 14000, p_count 819
INFO:ffsc.flow.network_flow:ii_p 15000, p_count 841
INFO:ffsc.flow.network_flow:ii_p 16000, p_count 856
INFO:ffsc.flow.network_flow:ii_p 17000

**approach:**
- get all the data into nice geopandas dfs
- filter the gdfs on the network edges
- plot the gdfs

In [36]:
ne = gpd.read_file('./../data/ne/ne_10m_countries.gpkg')

In [37]:
all_data = {}
for kk,vv in all_data_dirs.items():
    all_data[kk]= pd.read_csv(vv)

In [38]:
#pickle.load(open(os.path.join('.','..','results_backup','primary','prm_shipping_routes_data.pkl'),'rb'))

In [39]:
# join geometris back to shipping nodes
all_data['shipping-N'] = all_data['shipping-N'].merge(all_data['shipping-shipping'][['StartNodeId:START_ID(ShippingNode)','starting_point']], how='left',left_on='ShippingNodeID:ID(ShippingNode)', right_on='StartNodeId:START_ID(ShippingNode)')
all_data['shipping-N'] = all_data['shipping-N'].merge(all_data['shipping-shipping'][['EndNodeId:END_ID(ShippingNode)','end_point']], how='left',left_on='ShippingNodeID:ID(ShippingNode)', right_on='EndNodeId:END_ID(ShippingNode)')
all_data['shipping-N']['coordinates']=all_data['shipping-N']['starting_point']
all_data['shipping-N']['coordinates'] = all_data['shipping-N']['coordinates'].fillna(all_data['shipping-N']['end_point'])

In [40]:
# replace parentheses with sq brackets
all_data['pipelines-N']['coordinates'] = all_data['pipelines-N']['coordinates'].str.replace('(','[').str.replace(')',']')
all_data['railways-N']['coordinates'] = all_data['railways-N']['coordinates'].str.replace('(','[').str.replace(')',']')

In [41]:
all_nodes = pd.DataFrame({'node_ID':[],'coordinates':[]})

In [42]:
for kk, vv in all_data.items():
    if kk.split('-')[-1]=='N':
        print (kk)
        id_col = [cc for cc in vv.columns if 'ID' in cc][0]
        tmp = vv[[id_col, 'coordinates']]
        tmp = tmp.rename(columns={id_col:'node_ID'})
        # print (tmp.head(5))
        all_nodes = all_nodes.append(tmp)

cities-N
coalmines-N
lng-N
oilfields-N
pipelines-N
ports-N
powerstn-N
procplant-N
railways-N
refineries-N
shipping-N
wellpads-N


**do edges_df**

In [43]:
edge_df = pd.DataFrame.from_records([{'start':e[0],'end':e[1],'z':e[2]['z']} for e in gen.G.edges(data=True)])

In [44]:
edge_df = edge_df[edge_df.start!='supersource']

In [45]:
edge_df['start_type'] = edge_df.start.str.split('_').str[0]
edge_df['end_type'] = edge_df.end.str.split('_').str[0]

In [46]:
edge_df = edge_df.merge(all_nodes.drop_duplicates(subset='node_ID'), how='left',left_on='start', right_on='node_ID').rename(columns={'node_ID':'snode_ID', 'coordinate':'start_coordinates'})

In [47]:
edge_df = edge_df.merge(all_nodes.drop_duplicates(subset='node_ID'), how='left',left_on='end', right_on='node_ID').rename(columns={'node_ID':'enode_ID', 'coordinate':'end_coordinates'})

In [48]:
edge_df = edge_df.drop(columns=['snode_ID','enode_ID'])

In [49]:
edge_df.rename(columns={'coordinates_x':'start_coordinates', 'coordinates_y':'end_coordinates'}, inplace=True)

In [50]:
edge_df['start_coordinates'] = edge_df['start_coordinates'].apply(json.loads)
edge_df['end_coordinates'] = edge_df['end_coordinates'].apply(json.loads)

In [51]:
edge_df['geometry'] = edge_df.apply(lambda row: geometry.LineString([row['start_coordinates'], row['end_coordinates']]), axis=1)

In [52]:
edge_gdf = gpd.GeoDataFrame(edge_df, geometry=edge_df['geometry'], crs={'init':'epsg:4326'})

  return _prepare_from_string(" ".join(pjargs))


In [53]:
# #def vis_connected_network(G):
# fig, ax = plt.subplots(1,1,figsize=(40,20))
# ne.plot(ax=ax, color='#e3e3e3')

# # plot edges
# edge_gdf[(edge_gdf['start_type']=='railway') & (edge_gdf['end_type']=='railway') ].plot(ax=ax, colors=colors['railways'], alpha=0.2)
# edge_gdf[(edge_gdf['start_type']=='shipping') & (edge_gdf['end_type']=='shipping') ].plot(ax=ax, colors=colors['shipping'], alpha=0.2)


# ax.set_xlim([-180,180])
# ax.set_ylim([-62,90])
# ax.axis('off')
# #fig.savefig('coal_supplynetwork.png')

# plt.show()

> A Graph showing all the Edges that are Solely Railway or Shipping Routes
 - shipping routes (#7f5636 -> brown)
 - railways (#646464 -> grey)

In [54]:
edge_df

Unnamed: 0,start,end,z,start_type,end_type,start_coordinates,end_coordinates,geometry
0,coal_mine_1920,railway_node_25.9047571-24.6667781,0,coal,railway,"[25.917000000186725, -24.666999999954218]","[25.9047571, -24.6667781]","LINESTRING (25.91700 -24.66700, 25.90476 -24.6..."
1,railway_node_25.9047571-24.6667781,railway_node_25.9047571-24.6664075,0,railway,railway,"[25.9047571, -24.6667781]","[25.9047571, -24.6664075]","LINESTRING (25.90476 -24.66678, 25.90476 -24.6..."
2,railway_node_25.9047571-24.6667781,railway_node_25.9047254-24.6670633,0,railway,railway,"[25.9047571, -24.6667781]","[25.9047254, -24.6670633]","LINESTRING (25.90476 -24.66678, 25.90473 -24.6..."
3,coal_mine_1922,railway_node_53.3958054211536635.55213928019969,0,coal,railway,"[53.394000000101364, 35.57499999971285]","[53.39580542115366, 35.55213928019969]","LINESTRING (53.39400 35.57500, 53.39581 35.55214)"
4,railway_node_53.3958054211536635.55213928019969,railway_node_53.38116831558798535.542777921444795,0,railway,railway,"[53.39580542115366, 35.55213928019969]","[53.381168315587985, 35.542777921444795]","LINESTRING (53.39581 35.55214, 53.38117 35.54278)"
...,...,...,...,...,...,...,...,...
2258601,shipping_node_34564,shipping_node_34768,1212,shipping,shipping,"[-70.69139099, 43.13928986]","[-70.55523682, 43.12379837]","LINESTRING (-70.69139 43.13929, -70.55524 43.1..."
2258602,shipping_node_34564,shipping_node_33992,508,shipping,shipping,"[-70.69139099, 43.13928986]","[-70.71349335, 43.17829895]","LINESTRING (-70.69139 43.13929, -70.71349 43.1..."
2258603,shipping_node_33992,shipping_node_34564,508,shipping,shipping,"[-70.71349335, 43.17829895]","[-70.69139099, 43.13928986]","LINESTRING (-70.71349 43.17830, -70.69139 43.1..."
2258604,shipping_node_33992,shipping_node_34768,49,shipping,shipping,"[-70.71349335, 43.17829895]","[-70.55523682, 43.12379837]","LINESTRING (-70.71349 43.17830, -70.55524 43.1..."


**do nodes df**

In [55]:
node_df = pd.DataFrame.from_records([{'node':n[0],'D':n[1]['D']} for n in gen.G.nodes(data=True)])

In [56]:
node_df = node_df.merge(all_nodes, how='left',left_on='node',right_on='node_ID')

In [57]:
node_df = node_df[node_df.node!='supersource']

In [58]:
node_df['geometry'] = node_df['coordinates'].apply(json.loads).apply(geometry.Point)

In [59]:
node_gdf = gpd.GeoDataFrame(node_df, geometry=node_df['geometry'], crs = {'init':'epsg:4326'})

  return _prepare_from_string(" ".join(pjargs))


In [60]:
node_gdf['type']=node_gdf['node'].str.split('_').str[0]

In [61]:
node_gdf

Unnamed: 0,node,D,node_ID,coordinates,geometry,type
0,coal_mine_1920,0,coal_mine_1920,"[25.917000000186725, -24.666999999954218]",POINT (25.91700 -24.66700),coal
1,railway_node_25.9047571-24.6667781,0,railway_node_25.9047571-24.6667781,"[25.9047571, -24.6667781]",POINT (25.90476 -24.66678),railway
2,coal_mine_1922,0,coal_mine_1922,"[53.394000000101364, 35.57499999971285]",POINT (53.39400 35.57500),coal
3,railway_node_53.3958054211536635.55213928019969,0,railway_node_53.3958054211536635.55213928019969,"[53.39580542115366, 35.55213928019969]",POINT (53.39581 35.55214),railway
4,coal_mine_1923,0,coal_mine_1923,"[52.98099999974161, 36.17099999993142]",POINT (52.98100 36.17100),coal
...,...,...,...,...,...,...
1057524,city_node_9970,9,city_node_9970,"[9.140014958055534, 12.218561270578235]",POINT (9.14001 12.21856),city
1057525,city_node_9981,6,city_node_9981,"[6.893405117769672, 11.846367254346195]",POINT (6.89341 11.84637),city
1057526,city_node_9989,16,city_node_9989,"[13.147584159650059, 11.766774687687745]",POINT (13.14758 11.76677),city
1057527,city_node_9990,75,city_node_9990,"[8.593431162223446, 12.00241287418524]",POINT (8.59343 12.00241),city


In [62]:
node_gdf['type'].unique()

array(['coal', 'railway', 'port', 'shipping', 'power', 'city'],
      dtype=object)

In [63]:
# #def vis_connected_network(G):
# fig, ax = plt.subplots(1,1,figsize=(40,20))
# ne.plot(ax=ax, color='#e3e3e3')
# # plot coal mines, ports, cities, power stations

# node_gdf[(node_gdf['type']=='port')].plot(ax=ax, color=colors['ports'], markersize=2)
# node_gdf[(node_gdf['type']=='coal')].plot(ax=ax, color=colors['coal_mines'])

# node_gdf[(node_gdf['type']=='city') & (node_gdf['D']>0)].plot(ax=ax, color=colors['cities'], markersize=4)
# node_gdf[(node_gdf['type']=='power') & (node_gdf['D']>0)].plot(ax=ax, color=colors['power_stations'], markersize=3)

# # plot edges
# edge_gdf[(edge_gdf['start_type']=='railway') | (edge_gdf['end_type']=='railway') ].plot(ax=ax, colors=colors['railways'], alpha=0.2)
# edge_gdf[(edge_gdf['start_type']=='shipping') | (edge_gdf['end_type']=='shipping') ].plot(ax=ax, colors=colors['shipping'], alpha=0.2)


# ax.set_xlim([-180,180])
# ax.set_ylim([-62,90])
# ax.axis('off')
# fig.savefig('coal_supplynetwork.png')

# plt.show()

> A Graph showing all the Edges that are Solely Railway or Shipping Routes, all the Nodes of Ports and Coal Mines and Nodes of Cities and Power Stations where D > 0
 - shipping routes (#7f5636 -> brown)
 - railways (#646464 -> grey)
 - ports (#7f5636 -> brown)
 - coal mines (#89163e -> burgundy)
 - cities (#96C3E4 -> sky blue)
 - power_stations (#8ac27e -> lime green)

### Force-Directed Layout

Out of memory error... loolll
... try again once having run community detection

In [64]:
edge_df['z_inv'] = 1/(edge_df['z']+1)

In [65]:
len(node_df.node.unique())

1031170

In [66]:
# get pos as dict
pos = node_df.drop_duplicates(subset='node')[['node','coordinates']].set_index('node').to_dict('index')

In [67]:
pos = {kk:json.loads(vv['coordinates']) for kk, vv in pos.items()}

In [68]:
z_inv_attr = edge_df[['start','end','z_inv']].set_index(['start','end']).to_dict(orient='index')

In [69]:
nx.set_edge_attributes(gen.G, z_inv_attr)

In [70]:
# pos = nx.spring_layout(gen.G, pos, iterations=1, weight='z_inv')
# big ole 'out of memory' error

### load flows and visualise them

In [71]:
flow_dict = pickle.load(open('./../coal_flow_dict.pkl','rb'))

In [72]:
len(flow_dict.keys())

1031171

In [73]:
flow_dict_reshape = {(i,j): flow_dict[i][j] 
                           for i in flow_dict.keys() 
                           for j in flow_dict[i].keys()}

In [74]:
flow_dict_reshape

{('coal_mine_1920', 'railway_node_25.9047571-24.6667781'): 14684,
 ('railway_node_25.9047571-24.6667781',
  'railway_node_25.9047571-24.6664075'): 14684,
 ('railway_node_25.9047571-24.6667781',
  'railway_node_25.9047254-24.6670633'): 0,
 ('coal_mine_1922', 'railway_node_53.3958054211536635.55213928019969'): 4677,
 ('railway_node_53.3958054211536635.55213928019969',
  'railway_node_53.38116831558798535.542777921444795'): 4570,
 ('railway_node_53.3958054211536635.55213928019969',
  'railway_node_53.4143905469255735.55813984128466'): 107,
 ('coal_mine_1923', 'railway_node_52.97519310345878636.17194361255421'): 301,
 ('railway_node_52.97519310345878636.17194361255421',
  'railway_node_52.9916115331686136.16469565131024'): 0,
 ('railway_node_52.97519310345878636.17194361255421',
  'railway_node_52.9547501069948936.182582800548694'): 301,
 ('coal_mine_1926', 'railway_node_-2.18300434.3093436'): 100505,
 ('railway_node_-2.18300434.3093436',
  'railway_node_-2.186293834.3087238'): 100505,
 ('

In [75]:
flow_df = pd.DataFrame.from_dict(flow_dict_reshape, orient='index')

In [76]:
flow_df['source']=flow_df.index.str[0]

In [77]:
flow_df = flow_df[~(flow_df.source=='supersource')]

In [78]:
flow_df = flow_df.rename(columns={0:'flow'})

In [79]:
flow_df

Unnamed: 0,flow,source
"(coal_mine_1920, railway_node_25.9047571-24.6667781)",14684,coal_mine_1920
"(railway_node_25.9047571-24.6667781, railway_node_25.9047571-24.6664075)",14684,railway_node_25.9047571-24.6667781
"(railway_node_25.9047571-24.6667781, railway_node_25.9047254-24.6670633)",0,railway_node_25.9047571-24.6667781
"(coal_mine_1922, railway_node_53.3958054211536635.55213928019969)",4677,coal_mine_1922
"(railway_node_53.3958054211536635.55213928019969, railway_node_53.38116831558798535.542777921444795)",4570,railway_node_53.3958054211536635.55213928019969
...,...,...
"(shipping_node_34564, shipping_node_34768)",0,shipping_node_34564
"(shipping_node_34564, shipping_node_33992)",0,shipping_node_34564
"(shipping_node_33992, shipping_node_34564)",0,shipping_node_33992
"(shipping_node_33992, shipping_node_34768)",0,shipping_node_33992


In [80]:
edge_df = edge_df.set_index(['start','end'])

In [81]:
edge_df['flow']=0

In [82]:
edge_df

Unnamed: 0_level_0,Unnamed: 1_level_0,z,start_type,end_type,start_coordinates,end_coordinates,geometry,z_inv,flow
start,end,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
coal_mine_1920,railway_node_25.9047571-24.6667781,0,coal,railway,"[25.917000000186725, -24.666999999954218]","[25.9047571, -24.6667781]","LINESTRING (25.91700 -24.66700, 25.90476 -24.6...",1.000000,0
railway_node_25.9047571-24.6667781,railway_node_25.9047571-24.6664075,0,railway,railway,"[25.9047571, -24.6667781]","[25.9047571, -24.6664075]","LINESTRING (25.90476 -24.66678, 25.90476 -24.6...",1.000000,0
railway_node_25.9047571-24.6667781,railway_node_25.9047254-24.6670633,0,railway,railway,"[25.9047571, -24.6667781]","[25.9047254, -24.6670633]","LINESTRING (25.90476 -24.66678, 25.90473 -24.6...",1.000000,0
coal_mine_1922,railway_node_53.3958054211536635.55213928019969,0,coal,railway,"[53.394000000101364, 35.57499999971285]","[53.39580542115366, 35.55213928019969]","LINESTRING (53.39400 35.57500, 53.39581 35.55214)",1.000000,0
railway_node_53.3958054211536635.55213928019969,railway_node_53.38116831558798535.542777921444795,0,railway,railway,"[53.39580542115366, 35.55213928019969]","[53.381168315587985, 35.542777921444795]","LINESTRING (53.39581 35.55214, 53.38117 35.54278)",1.000000,0
...,...,...,...,...,...,...,...,...,...
shipping_node_34564,shipping_node_34768,1212,shipping,shipping,"[-70.69139099, 43.13928986]","[-70.55523682, 43.12379837]","LINESTRING (-70.69139 43.13929, -70.55524 43.1...",0.000824,0
shipping_node_34564,shipping_node_33992,508,shipping,shipping,"[-70.69139099, 43.13928986]","[-70.71349335, 43.17829895]","LINESTRING (-70.69139 43.13929, -70.71349 43.1...",0.001965,0
shipping_node_33992,shipping_node_34564,508,shipping,shipping,"[-70.71349335, 43.17829895]","[-70.69139099, 43.13928986]","LINESTRING (-70.71349 43.17830, -70.69139 43.1...",0.001965,0
shipping_node_33992,shipping_node_34768,49,shipping,shipping,"[-70.71349335, 43.17829895]","[-70.55523682, 43.12379837]","LINESTRING (-70.71349 43.17830, -70.55524 43.1...",0.020000,0


In [83]:
edge_df.loc[flow_df.index, 'flow'] = flow_df['flow']

In [84]:
edge_df.flow.sum()

14771218517

In [85]:
edge_df = edge_df.reset_index()

In [86]:
edge_df = edge_df[edge_df['start']!='supersource']

In [87]:
edge_df

Unnamed: 0,start,end,z,start_type,end_type,start_coordinates,end_coordinates,geometry,z_inv,flow
0,coal_mine_1920,railway_node_25.9047571-24.6667781,0,coal,railway,"[25.917000000186725, -24.666999999954218]","[25.9047571, -24.6667781]","LINESTRING (25.91700 -24.66700, 25.90476 -24.6...",1.000000,14684
1,railway_node_25.9047571-24.6667781,railway_node_25.9047571-24.6664075,0,railway,railway,"[25.9047571, -24.6667781]","[25.9047571, -24.6664075]","LINESTRING (25.90476 -24.66678, 25.90476 -24.6...",1.000000,14684
2,railway_node_25.9047571-24.6667781,railway_node_25.9047254-24.6670633,0,railway,railway,"[25.9047571, -24.6667781]","[25.9047254, -24.6670633]","LINESTRING (25.90476 -24.66678, 25.90473 -24.6...",1.000000,0
3,coal_mine_1922,railway_node_53.3958054211536635.55213928019969,0,coal,railway,"[53.394000000101364, 35.57499999971285]","[53.39580542115366, 35.55213928019969]","LINESTRING (53.39400 35.57500, 53.39581 35.55214)",1.000000,4677
4,railway_node_53.3958054211536635.55213928019969,railway_node_53.38116831558798535.542777921444795,0,railway,railway,"[53.39580542115366, 35.55213928019969]","[53.381168315587985, 35.542777921444795]","LINESTRING (53.39581 35.55214, 53.38117 35.54278)",1.000000,4570
...,...,...,...,...,...,...,...,...,...,...
2258601,shipping_node_34564,shipping_node_34768,1212,shipping,shipping,"[-70.69139099, 43.13928986]","[-70.55523682, 43.12379837]","LINESTRING (-70.69139 43.13929, -70.55524 43.1...",0.000824,0
2258602,shipping_node_34564,shipping_node_33992,508,shipping,shipping,"[-70.69139099, 43.13928986]","[-70.71349335, 43.17829895]","LINESTRING (-70.69139 43.13929, -70.71349 43.1...",0.001965,0
2258603,shipping_node_33992,shipping_node_34564,508,shipping,shipping,"[-70.71349335, 43.17829895]","[-70.69139099, 43.13928986]","LINESTRING (-70.71349 43.17830, -70.69139 43.1...",0.001965,0
2258604,shipping_node_33992,shipping_node_34768,49,shipping,shipping,"[-70.71349335, 43.17829895]","[-70.55523682, 43.12379837]","LINESTRING (-70.71349 43.17830, -70.55524 43.1...",0.020000,0


### Visualise flow

In [88]:
# #def vis_connected_network(G):
# fig, ax = plt.subplots(1,1,figsize=(30,20))
# ne.plot(ax=ax, color='#e3e3e3')
# # plot coal mines, ports, cities, power stations

# node_gdf[(node_gdf['type']=='port')].plot(ax=ax, color=colors['ports'], markersize=2)
# node_gdf[(node_gdf['type']=='coal')].plot(ax=ax, color=colors['coal_mines'])

# node_gdf[(node_gdf['type']=='city') & (node_gdf['D']>0)].plot(ax=ax, color=colors['cities'], markersize=4)
# node_gdf[(node_gdf['type']=='power') & (node_gdf['D']>0)].plot(ax=ax, color=colors['power_stations'], markersize=3)

# # plot edges
# segs = []
# line_widths=[]
# line_colors = []

# for edge in edge_df[(edge_df['flow']>0) & ((edge_gdf['start_type']=='railway') | (edge_gdf['end_type']=='railway'))].iterrows():
#     segs.append(list(edge[1]['geometry'].coords))
#     line_widths.append(np.log10(edge[1]['flow']))
#     line_colors.append(colors['railways'])
    
# for edge in edge_df[(edge_df['flow']>0) & ((edge_gdf['start_type']=='shipping') | (edge_gdf['end_type']=='shipping'))].iterrows():
#     segs.append(list(edge[1]['geometry'].coords))
#     line_widths.append(np.log10(edge[1]['flow']))
#     line_colors.append(colors['shipping'])

# line_collection = LineCollection(segs, linewidths=line_widths, colors=line_colors)
    
# ax.add_collection(line_collection)
    
# ax.set_xlim([-180,180])
# ax.set_ylim([-62,90])
# ax.axis('off')
# fig.savefig('coal_flownetwork.png')

# plt.show()

> A Graph showing all the Nodes of Ports and Coal Mines and Nodes of Cities and Power Stations where D > 0 & weighting all the Edges that are Solely Railway or Shipping Routes
 - shipping routes (#7f5636 -> brown)
 - railways (#646464 -> grey)
 - ports (#7f5636 -> brown)
 - coal mines (#89163e -> burgundy)
 - cities (#96C3E4 -> sky blue)
 - power_stations (#8ac27e -> lime green)

# Node Conversion

In [48]:
node_coordinates_dict = node_df.to_dict()['coordinates']
node_coordinates_dict

{0: '[25.917000000186725, -24.666999999954218]',
 1: '[25.9047571, -24.6667781]',
 2: '[53.394000000101364, 35.57499999971285]',
 3: '[53.39580542115366, 35.55213928019969]',
 4: '[52.98099999974161, 36.17099999993142]',
 5: '[52.975193103458786, 36.17194361255421]',
 6: '[-2.159999999710697, 34.31000000012751]',
 7: '[-2.183004, 34.3093436]',
 8: '[33.72899999977782, -16.115000000096515]',
 9: '[33.7301248, -16.120079]',
 10: '[7.482999999593258, 6.433000000523869]',
 11: '[7.4893002, 6.4352075]',
 12: '[29.1670000008292, -25.817000000379867]',
 13: '[29.1655643, -25.848461]',
 14: '[30.04999999971292, -28.00000000043711]',
 15: '[30.048072, -28.0005906]',
 16: '[29.233000000221832, -25.899999999797217]',
 17: '[29.2035176, -25.883162]',
 18: '[29.599999999738106, -25.916999999739232]',
 19: '[29.6221431, -25.9400583]',
 20: '[27.500000000086608, -23.667000000114562]',
 21: '[27.548099, -23.6539232]',
 22: '[28.817000000190863, -26.26700000012224]',
 23: '[28.8272418, -26.2690014]',
 

In [49]:
coordinates_node_dict = {value:key for key, value in node_coordinates_dict.items()}
coordinates_node_dict

{'[25.917000000186725, -24.666999999954218]': 0,
 '[25.9047571, -24.6667781]': 1,
 '[53.394000000101364, 35.57499999971285]': 2,
 '[53.39580542115366, 35.55213928019969]': 3,
 '[52.98099999974161, 36.17099999993142]': 4,
 '[52.975193103458786, 36.17194361255421]': 5,
 '[-2.159999999710697, 34.31000000012751]': 6,
 '[-2.183004, 34.3093436]': 7,
 '[33.72899999977782, -16.115000000096515]': 8,
 '[33.7301248, -16.120079]': 9,
 '[7.482999999593258, 6.433000000523869]': 10,
 '[7.4893002, 6.4352075]': 11,
 '[29.1670000008292, -25.817000000379867]': 12,
 '[29.1655643, -25.848461]': 13,
 '[30.04999999971292, -28.00000000043711]': 14,
 '[30.048072, -28.0005906]': 15,
 '[29.233000000221832, -25.899999999797217]': 16,
 '[29.2035176, -25.883162]': 17,
 '[29.599999999738106, -25.916999999739232]': 18,
 '[29.6221431, -25.9400583]': 19,
 '[27.500000000086608, -23.667000000114562]': 20,
 '[27.548099, -23.6539232]': 21,
 '[28.817000000190863, -26.26700000012224]': 22,
 '[28.8272418, -26.2690014]': 23,
 

## Converting Numbered Nodes to Coordinates into a .csv

In [50]:
import csv

In [51]:
with open('node2coordinates.csv', 'w') as f:
    fieldnames = ['Node Number', 'Coordinates']
    writer = csv.DictWriter(f, fieldnames=fieldnames, delimiter='\t')
    writer.writeheader()
    data = [dict(zip(fieldnames, [k, v])) for k, v in node_coordinates_dict.items()]
    writer.writerows(data)

### Creating Edge weights from the impedance
Using formula weight = 1/(1+ln(1+z))

In [70]:
edge_df['z'].max()

581541

In [71]:
z = []

for i in range(len(edge_df['z'])):
    z.append(edge_df['z'][i])

In [72]:
len(z)

2258606

In [73]:
# number of impedances greater than size 1000

count = 0

for i in range(len(x)):
    if z[i] >= 1000:
        count += 1

print(count)

0


In [74]:
# number of impedances equal to zero

count = 0

for i in range(len(x)):
    if z[i] == 0:
        count += 1

print(count)

239338


In [75]:
edge_df

Unnamed: 0,start,end,z,start_type,end_type,start_coordinates,end_coordinates,geometry
0,coal_mine_1920,railway_node_25.9047571-24.6667781,0,coal,railway,"[25.917000000186725, -24.666999999954218]","[25.9047571, -24.6667781]",LINESTRING (25.91700000018673 -24.666999999954...
1,railway_node_25.9047571-24.6667781,railway_node_25.9047571-24.6664075,0,railway,railway,"[25.9047571, -24.6667781]","[25.9047571, -24.6664075]","LINESTRING (25.9047571 -24.6667781, 25.9047571..."
2,railway_node_25.9047571-24.6667781,railway_node_25.9047254-24.6670633,0,railway,railway,"[25.9047571, -24.6667781]","[25.9047254, -24.6670633]","LINESTRING (25.9047571 -24.6667781, 25.9047254..."
3,coal_mine_1922,railway_node_53.3958054211536635.55213928019969,0,coal,railway,"[53.394000000101364, 35.57499999971285]","[53.39580542115366, 35.55213928019969]",LINESTRING (53.39400000010136 35.5749999997128...
4,railway_node_53.3958054211536635.55213928019969,railway_node_53.38116831558798535.542777921444795,0,railway,railway,"[53.39580542115366, 35.55213928019969]","[53.381168315587985, 35.542777921444795]",LINESTRING (53.39580542115366 35.5521392801996...
...,...,...,...,...,...,...,...,...
2258601,shipping_node_34564,shipping_node_34768,1212,shipping,shipping,"[-70.69139099, 43.13928986]","[-70.55523682, 43.12379837]","LINESTRING (-70.69139099 43.13928986, -70.5552..."
2258602,shipping_node_34564,shipping_node_33992,508,shipping,shipping,"[-70.69139099, 43.13928986]","[-70.71349335, 43.17829895]","LINESTRING (-70.69139099 43.13928986, -70.7134..."
2258603,shipping_node_33992,shipping_node_34564,508,shipping,shipping,"[-70.71349335, 43.17829895]","[-70.69139099, 43.13928986]","LINESTRING (-70.71349334999999 43.17829895, -7..."
2258604,shipping_node_33992,shipping_node_34768,49,shipping,shipping,"[-70.71349335, 43.17829895]","[-70.55523682, 43.12379837]","LINESTRING (-70.71349334999999 43.17829895, -7..."


In [76]:
z_numpy = np.array(z)

In [77]:
z_numpy

array([  0,   0,   0, ..., 508,  49, 712])

In [80]:
weights = 1/(1+np.log(1+z_numpy))

In [81]:
weights

array([1.        , 1.        , 1.        , ..., 0.13826577, 0.20358211,
       0.13210945])

In [83]:
np.savetxt('graph_weights_z_coal.txt', weights, delimiter='\t')

### Coordinates to numbers

In [60]:
edge_df_new = edge_df.drop(columns=['start','end','z','start_type','end_type','geometry']).copy(deep = True)

In [61]:
edge_df_new

Unnamed: 0,start_coordinates,end_coordinates
0,"[25.917000000186725, -24.666999999954218]","[25.9047571, -24.6667781]"
1,"[25.9047571, -24.6667781]","[25.9047571, -24.6664075]"
2,"[25.9047571, -24.6667781]","[25.9047254, -24.6670633]"
3,"[53.394000000101364, 35.57499999971285]","[53.39580542115366, 35.55213928019969]"
4,"[53.39580542115366, 35.55213928019969]","[53.381168315587985, 35.542777921444795]"
...,...,...
2258601,"[-70.69139099, 43.13928986]","[-70.55523682, 43.12379837]"
2258602,"[-70.69139099, 43.13928986]","[-70.71349335, 43.17829895]"
2258603,"[-70.71349335, 43.17829895]","[-70.69139099, 43.13928986]"
2258604,"[-70.71349335, 43.17829895]","[-70.55523682, 43.12379837]"


In [None]:
len(edge_df_new)

In [None]:
import time

In [None]:
### this takes ~24 hours to run so script will run this


time_1 = time.time()

for i in range(len(edge_df_new)):
    edge_df_new['start_coordinates'][i] = coordinates_node_dict[str(edge_df_new['start_coordinates'][i])]
    edge_df_new['end_coordinates'][i] = coordinates_node_dict[str(edge_df_new['end_coordinates'][i])]

time_2 = time.time()
print(time_2 - time_1)

In [100]:
### edge_df_new.to_csv('graph_coal_uw.txt', header=False, index = False, sep='\t')

## Creating Weights using Flow Data

In [89]:
edge_df

Unnamed: 0,start,end,z,start_type,end_type,start_coordinates,end_coordinates,geometry,z_inv,flow
0,coal_mine_1920,railway_node_25.9047571-24.6667781,0,coal,railway,"[25.917000000186725, -24.666999999954218]","[25.9047571, -24.6667781]","LINESTRING (25.91700 -24.66700, 25.90476 -24.6...",1.000000,14684
1,railway_node_25.9047571-24.6667781,railway_node_25.9047571-24.6664075,0,railway,railway,"[25.9047571, -24.6667781]","[25.9047571, -24.6664075]","LINESTRING (25.90476 -24.66678, 25.90476 -24.6...",1.000000,14684
2,railway_node_25.9047571-24.6667781,railway_node_25.9047254-24.6670633,0,railway,railway,"[25.9047571, -24.6667781]","[25.9047254, -24.6670633]","LINESTRING (25.90476 -24.66678, 25.90473 -24.6...",1.000000,0
3,coal_mine_1922,railway_node_53.3958054211536635.55213928019969,0,coal,railway,"[53.394000000101364, 35.57499999971285]","[53.39580542115366, 35.55213928019969]","LINESTRING (53.39400 35.57500, 53.39581 35.55214)",1.000000,4677
4,railway_node_53.3958054211536635.55213928019969,railway_node_53.38116831558798535.542777921444795,0,railway,railway,"[53.39580542115366, 35.55213928019969]","[53.381168315587985, 35.542777921444795]","LINESTRING (53.39581 35.55214, 53.38117 35.54278)",1.000000,4570
...,...,...,...,...,...,...,...,...,...,...
2258601,shipping_node_34564,shipping_node_34768,1212,shipping,shipping,"[-70.69139099, 43.13928986]","[-70.55523682, 43.12379837]","LINESTRING (-70.69139 43.13929, -70.55524 43.1...",0.000824,0
2258602,shipping_node_34564,shipping_node_33992,508,shipping,shipping,"[-70.69139099, 43.13928986]","[-70.71349335, 43.17829895]","LINESTRING (-70.69139 43.13929, -70.71349 43.1...",0.001965,0
2258603,shipping_node_33992,shipping_node_34564,508,shipping,shipping,"[-70.71349335, 43.17829895]","[-70.69139099, 43.13928986]","LINESTRING (-70.71349 43.17830, -70.69139 43.1...",0.001965,0
2258604,shipping_node_33992,shipping_node_34768,49,shipping,shipping,"[-70.71349335, 43.17829895]","[-70.55523682, 43.12379837]","LINESTRING (-70.71349 43.17830, -70.55524 43.1...",0.020000,0


In [90]:
flow = []

for i in range(len(edge_df['flow'])):
    flow.append(edge_df['flow'][i])

In [91]:
flow

[14684,
 14684,
 0,
 4677,
 4570,
 107,
 301,
 0,
 301,
 100505,
 100505,
 1096,
 1096,
 0,
 2010,
 127,
 1883,
 0,
 0,
 0,
 0,
 0,
 0,
 27431,
 0,
 27431,
 0,
 0,
 0,
 75497,
 75497,
 0,
 0,
 0,
 18922,
 0,
 18922,
 73794,
 68118,
 0,
 5676,
 30294,
 30294,
 0,
 0,
 0,
 13095,
 13095,
 0,
 0,
 0,
 0,
 79767,
 79767,
 0,
 10018,
 0,
 10018,
 17408,
 17408,
 0,
 0,
 0,
 0,
 11012,
 0,
 11012,
 64885,
 24374,
 40511,
 0,
 0,
 0,
 0,
 0,
 2838,
 0,
 2838,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 32545,
 2271,
 30274,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 122026,
 91751,
 30275,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 19791,
 5222,
 14569,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 40511,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 430,
 430,
 0,
 0,
 0,
 0,
 102693,
 0,
 102693,
 913,
 448,
 465,
 0,
 169845,
 155309,
 14536,
 0,
 155309,
 0,
 132497,
 0,
 132497,
 3407,
 0,
 3407,
 33309,
 16114,
 17195,
 88175,
 88175,
 0,
 24409,
 24409,
 0,
 1021

In [92]:
len(flow)

2258606

In [93]:
flow_weights = np.array(flow)

In [94]:
count = 0
for i in range(len(flow)):
    if flow[i] == 0:
        count += 1
print(count)

2131444


In [95]:
np.savetxt('graph_weights_flow_coal.txt', flow_weights, delimiter='\t')

# Combine Coordinates and Weights

In [97]:
graph_coal = pd.read_csv("graph_coal_uw.txt", delimiter = '\t', header = None) 

In [98]:
graph_coal

Unnamed: 0,0,1
0,0,1
1,1,16103
2,1,16102
3,2,3
4,3,486067
...,...,...
2258601,1036560,1032223
2258602,1036560,1036562
2258603,1036562,1036560
2258604,1036562,1032223


In [99]:
graph_coal['weights'] = flow_weights ### either weights (weights from impedances) or flow_weights (weights from flow data)

In [100]:
graph_coal

Unnamed: 0,0,1,weights
0,0,1,14684
1,1,16103,14684
2,1,16102,0
3,2,3,4677
4,3,486067,4570
...,...,...,...
2258601,1036560,1032223,0
2258602,1036560,1036562,0
2258603,1036562,1036560,0
2258604,1036562,1032223,0


In [104]:
edges_to_drop = []

for i in range(len(graph_coal)):
    if graph_coal['weights'][i] == 0:
        edges_to_drop.append(i)
        

In [108]:
graph_coal = graph_coal.drop(index = edges_to_drop)

In [109]:
graph_coal

Unnamed: 0,0,1,weights
0,0,1,14684
1,1,16103,14684
3,2,3,4677
4,3,486067,4570
5,3,486066,107
...,...,...,...
2256754,1035198,1032826,651
2257259,1035555,1036125,5004
2257388,1035651,1031485,651
2257926,1036038,1035555,5004


In [111]:
graph_coal.to_csv('graph_coal_flow_weighted.txt', header=False, index = False, sep=' ')