## Country-level INFRA-SAP

- Origins: Population grid (Worldpop 1 km grid)
- Destinations: Cities, airports, border crossings, and ports

Typical access analysis using GOSTnets with one adjustment:
    1. Extract different sets of destinations from OD

In [1]:
import os, sys, time, importlib

import geopandas as gpd
import pandas as pd
import networkx as nx
sys.path.append('/home/wb514197/Repos/GOSTnets')

import GOSTnets as gn
import GOSTnets.calculate_od_raw as calcOD
from GOSTnets.load_osm import *
import rasterio as rio
from osgeo import gdal
import numpy as np
from shapely.geometry import Point

from utm_zone import epsg as epsg_get
import json

# sys.path.append('/home/wb514197/Repos/INFRA_SAP')
# from infrasap import aggregator

%load_ext autoreload
%autoreload 2

In [2]:
country = 'mozambique'
iso3 = 'MOZ'

### Load origins and graph

In [3]:
# base_in = "/home/public/Data/PROJECTS/INFRA_SAP"
base_in = "/home/wb514197/data/INFRA_SAP"
in_folder = os.path.join(base_in, iso3)

# define data paths
focal_admin2 = os.path.join(in_folder, "admin.shp")
focal_osm = os.path.join(in_folder, f"{country}-latest.osm.pbf")
pop_name = "WP_2020_1km"
wp_1km = os.path.join(in_folder, f"{pop_name}.tif")
urban_extents = os.path.join(in_folder, "urban_extents.shp")
airports = os.path.join(in_folder, "airports.shp")
ports = os.path.join(in_folder, "ports.shp")
borders = os.path.join(in_folder, "borders.shp")

out_folder = os.path.join(in_folder, "output")

if not os.path.exists(out_folder):
    os.makedirs(out_folder)

Cheat library to get UTM code

In [4]:
bounds = gpd.read_file(focal_admin2)
bounds_json = json.loads(bounds.to_json())
epsg = epsg_get(bounds_json)

In [9]:
epsg

32737

Convert **WP_2020_1km.tif** into a point GeoData frame

In [6]:
inDs = gdal.Open(wp_1km)
out_pop = os.path.join(out_folder, f"{pop_name}.xyz")
outDs = gdal.Translate(out_pop, inDs, format='XYZ', creationOptions=["ADD_HEADER_LINE=YES"])
out_pop_csv = os.path.join(out_folder, f"{pop_name}.csv")
os.rename(out_pop, out_pop_csv)

In [7]:
wp_df = pd.read_csv(out_pop_csv, sep=' ')
wp_df.rename(columns={"Z":"Pop"}, inplace=True)
wp_df = wp_df.loc[wp_df.Pop!=-99999.0].copy()
geoms = [Point(xy) for xy in zip(wp_df.X, wp_df.Y)]
wp_df.drop(["X","Y"], axis=1, inplace=True)
crs = 'EPSG:4326'
origins = gpd.GeoDataFrame(wp_df, crs=crs, geometry=geoms)
origins['pointid'] = origins.index

### Prepare Graph

In [10]:
in_folder

'/home/wb514197/data/INFRA_SAP/MOZ'

In [9]:
G_path = None

In [10]:
if G_path is None:
    print("Creating graph from scratch")
    osm_raw = OSM_to_network(focal_osm)
    accepted_road_types = ['trunk','trunk_link','primary','primary_link','secondary','secondary_link','tertiary','tertiary_link']
    osm_raw.filterRoads(acceptedRoads = accepted_road_types)
    osm_raw.generateRoadsGDF(verbose = False)
    osm_raw.initialReadIn()
    
    if not os.path.exists(os.path.join(out_folder, 'graph')):
        os.mkdir(os.path.join(out_folder, 'graph'))
    gn.save(osm_raw.network,f'G_{iso3}_unclean',os.path.join(out_folder, 'graph'))
    
    #clean graph
    G_clean = gn.clean_network(osm_raw.network, UTM = f"EPSG:{epsg}", WGS = "EPSG:4326", junctdist = 50, verbose = False)
    G = G_clean.copy()

else:
    G = nx.read_gpickle(G_path)
    for u, v, data in G.edges(data = True):
        if type(data['Wkt']) == list:
            data['Wkt'] = gn.unbundle_geometry(data['Wkt'])

Creating graph from scratch


  return _prepare_from_string(" ".join(pjargs))

  juncs_gdf_unproj['centroid'] = juncs_gdf_unproj.centroid


7755
14408
2911
5642
Edge reduction: 13003 to 5642 (56 percent)


In [12]:
# Salt network
attr_list = ['id', 'infra_type', 'osm_id', 'key', 'Type']
G_salt = gn.salt_long_lines(G, source='EPSG:4326', target=f'EPSG:{epsg}', thresh=5000, factor=1000, attr_list=attr_list)
G_time = gn.convert_network_to_time(G_salt, distance_tag = 'length', road_col = 'infra_type', factor = 1000)
if not os.path.exists(os.path.join(out_folder, 'graph')):
    os.mkdir(os.path.join(out_folder, 'graph'))
gn.save(G_time, f"G_{iso3}_Salt", os.path.join(out_folder, 'graph'))



  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))


Identified 1301 unique edge(s) longer than 5000. 
Beginning new node creation...
15662 new edges added and 2609 removed to bring total edges to 18695
6530 new nodes added to bring total nodes to 8615


In [11]:
G_time = nx.read_gpickle(os.path.join(out_folder, 'graph', f'G_{iso3}_Salt.pickle'))

#### Select largest graph

In [12]:
list_of_subgraphs = [G_time.subgraph(c).copy() for c in sorted(nx.strongly_connected_components(G_time), key=len, reverse=True)]
G_largest = list_of_subgraphs[0]

### Prepare destinations

In [13]:
inCities = gpd.read_file(urban_extents)
# pop_thresh = 0
pop_thresh=50000
inCities = inCities.loc[inCities.Pop>pop_thresh]
inCities['geometry'] = inCities.geometry.centroid
dest_cities = inCities.assign(dest_type = 'city')


  """


In [14]:
dest_airports = gpd.read_file(airports)
dest_airports = dest_airports.assign(dest_type = 'airport')

In [15]:
dest_ports = gpd.read_file(ports).assign(dest_type = 'port')
dest_borders = gpd.read_file(borders).assign(dest_type = 'border')
dest_borders['geometry'] = dest_borders.geometry.apply(lambda x: x[0])

In [16]:
dest_all = pd.concat([dest_cities, dest_airports, dest_ports, dest_borders], ignore_index=True)

In [17]:
dest_all.dest_type.value_counts()

border     32
city       29
airport    12
port        4
Name: dest_type, dtype: int64

In [24]:
dest_all.to_file(os.path.join(out_folder, 'destination_all.shp'), driver="ESRI Shapefile")

In [18]:
len(origins), len(dest_all)

(959933, 77)

### Snap origins and destinations

#### Snap to origins to nearest nodes in the network

In [21]:
utm = f"EPSG:{epsg}"

In [22]:
%%time
origins_snapped = gn.pandana_snap_c(G_largest, origins, source_crs='epsg:4326', target_crs=utm, 
                                               add_dist_to_node_col = True)

CPU times: user 51 s, sys: 795 ms, total: 51.8 s
Wall time: 51.8 s


In [25]:
dest_snapped = gn.pandana_snap_c(G_largest, dest_all, source_crs='epsg:4326', target_crs=utm,
                                 add_dist_to_node_col = False)

Get unique nodes to calculate Origin Destination Matrix

In [26]:
origins_unique_nn = list(set(origins_snapped['NN']))
dest_nn = list(dest_snapped['NN'])

In [28]:
len(origins_unique_nn), len(dest_nn)

(8016, 77)

In [29]:
%%time
curOD = gn.calculate_OD(G_largest, origins_unique_nn, dest_nn, fail_value = 999999999)

CPU times: user 2.16 s, sys: 5.99 ms, total: 2.17 s
Wall time: 2.17 s


quick checks

In [30]:
curOD[curOD==999999999]

array([], dtype=float64)

In [31]:
curOD.shape

(8016, 77)

In [32]:
od_df = pd.DataFrame(curOD, index=origins_unique_nn, columns=dest_nn)

Time in seconds

In [33]:
od_df.head()

Unnamed: 0,9644,new_obj_2345,new_obj_2286,6188_816_5074,new_obj_2085,5119_778_4853,new_obj_1990_1285_7746,new_obj_1906_728_4534,new_obj_1877,5650_646_4086,...,577_108_684,577_108_684.1,577_108_684.2,577_108_684.3,new_obj_1688_1225_7461,new_obj_1688_1225_7461.1,4861,4861.1,8125,8125.1
10988_317_2043,72209.318702,65412.915703,50967.253229,117076.13277,53917.199615,24205.494439,48430.086122,51848.820812,36726.246122,22142.807923,...,99199.984859,99199.984859,99199.984859,99199.984859,96839.284859,96839.284859,26443.826869,26443.826869,111891.476434,111891.476434
5,137319.216151,138970.428907,124524.766433,67019.176817,119027.097064,97763.007644,113539.983571,116958.718261,101836.143571,93973.465601,...,49143.028906,49143.028906,49143.028906,49143.028906,46782.328905,46782.328905,74421.561461,74421.561461,177001.373883,177001.373883
1008_184_1029,27190.38258,29576.18663,76514.091468,170491.750343,21178.164695,61490.988182,25510.467016,28525.384908,31352.19,72996.877183,...,152615.602432,152615.602432,152615.602432,152615.602432,150254.902432,150254.902432,81541.635051,81541.635051,66872.540312,66872.540312
6910_941_5766,40184.070092,30929.744931,77867.649769,202653.420806,65172.613956,73197.235073,59685.500463,63104.235153,63513.860463,83363.259198,...,184777.272895,184777.272895,184777.272895,184777.272895,182416.572895,182416.572895,100601.289537,100601.289537,33064.692047,33064.692047
2041_340_2199,177843.931473,179495.144229,165049.481755,45128.470746,159551.812386,138287.722966,154064.698893,157483.433583,142360.858893,134498.180923,...,42375.740907,42375.740907,42375.740907,42375.740907,40015.040907,40015.040907,114946.276783,114946.276783,217526.089205,217526.089205


Join the OD values back to origins data frame based on NN (Nearest Node) ID

In [34]:
origins_snapped.head()

Unnamed: 0,Pop,geometry,pointid,NN,NN_dist
1226,12.027128,POINT (40.43875 -10.47458),1226,8125,8687.144305
1227,10.314653,POINT (40.44708 -10.47458),1227,8125,9291.638068
1228,8.879251,POINT (40.45542 -10.47458),1228,8125,9943.169391
1229,7.824205,POINT (40.46375 -10.47458),1229,8125,10633.096114
2500,11.924029,POINT (40.43042 -10.48292),2500,8125,7397.180644


Create an additional travel time value to account for the snapping distance. We choose a custom speed for this.

In [35]:
custom_speed = 30 # km/h

In [36]:
origins_snapped['NN_dist_seconds'] = ((origins_snapped.NN_dist / 1000) / custom_speed) * 60 * 60
origins_snapped['NN_dist_hours'] = ((origins_snapped.NN_dist / 1000) / custom_speed)

In [37]:
origins_snapped.head()

Unnamed: 0,Pop,geometry,pointid,NN,NN_dist,NN_dist_seconds,NN_dist_hours
1226,12.027128,POINT (40.43875 -10.47458),1226,8125,8687.144305,1042.457317,0.289571
1227,10.314653,POINT (40.44708 -10.47458),1227,8125,9291.638068,1114.996568,0.309721
1228,8.879251,POINT (40.45542 -10.47458),1228,8125,9943.169391,1193.180327,0.331439
1229,7.824205,POINT (40.46375 -10.47458),1229,8125,10633.096114,1275.971534,0.354437
2500,11.924029,POINT (40.43042 -10.48292),2500,8125,7397.180644,887.661677,0.246573


In [38]:
origins_join = origins_snapped.join(od_df, on='NN')

In [43]:
all(origins_join.columns[7:] == dest_snapped.NN)

True

The columns of our data frame represent the destination nearest node, let's map it back to the destination index

In [47]:
dest_snapped.head()

Unnamed: 0,ID,Pop,geometry,dest_type,Orig,Name,TotalSeats,Country Na,Airport1La,Airport1Lo,...,outflows,CNTRY_NAME,NAME,ALT1_NAME,ALT2_NAME,ETYPE,CNTRY_BORD,x,y,NN
0,26.0,353488.09375,POINT (40.50833 -13.03271),city,,,,,,,...,,,,,,,,40.508329,-13.032714,9644
1,28.0,72147.539062,POINT (39.00729 -13.13644),city,,,,,,,...,,,,,,,,39.007295,-13.136438,new_obj_2345
2,33.0,256415.25,POINT (35.25256 -13.29065),city,,,,,,,...,,,,,,,,35.252564,-13.290649,new_obj_2286
3,70.0,54299.5,POINT (34.32517 -14.42367),city,,,,,,,...,,,,,,,,34.325167,-14.423666,6188_816_5074
4,82.0,299286.53125,POINT (40.69389 -14.56434),city,,,,,,,...,,,,,,,,40.693888,-14.564339,new_obj_2085


In [44]:
origins_join_rename = origins_join.copy()
origins_join_rename.columns = pd.MultiIndex.from_arrays([['origin' for each in origins_snapped.columns]+list(dest_snapped.dest_type), origins_snapped.columns.append(dest_snapped.index)])

In [45]:
origins_join_rename.head()

Unnamed: 0_level_0,origin,origin,origin,origin,origin,origin,origin,city,city,city,...,border,border,border,border,border,border,border,border,border,border
Unnamed: 0_level_1,Pop,geometry,pointid,NN,NN_dist,NN_dist_seconds,NN_dist_hours,0,1,2,...,67,68,69,70,71,72,73,74,75,76
1226,12.027128,POINT (40.43875 -10.47458),1226,8125,8687.144305,1042.457317,0.289571,51835.439941,51189.608634,98127.513472,...,196428.642744,196428.642744,196428.642744,196428.642744,194067.942744,194067.942744,120861.15324,120861.15324,0.0,0.0
1227,10.314653,POINT (40.44708 -10.47458),1227,8125,9291.638068,1114.996568,0.309721,51835.439941,51189.608634,98127.513472,...,196428.642744,196428.642744,196428.642744,196428.642744,194067.942744,194067.942744,120861.15324,120861.15324,0.0,0.0
1228,8.879251,POINT (40.45542 -10.47458),1228,8125,9943.169391,1193.180327,0.331439,51835.439941,51189.608634,98127.513472,...,196428.642744,196428.642744,196428.642744,196428.642744,194067.942744,194067.942744,120861.15324,120861.15324,0.0,0.0
1229,7.824205,POINT (40.46375 -10.47458),1229,8125,10633.096114,1275.971534,0.354437,51835.439941,51189.608634,98127.513472,...,196428.642744,196428.642744,196428.642744,196428.642744,194067.942744,194067.942744,120861.15324,120861.15324,0.0,0.0
2500,11.924029,POINT (40.43042 -10.48292),2500,8125,7397.180644,887.661677,0.246573,51835.439941,51189.608634,98127.513472,...,196428.642744,196428.642744,196428.642744,196428.642744,194067.942744,194067.942744,120861.15324,120861.15324,0.0,0.0


Add snapping time to each column (speed of 30 km/h)

In [48]:
origins_join2 = origins_join_rename.apply(lambda x: x + origins_join_rename.origin.NN_dist_seconds if x.name[1] in dest_snapped.index else x)

In [49]:
origins_join2.head()

Unnamed: 0_level_0,origin,origin,origin,origin,origin,origin,origin,city,city,city,...,border,border,border,border,border,border,border,border,border,border
Unnamed: 0_level_1,Pop,geometry,pointid,NN,NN_dist,NN_dist_seconds,NN_dist_hours,0,1,2,...,67,68,69,70,71,72,73,74,75,76
1226,12.027128,POINT (40.43875 -10.47458),1226,8125,8687.144305,1042.457317,0.289571,52877.897257,52232.065951,99169.970789,...,197471.100061,197471.100061,197471.100061,197471.100061,195110.400061,195110.400061,121903.610557,121903.610557,1042.457317,1042.457317
1227,10.314653,POINT (40.44708 -10.47458),1227,8125,9291.638068,1114.996568,0.309721,52950.436509,52304.605202,99242.51004,...,197543.639312,197543.639312,197543.639312,197543.639312,195182.939312,195182.939312,121976.149808,121976.149808,1114.996568,1114.996568
1228,8.879251,POINT (40.45542 -10.47458),1228,8125,9943.169391,1193.180327,0.331439,53028.620268,52382.788961,99320.693799,...,197621.823071,197621.823071,197621.823071,197621.823071,195261.123071,195261.123071,122054.333567,122054.333567,1193.180327,1193.180327
1229,7.824205,POINT (40.46375 -10.47458),1229,8125,10633.096114,1275.971534,0.354437,53111.411475,52465.580168,99403.485006,...,197704.614278,197704.614278,197704.614278,197704.614278,195343.914278,195343.914278,122137.124774,122137.124774,1275.971534,1275.971534
2500,11.924029,POINT (40.43042 -10.48292),2500,8125,7397.180644,887.661677,0.246573,52723.101618,52077.270311,99015.175149,...,197316.304421,197316.304421,197316.304421,197316.304421,194955.604421,194955.604421,121748.814917,121748.814917,887.661677,887.661677


Save OD

In [49]:
origins_join2.to_csv(os.path.join(out_folder, 'OD_11_08.csv'))

In [50]:
od_cities = np.array(origins_join2['city'])

### Make rasters of min travel time to each dest

In [63]:
out_folder

'/home/wb514197/data/INFRA_SAP/MOZ/output'

In [64]:
output_path = os.path.join(out_folder, "travel_time")
if not os.path.exists(output_path):
    os.mkdir(output_path)

In [66]:
city_min = pd.DataFrame(origins_join2['city'].min(axis=1).apply(lambda x: (x/3600)), columns=["tt_city"])
ports_min = pd.DataFrame(origins_join2['port'].min(axis=1).apply(lambda x: (x/3600)), columns=["tt_port"])
airports_min = pd.DataFrame(origins_join2['airport'].min(axis=1).apply(lambda x: (x/3600)), columns=["tt_airport"])
borders_min = pd.DataFrame(origins_join2['border'].min(axis=1).apply(lambda x: (x/3600)), columns=["tt_border"])

In [73]:
origins_tt = origins_snapped.join([city_min, airports_min, ports_min, borders_min])

In [68]:
output_path

'/home/wb514197/data/INFRA_SAP/MOZ/output/travel_time'

In [None]:
aggregator.rasterize_gdf(origins_tt, 'tt_city', raster_path, os.path.join(output_path,f"cities_min_tt.tif"))
aggregator.rasterize_gdf(origins_tt, 'tt_port', raster_path, os.path.join(output_path,f"port_min_tt.tif"))
aggregator.rasterize_gdf(origins_tt, 'tt_airport', raster_path, os.path.join(output_path,f"airport_min_tt.tif"))
aggregator.rasterize_gdf(origins_tt, 'tt_border', raster_path, os.path.join(output_path,f"borders_min_tt.tif"))