## Country-level INFRA-SAP

This notebook exemplifies a simple market access estimation based on global datasets:

- **Global Friction Surface (Malaria Atlas Project)**  
see: https://developers.google.com/earth-engine/datasets/catalog/Oxford_MAP_friction_surface_2019


- **World Pop 1 km Population Grid**

In [1]:
import os, sys, time, importlib

import geopandas as gpd
import pandas as pd
sys.path.append('/home/wb514197/Repos/GOSTnets')

import rasterio as rio
import numpy as np
from shapely.geometry import Point

sys.path.append('/home/wb514197/Repos/INFRA_SAP')
from infrasap import aggregator
from utm_zone import epsg as epsg_get
import json

sys.path.append('/home/wb514197/Repos/gostrocks/src')
sys.path.append('/home/wb514197/Repos/GOSTNets_Raster/src')

import GOSTRocks.rasterMisc as rMisc
import GOSTNetsRaster.market_access as ma
import skimage.graph as graph

%load_ext autoreload
%autoreload 2

In [2]:
data_dir = os.path.join(os.path.expanduser("~"), 'data', 'south_sudan')

In [3]:
iso3 = "SSD"

In [4]:
out_folder = os.path.join(data_dir, "output_raster")
if not os.path.exists(out_folder):
    os.makedirs(out_folder)

### Clip friction surface and population to country extent

In [30]:
global_friction_surface = "/home/public/Data/GLOBAL/INFRA/FRICTION_2020/2020_motorized_friction_surface.geotiff"
global_population = "/home/public/Data/GLOBAL/Population/WorldPop_PPP_2020/ppp_2020_1km_Aggregated.tif"
inG = rio.open(global_friction_surface)
inP = rio.open(global_population)

# Read in country bounds
global_bounds = "/home/public/Data/GLOBAL/ADMIN/Admin0_Polys.shp"
admin1 = "/home/public/Data/GLOBAL/ADMIN/Admin1_Polys.shp"
admin2 = "/home/wb514197/data/PAK/pakistan_indicators.shp"

inB = gpd.read_file(global_bounds)
inB = inB.loc[inB['ISO3'] == iso3]
inB = inB.to_crs(inG.crs.to_string())

In [26]:
g = inB.geometry
g = g.unary_union
bb = g.bounds
bb = shapely.geometry.box(bb[0], bb[1], bb[2], bb[3])
bb = gpd.GeoDataFrame(geometry=[bb], crs='EPSG:4326')

In [33]:
# Clip the travel raster to ISO3
out_travel_surface = os.path.join(data_dir, "TRAVEL_SURFACE.tif")
rMisc.clipRaster(inG, bb, out_travel_surface)

In [None]:
# Clip the population raster to ISO3
out_pop_surface = os.path.join(data_dir, "POP_2020_NEW.tif")
rMisc.clipRaster(inP, inB, out_pop_surface)

In [35]:
out_pop_surface = os.path.join(data_dir, "POP_2020_NEW.tif")


In [36]:
travel_surf = rio.open(out_travel_surface)
pop_surf = rio.open(out_pop_surface)

In [37]:
# Make sure that both rasters have the exact same resolution, crs, and number of pixels
out_pop_surface_std = os.path.join(data_dir, "POP_2020_NEW_STD.tif")
rMisc.standardizeInputRasters(pop_surf, travel_surf, os.path.join(data_dir, "POP_2020_NEW_STD.tif"), data_type="C")

[array([[[-3.4028235e+38, -3.4028235e+38, -3.4028235e+38, ...,
          -3.4028235e+38, -3.4028235e+38, -3.4028235e+38],
         [-3.4028235e+38, -3.4028235e+38, -3.4028235e+38, ...,
          -3.4028235e+38, -3.4028235e+38, -3.4028235e+38],
         [-3.4028235e+38, -3.4028235e+38, -3.4028235e+38, ...,
          -3.4028235e+38, -3.4028235e+38, -3.4028235e+38],
         ...,
         [-3.4028235e+38, -3.4028235e+38, -3.4028235e+38, ...,
          -3.4028235e+38, -3.4028235e+38, -3.4028235e+38],
         [-3.4028235e+38, -3.4028235e+38, -3.4028235e+38, ...,
          -3.4028235e+38, -3.4028235e+38, -3.4028235e+38],
         [-3.4028235e+38, -3.4028235e+38, -3.4028235e+38, ...,
          -3.4028235e+38, -3.4028235e+38, -3.4028235e+38]]], dtype=float32),
 {'driver': 'GTiff',
  'dtype': 'float32',
  'nodata': -3.4028234663852886e+38,
  'width': 1341,
  'height': 1051,
  'count': 1,
  'crs': CRS.from_epsg(4326),
  'transform': Affine(0.008333333333333333, 0.0, 24.150000000000006,
        

### Prepare Raster

In [7]:
out_pop_surface_std = os.path.join(data_dir, "POP_2020_NEW_STD.tif")
out_travel_surface = os.path.join(data_dir, "TRAVEL_SURFACE.tif")

In [8]:
travel_surf = rio.open(out_travel_surface)

In [11]:
pop_surf = rio.open(out_pop_surface_std)
pop = pop_surf.read(1, masked=True)
indices = list(np.ndindex(pop.shape))
xys = [pop_surf.xy(ind[0], ind[1]) for ind in indices]
res_df = pd.DataFrame({
    'spatial_index': indices, 
    'xy': xys, 
    'pop': pop.flatten()
})
res_df['pointid'] = res_df.index

In [12]:
# create MCP object
inG_data = travel_surf.read(1) * 1000 #
# Correct no data values
inG_data[inG_data < 0] = 99999999
# inG_data[inG_data < 0] = np.nan
mcp = graph.MCP_Geometric(inG_data)

### Prepare destinations

In [13]:
inWater = gpd.read_file(os.path.join(data_dir, "SSd_Waterpoints_WIMS_2012.shp"))

In [14]:
inWater.head()

Unnamed: 0,ID,cleaning,Source_Typ,State,County,Payam,Boma,Village,Local_Name,Latitude,...,Pump_Type,SWL,Estimated,Total_Dept,DWL,Pump_Insta,Assessed_b,Drilled_by,Report_Dat,geometry
0,1.0,1.0,Borehole,CE,Juba,Juba,Goron,Goron 2,Goron 2,4.80375,...,,13.14,7.2,32.0,,,,,12/2/2006,POINT (31.36732 4.80375)
1,2.0,1.0,Borehole,CE,Juba,Juba,Juba,Kupuri 2 - B School,Kupuri 2 - B School,4.86687,...,,7.0,0.6,24.0,,,,,6/17/2006,POINT (31.50628 4.86687)
2,3.0,1.0,Borehole,CE,Juba,Juba,Juba,Kapuri Lemon Gaba,Kapuri 1- Lemon Gaba,4.86613,...,,10.9,0.0,26.0,,,,,6/15/2006,POINT (31.50215 4.86613)
3,4.0,1.0,Borehole,CE,Juba,Juba,Juba,Juba,Barge port,4.8312,...,,2.51,0.45,26.0,,,,,9/21/2006,POINT (31.61228 4.83120)
4,5.0,1.0,Borehole,CE,Juba,Juba,Juba,Gorom 1,Gorom 1,4.7998,...,,15.62,2.4,28.0,,,,,11/10/2006,POINT (31.36932 4.79980)


In [32]:
inWater.Status.value_counts()

operational      5836
needs repairs    1622
seasonal          139
abandoned           9
Name: Status, dtype: int64

In [31]:
len(inWater[inWater.Status != None])

9607

In [56]:
# inWater = inWater.loc[inWater.Status.isin(['operational', 'seasonal'])].copy()

In [33]:
%%time
res_all = ma.calculate_travel_time(travel_surf, mcp, inWater)[0]

CPU times: user 1.18 s, sys: 41.2 ms, total: 1.22 s
Wall time: 1.33 s


In [58]:
res_df.loc[:,'tt_water'] = res_all.flatten()

array([1.32667482e+03, 1.31323843e+03, 1.29970666e+03, ...,
       2.54554427e+02, 2.71523210e+02, 5.00002800e+07])

In [38]:
# remove values where pop is 0 or nan
res_df = res_df.loc[res_df['pop']!=0].copy()
res_df = res_df.loc[~(res_df['pop'].isna())].copy()

In [39]:
res_df.loc[:,'xy'] = res_df.loc[:,'xy'].apply(lambda x: Point(x))

In [40]:
len(res_df)

735858

Remove values where travel time was undefined

In [41]:
res_df = res_df.loc[res_df["tt_water"]<99999999]

In [42]:
len(res_df)

735858

In [43]:
origins_join = res_df.copy()

In [44]:
origins_join2 = gpd.GeoDataFrame(origins_join, geometry='xy', crs='EPSG:4326')

In [45]:
origins_join2.rename(columns={'xy':'geometry'}, inplace=True)

#### Convert to raster and save results

In [46]:
raster_path = out_pop_surface_std

In [47]:
# origins_join2.loc[:, "tt_water"] = origins_join2.loc[:, "tt_water"].apply(lambda x: x/60)

In [48]:
aggregator.rasterize_gdf(origins_join2, 'tt_water', raster_path, os.path.join(out_folder ,f"tt_water_all.tif"))