In [1]:
# system packages
import sys
import time
import warnings
import os

# non-geo numeric packages
import numpy as np
import math
from itertools import product, combinations
import pandas as pd

# network and OSM packages
import networkx as nx
import osmnx as ox
city_geo = ox.geocoder.geocode_to_gdf

# Earth engine packages
import ee
import geemap

# General geo-packages
import libpysal
import rasterio
import geopandas as gpd
import shapely
from shapely import geometry
from shapely.geometry import Point, MultiLineString, LineString, Polygon, MultiPolygon

In [2]:
# Initialise Google Earth Engine, authenticating only when needed.
# Calling ee.Authenticate() unconditionally triggers the interactive
# verification-code prompt on every run, which blocks "Restart & Run All".
# Instead, first try the credentials already stored on this machine and fall
# back to the interactive flow only if initialisation fails.
try:
    ee.Initialize()
except Exception:  # the exception type for missing/expired credentials varies between ee versions
    ee.Authenticate()
    ee.Initialize()

Enter verification code: <redacted>

Successfully saved authorization token.


In [12]:
%%time
# Thresholds and cities
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

thresholds = [300, 600, 1000] # route threshold in metres. WHO guideline speaks of access within 300m

# Extract cities list
iso = pd.read_excel('iso_countries.xlsx')
cities = pd.read_excel('cities.xlsx')
cities_adj = cities[cities['City'].isin(['Zibo'])]
cities_adj = cities_adj.reset_index()

# 1. Required preprocess for information extraction
warnings.filterwarnings('ignore')

# Predifine in Excel: the (1) city name as "City" and (2) the OSM area that needs to be extracted as "OSM_area"
# i.e. City = "Los Angeles" and OSM_area = "Los Angeles county, Orange county CA"
files = gee_worldpop_extract(cities_adj,iso,'D:/Dumps/GEE_city_grids/')

# Files are downloaded automatically to the specified path. Files are also stored in Google with a downloadlink:

# 2. Information extraction

# Get road networks
road_networks = road_network(cities_adj, # Get 'all' (drive,walk,bike) network
                              thresholds,
                              undirected = True)
print(' ')
# Extract urban greenspace (UGS)
UGS = urban_greenspace(cities_adj, 
                       thresholds,
                       one_UGS_buf = 25, # buffer at which UGS is seen as one
                       min_UGS_size = 400) # WHO sees this as minimum UGS size (400m2)

print(' ')
# Clip cities from countries, format population grids
population_grids = city_grids_format(files,
                                     cities_adj['OSM_area'],
                                     road_networks['nodes'],
                                     UGS,
                                     grid_size = 100) # aggregating upwards to i.e. 200m, 300m etc. is possible
print('')
# Get fake entry points (between UGS and buffer limits)
UGS_entry = UGS_fake_entry(UGS, 
                           road_networks['nodes'], 
                           road_networks['graphs'],
                           cities_adj['City'],
                           population_grids,
                           thresholds,
                           UGS_entry_buf = 25, # road nodes within 25 meters are seen as fake entry points
                           walk_radius = 500, # assume that the average person only views a UGS up to 500m in radius
                                                # more attractive
                           entry_point_merge = 0) # merges closeby fake UGS entry points within X meters 
                                                    # what may be done for performance
print('')
suitible_enh = suitible_enhanced(UGS_entry, 
                                 population_grids, 
                                 road_networks['nodes'], 
                                 cities_adj['City'], 
                                 thresholds)
print('')
subgraphs = obtaining_subgraphs(road_networks['graphs'],
                                population_grids,
                                UGS_entry,
                                road_networks['nodes'],
                                cities_adj['City'],
                                thresholds)
print('')
Dir_Routes = direct_routing (suitible_enh,
                             subgraphs['graphs'],
                             road_networks['edges'],
                             cities_adj['City'],
                            time_sleep = 30)

print('')
grid_scores = grid_score_summary (Dir_Routes, # Shortest routes by the Dijkstra algorithm, with gravity variant distance adj.
                                  cities_adj['City'], 
                                  population_grids, 
                                  ext = '_Zibo', # At multiple runs, the extention prevents the summarized file to be overwritten.
                                  save_path = 'D:/Dumps/GEE-WP Scores/Gravity_adj/',
                                  grid_size = 100) # Size of the grid in meters
grid_scores

['China']
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/thumbnails/747b1673c81bd682790926900b1b8d50-cf06eb95fab2260ad60ca4e9e3ce799e:getPixels
Please wait ...
Data downloaded to D:\Dumps\GEE_city_grids\CHN_Zibo_2020.tif
get road networks from OSM
Zibo done 0.11 mns
 
get urban greenspaces from OSM
Zibo done
 
100m resolution grids extraction
Zibo 0.65 mns

Zibo
0.0 % fake entry points done 0.0  mns
Zibo 100 % fake entry points done 0.06  mns
Zibo 100 % done 0.07  mns

get (Euclidean) suitible combinations
0.0 % 0.0 mns
100 % finding combinations done
Zibo 185966 suitible combinations

obtain local graphs
Zibo
0.0 % done 0.03 mns
100 % done 0.05 mns

Zibo
0.0 % 0.12 mns
48.46 % 0.69 mns
96.93 % 1.2 mns
100 % done 1.7 mns

Zibo
entrance 0.04 mns
grid  300
grid  600
grid  1000
gravity**(1/2) 1.04 mns
grid  300
grid  600
grid  1000
gravity**(1/3) 2.17 mns
grid  300
grid  600
grid  1000
gravity**(1/5) 3.18 mns
grid  300
grid 

Unnamed: 0,City,Zibo
entrance_300,1 high,0.061045
entrance_300,2 medium,0.010304
entrance_300,3 low,0.028739
entrance_300,4 no,0.899912
entrance_600,1 high,0.070557
entrance_600,2 medium,0.035164
entrance_600,3 low,0.067924
entrance_600,4 no,0.826355
entrance_1000,1 high,0.099716
entrance_1000,2 medium,0.061056


In [13]:
%%time
# Thresholds and cities
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

thresholds = [300, 600, 1000] # route threshold in metres. WHO guideline speaks of access within 300m

# Extract cities list
iso = pd.read_excel('iso_countries.xlsx')
cities = pd.read_excel('cities.xlsx')
cities_adj = cities[cities['City'].isin(['Indore'])]
cities_adj = cities_adj.reset_index()

# 1. Required preprocess for information extraction
warnings.filterwarnings('ignore')

# Predifine in Excel: the (1) city name as "City" and (2) the OSM area that needs to be extracted as "OSM_area"
# i.e. City = "Los Angeles" and OSM_area = "Los Angeles county, Orange county CA"
files = gee_worldpop_extract(cities_adj,iso,'D:/Dumps/GEE_city_grids/')

# Files are downloaded automatically to the specified path. Files are also stored in Google with a downloadlink:

# 2. Information extraction

# Get road networks
road_networks = road_network(cities_adj, # Get 'all' (drive,walk,bike) network
                              thresholds,
                              undirected = True)
print(' ')
# Extract urban greenspace (UGS)
UGS = urban_greenspace(cities_adj, 
                       thresholds,
                       one_UGS_buf = 25, # buffer at which UGS is seen as one
                       min_UGS_size = 400) # WHO sees this as minimum UGS size (400m2)

print(' ')
# Clip cities from countries, format population grids
population_grids = city_grids_format(files,
                                     cities_adj['OSM_area'],
                                     road_networks['nodes'],
                                     UGS,
                                     grid_size = 100) # aggregating upwards to i.e. 200m, 300m etc. is possible
print('')
# Get fake entry points (between UGS and buffer limits)
UGS_entry = UGS_fake_entry(UGS, 
                           road_networks['nodes'], 
                           road_networks['graphs'],
                           cities_adj['City'],
                           population_grids,
                           thresholds,
                           UGS_entry_buf = 25, # road nodes within 25 meters are seen as fake entry points
                           walk_radius = 500, # assume that the average person only views a UGS up to 500m in radius
                                                # more attractive
                           entry_point_merge = 0) # merges closeby fake UGS entry points within X meters 
                                                    # what may be done for performance
print('')
suitible_enh = suitible_enhanced(UGS_entry, 
                                 population_grids, 
                                 road_networks['nodes'], 
                                 cities_adj['City'], 
                                 thresholds)
print('')
subgraphs = obtaining_subgraphs(road_networks['graphs'],
                                population_grids,
                                UGS_entry,
                                road_networks['nodes'],
                                cities_adj['City'],
                                thresholds)
print('')
Dir_Routes = direct_routing (suitible_enh,
                             subgraphs['graphs'],
                             road_networks['edges'],
                             cities_adj['City'],
                            time_sleep = 30)

print('')
grid_scores = grid_score_summary (Dir_Routes, # Shortest routes by the Dijkstra algorithm, with gravity variant distance adj.
                                  cities_adj['City'], 
                                  population_grids, 
                                  ext = '_Indore', # At multiple runs, the extention prevents the summarized file to be overwritten.
                                  save_path = 'D:/Dumps/GEE-WP Scores/Gravity_adj/',
                                  grid_size = 100) # Size of the grid in meters
grid_scores

['India']
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/thumbnails/6c681418fb83e1a2bfd116d8664a4eaf-d39ade2fd44e70c79976ee87c8eeb3ae:getPixels
Please wait ...
Data downloaded to D:\Dumps\GEE_city_grids\IND_Indore_2020.tif
get road networks from OSM
Indore done 1.32 mns
 
get urban greenspaces from OSM
Indore done
 
100m resolution grids extraction
Indore 1.57 mns

Indore
0.0 % fake entry points done 0.0  mns
19.3 % fake entry points done 0.39  mns
38.5 % fake entry points done 0.76  mns
57.8 % fake entry points done 1.12  mns
77.1 % fake entry points done 1.49  mns
96.3 % fake entry points done 1.87  mns
Indore 100 % fake entry points done 1.94  mns
Indore 100 % done 1.95  mns

get (Euclidean) suitible combinations
0.0 % 0.0 mns
33.88 % 0.6 mns
67.75 % 1.24 mns
100 % finding combinations done
Indore 280376 suitible combinations

obtain local graphs
Indore
0.0 % done 0.9 mns
33.88 % done 0.95 mns
67.75 % done 1.51 mns
100

Unnamed: 0,City,Indore
entrance_300,1 high,0.083574
entrance_300,2 medium,0.034204
entrance_300,3 low,0.089766
entrance_300,4 no,0.792456
entrance_600,1 high,0.175276
entrance_600,2 medium,0.081642
entrance_600,3 low,0.144365
entrance_600,4 no,0.598717
entrance_1000,1 high,0.305662
entrance_1000,2 medium,0.099871


In [14]:
%%time
# Thresholds and cities
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

thresholds = [300, 600, 1000] # route threshold in metres. WHO guideline speaks of access within 300m

# Extract cities list
iso = pd.read_excel('iso_countries.xlsx')
cities = pd.read_excel('cities.xlsx')
cities_adj = cities[cities['City'].isin(['Guatemala City'])]
cities_adj = cities_adj.reset_index()

# 1. Required preprocess for information extraction
warnings.filterwarnings('ignore')

# Predifine in Excel: the (1) city name as "City" and (2) the OSM area that needs to be extracted as "OSM_area"
# i.e. City = "Los Angeles" and OSM_area = "Los Angeles county, Orange county CA"
files = gee_worldpop_extract(cities_adj,iso,'D:/Dumps/GEE_city_grids/')

# Files are downloaded automatically to the specified path. Files are also stored in Google with a downloadlink:

# 2. Information extraction

# Get road networks
road_networks = road_network(cities_adj, # Get 'all' (drive,walk,bike) network
                              thresholds,
                              undirected = True)
print(' ')
# Extract urban greenspace (UGS)
UGS = urban_greenspace(cities_adj, 
                       thresholds,
                       one_UGS_buf = 25, # buffer at which UGS is seen as one
                       min_UGS_size = 400) # WHO sees this as minimum UGS size (400m2)

print(' ')
# Clip cities from countries, format population grids
population_grids = city_grids_format(files,
                                     cities_adj['OSM_area'],
                                     road_networks['nodes'],
                                     UGS,
                                     grid_size = 100) # aggregating upwards to i.e. 200m, 300m etc. is possible
print('')
# Get fake entry points (between UGS and buffer limits)
UGS_entry = UGS_fake_entry(UGS, 
                           road_networks['nodes'], 
                           road_networks['graphs'],
                           cities_adj['City'],
                           population_grids,
                           thresholds,
                           UGS_entry_buf = 25, # road nodes within 25 meters are seen as fake entry points
                           walk_radius = 500, # assume that the average person only views a UGS up to 500m in radius
                                                # more attractive
                           entry_point_merge = 0) # merges closeby fake UGS entry points within X meters 
                                                    # what may be done for performance
print('')
suitible_enh = suitible_enhanced(UGS_entry, 
                                 population_grids, 
                                 road_networks['nodes'], 
                                 cities_adj['City'], 
                                 thresholds)
print('')
subgraphs = obtaining_subgraphs(road_networks['graphs'],
                                population_grids,
                                UGS_entry,
                                road_networks['nodes'],
                                cities_adj['City'],
                                thresholds)
print('')
Dir_Routes = direct_routing (suitible_enh,
                             subgraphs['graphs'],
                             road_networks['edges'],
                             cities_adj['City'],
                            time_sleep = 30)

print('')
grid_scores = grid_score_summary (Dir_Routes, # Shortest routes by the Dijkstra algorithm, with gravity variant distance adj.
                                  cities_adj['City'], 
                                  population_grids, 
                                  ext = '_Guatemala City', # At multiple runs, the extention prevents the summarized file to be overwritten.
                                  save_path = 'D:/Dumps/GEE-WP Scores/Gravity_adj/',
                                  grid_size = 100) # Size of the grid in meters
grid_scores

['Guatemala']
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/thumbnails/0e4df133f539bd506383073a7f23eee0-ab4e233e7091b25445264bad507663a0:getPixels
Please wait ...
Data downloaded to D:\Dumps\GEE_city_grids\GTM_Guatemala City_2020.tif
get road networks from OSM
Guatemala City done 1.25 mns
 
get urban greenspaces from OSM
Guatemala City done
 
100m resolution grids extraction
Guatemala City 1.88 mns

Guatemala City
0.0 % fake entry points done 0.0  mns
11.5 % fake entry points done 0.43  mns
23.0 % fake entry points done 0.84  mns
34.5 % fake entry points done 1.27  mns
46.0 % fake entry points done 1.72  mns
57.5 % fake entry points done 2.18  mns
69.0 % fake entry points done 2.66  mns
80.6 % fake entry points done 3.07  mns
92.1 % fake entry points done 3.5  mns
Guatemala City 100 % fake entry points done 3.8  mns
Guatemala City 100 % done 3.82  mns

get (Euclidean) suitible combinations
0.0 % 0.0 mns
31.85 % 0.35 mns


Unnamed: 0,City,Guatemala City
entrance_300,1 high,0.115183
entrance_300,2 medium,0.003307
entrance_300,3 low,0.026636
entrance_300,4 no,0.854874
entrance_600,1 high,0.126035
entrance_600,2 medium,0.031104
entrance_600,3 low,0.07058
entrance_600,4 no,0.772281
entrance_1000,1 high,0.149469
entrance_1000,2 medium,0.063332


In [15]:
%%time
# Thresholds and cities
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

thresholds = [300, 600, 1000] # route threshold in metres. WHO guideline speaks of access within 300m

# Extract cities list
iso = pd.read_excel('iso_countries.xlsx')
cities = pd.read_excel('cities.xlsx')
cities_adj = cities[cities['City'].isin(['Lagos'])]
cities_adj = cities_adj.reset_index()

# 1. Required preprocess for information extraction
warnings.filterwarnings('ignore')

# Predifine in Excel: the (1) city name as "City" and (2) the OSM area that needs to be extracted as "OSM_area"
# i.e. City = "Los Angeles" and OSM_area = "Los Angeles county, Orange county CA"
files = gee_worldpop_extract(cities_adj,iso,'D:/Dumps/GEE_city_grids/')

# Files are downloaded automatically to the specified path. Files are also stored in Google with a downloadlink:

# 2. Information extraction

# Get road networks
road_networks = road_network(cities_adj, # Get 'all' (drive,walk,bike) network
                              thresholds,
                              undirected = True)
print(' ')
# Extract urban greenspace (UGS)
UGS = urban_greenspace(cities_adj, 
                       thresholds,
                       one_UGS_buf = 25, # buffer at which UGS is seen as one
                       min_UGS_size = 400) # WHO sees this as minimum UGS size (400m2)

print(' ')
# Clip cities from countries, format population grids
population_grids = city_grids_format(files,
                                     cities_adj['OSM_area'],
                                     road_networks['nodes'],
                                     UGS,
                                     grid_size = 100) # aggregating upwards to i.e. 200m, 300m etc. is possible
print('')
# Get fake entry points (between UGS and buffer limits)
UGS_entry = UGS_fake_entry(UGS, 
                           road_networks['nodes'], 
                           road_networks['graphs'],
                           cities_adj['City'],
                           population_grids,
                           thresholds,
                           UGS_entry_buf = 25, # road nodes within 25 meters are seen as fake entry points
                           walk_radius = 500, # assume that the average person only views a UGS up to 500m in radius
                                                # more attractive
                           entry_point_merge = 0) # merges closeby fake UGS entry points within X meters 
                                                    # what may be done for performance
print('')
suitible_enh = suitible_enhanced(UGS_entry, 
                                 population_grids, 
                                 road_networks['nodes'], 
                                 cities_adj['City'], 
                                 thresholds)
print('')
subgraphs = obtaining_subgraphs(road_networks['graphs'],
                                population_grids,
                                UGS_entry,
                                road_networks['nodes'],
                                cities_adj['City'],
                                thresholds)
print('')
Dir_Routes = direct_routing (suitible_enh,
                             subgraphs['graphs'],
                             road_networks['edges'],
                             cities_adj['City'],
                            time_sleep = 30)

print('')
grid_scores = grid_score_summary (Dir_Routes, # Shortest routes by the Dijkstra algorithm, with gravity variant distance adj.
                                  cities_adj['City'], 
                                  population_grids, 
                                  ext = '_Lagos', # At multiple runs, the extention prevents the summarized file to be overwritten.
                                  save_path = 'D:/Dumps/GEE-WP Scores/Gravity_adj/',
                                  grid_size = 100) # Size of the grid in meters
grid_scores

['Nigeria']
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/thumbnails/9dd5fa1c7e53874dedb5b233adbd80ae-a2c6dceb08a2300a65a81a799020bb10:getPixels
Please wait ...
Data downloaded to D:\Dumps\GEE_city_grids\NGA_Lagos_2020.tif
get road networks from OSM
Lagos done 3.41 mns
 
get urban greenspaces from OSM
Lagos done
 
100m resolution grids extraction
Lagos 3.94 mns

Lagos
0.0 % fake entry points done 0.0  mns
90.9 % fake entry points done 0.93  mns
Lagos 100 % fake entry points done 1.07  mns
Lagos 100 % done 1.07  mns

get (Euclidean) suitible combinations
0.0 % 0.0 mns
91.24 % 0.98 mns
100 % finding combinations done
Lagos 281939 suitible combinations

obtain local graphs
Lagos
0.0 % done 0.55 mns
91.24 % done 0.6 mns
100 % done 1.11 mns

Lagos
0.0 % 0.07 mns
14.55 % 0.72 mns
29.1 % 1.35 mns
43.66 % 1.99 mns
58.21 % 2.57 mns
72.76 % 3.16 mns
87.31 % 3.75 mns
100 % done 4.25 mns

Lagos
entrance 0.06 mns
grid  300
grid  600


Unnamed: 0,City,Lagos
entrance_300,1 high,0.019116
entrance_300,2 medium,0.001731
entrance_300,3 low,0.005822
entrance_300,4 no,0.973331
entrance_600,1 high,0.019403
entrance_600,2 medium,0.007535
entrance_600,3 low,0.022363
entrance_600,4 no,0.950699
entrance_1000,1 high,0.021123
entrance_1000,2 medium,0.020488


In [16]:
%%time
# Thresholds and cities
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

thresholds = [300, 600, 1000] # route threshold in metres. WHO guideline speaks of access within 300m

# Extract cities list
iso = pd.read_excel('iso_countries.xlsx')
cities = pd.read_excel('cities.xlsx')
cities_adj = cities[cities['City'].isin(['Abidjan'])]
cities_adj = cities_adj.reset_index()

# 1. Required preprocess for information extraction
warnings.filterwarnings('ignore')

# Predifine in Excel: the (1) city name as "City" and (2) the OSM area that needs to be extracted as "OSM_area"
# i.e. City = "Los Angeles" and OSM_area = "Los Angeles county, Orange county CA"
files = gee_worldpop_extract(cities_adj,iso,'D:/Dumps/GEE_city_grids/')

# Files are downloaded automatically to the specified path. Files are also stored in Google with a downloadlink:

# 2. Information extraction

# Get road networks
road_networks = road_network(cities_adj, # Get 'all' (drive,walk,bike) network
                              thresholds,
                              undirected = True)
print(' ')
# Extract urban greenspace (UGS)
UGS = urban_greenspace(cities_adj, 
                       thresholds,
                       one_UGS_buf = 25, # buffer at which UGS is seen as one
                       min_UGS_size = 400) # WHO sees this as minimum UGS size (400m2)

print(' ')
# Clip cities from countries, format population grids
population_grids = city_grids_format(files,
                                     cities_adj['OSM_area'],
                                     road_networks['nodes'],
                                     UGS,
                                     grid_size = 100) # aggregating upwards to i.e. 200m, 300m etc. is possible
print('')
# Get fake entry points (between UGS and buffer limits)
UGS_entry = UGS_fake_entry(UGS, 
                           road_networks['nodes'], 
                           road_networks['graphs'],
                           cities_adj['City'],
                           population_grids,
                           thresholds,
                           UGS_entry_buf = 25, # road nodes within 25 meters are seen as fake entry points
                           walk_radius = 500, # assume that the average person only views a UGS up to 500m in radius
                                                # more attractive
                           entry_point_merge = 0) # merges closeby fake UGS entry points within X meters 
                                                    # what may be done for performance
print('')
suitible_enh = suitible_enhanced(UGS_entry, 
                                 population_grids, 
                                 road_networks['nodes'], 
                                 cities_adj['City'], 
                                 thresholds)
print('')
subgraphs = obtaining_subgraphs(road_networks['graphs'],
                                population_grids,
                                UGS_entry,
                                road_networks['nodes'],
                                cities_adj['City'],
                                thresholds)
print('')
Dir_Routes = direct_routing (suitible_enh,
                             subgraphs['graphs'],
                             road_networks['edges'],
                             cities_adj['City'],
                            time_sleep = 30)

print('')
grid_scores = grid_score_summary (Dir_Routes, # Shortest routes by the Dijkstra algorithm, with gravity variant distance adj.
                                  cities_adj['City'], 
                                  population_grids, 
                                  ext = '_Abidjan', # At multiple runs, the extention prevents the summarized file to be overwritten.
                                  save_path = 'D:/Dumps/GEE-WP Scores/Gravity_adj/',
                                  grid_size = 100) # Size of the grid in meters
grid_scores

["Côte d'Ivoire"]
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/thumbnails/133236acd52df493cbb311880356fb52-9d9f1f1d949ea30476fc728970d3891a:getPixels
Please wait ...
Data downloaded to D:\Dumps\GEE_city_grids\CIV_Abidjan_2020.tif
get road networks from OSM
Abidjan done 2.32 mns
 
get urban greenspaces from OSM
Abidjan done
 
100m resolution grids extraction
Abidjan 2.97 mns

Abidjan
0.0 % fake entry points done 0.01  mns
32.4 % fake entry points done 0.58  mns
64.7 % fake entry points done 1.2  mns
97.1 % fake entry points done 1.92  mns
Abidjan 100 % fake entry points done 2.0  mns
Abidjan 100 % done 2.03  mns

get (Euclidean) suitible combinations
0.0 % 0.0 mns
36.66 % 0.58 mns
73.31 % 1.22 mns
100 % finding combinations done
Abidjan 1004661 suitible combinations

obtain local graphs
Abidjan
0.0 % done 0.9 mns
36.66 % done 1.0 mns
73.31 % done 1.67 mns
100 % done 2.3 mns

Abidjan
0.0 % 0.15 mns
4.7 % 0.78 mns
9.39 % 1

Unnamed: 0,City,Abidjan
entrance_300,1 high,0.072288
entrance_300,2 medium,0.014692
entrance_300,3 low,0.04327
entrance_300,4 no,0.869749
entrance_600,1 high,0.082141
entrance_600,2 medium,0.056726
entrance_600,3 low,0.132901
entrance_600,4 no,0.728232
entrance_1000,1 high,0.126597
entrance_1000,2 medium,0.132357


In [12]:
%%time
# Thresholds and cities
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

thresholds = [300, 600, 1000] # route threshold in metres. WHO guideline speaks of access within 300m

# Extract cities list
iso = pd.read_excel('iso_countries.xlsx')
cities = pd.read_excel('cities.xlsx')
cities_adj = cities[cities['City'].isin(['Johannesburg'])]
cities_adj = cities_adj.reset_index()

# 1. Required preprocess for information extraction
warnings.filterwarnings('ignore')

# Predifine in Excel: the (1) city name as "City" and (2) the OSM area that needs to be extracted as "OSM_area"
# i.e. City = "Los Angeles" and OSM_area = "Los Angeles county, Orange county CA"
files = gee_worldpop_extract(cities_adj,iso,'D:/Dumps/GEE_city_grids/')

# Files are downloaded automatically to the specified path. Files are also stored in Google with a downloadlink:

# 2. Information extraction

# Get road networks
road_networks = road_network(cities_adj, # Get 'all' (drive,walk,bike) network
                              thresholds,
                              undirected = True)
print(' ')
# Extract urban greenspace (UGS)
UGS = urban_greenspace(cities_adj, 
                       thresholds,
                       one_UGS_buf = 25, # buffer at which UGS is seen as one
                       min_UGS_size = 400) # WHO sees this as minimum UGS size (400m2)

print(' ')
# Clip cities from countries, format population grids
population_grids = city_grids_format(files,
                                     cities_adj['OSM_area'],
                                     road_networks['nodes'],
                                     UGS,
                                     grid_size = 100) # aggregating upwards to i.e. 200m, 300m etc. is possible
print('')
# Get fake entry points (between UGS and buffer limits)
UGS_entry = UGS_fake_entry(UGS, 
                           road_networks['nodes'], 
                           road_networks['graphs'],
                           cities_adj['City'],
                           population_grids,
                           thresholds,
                           UGS_entry_buf = 25, # road nodes within 25 meters are seen as fake entry points
                           walk_radius = 500, # assume that the average person only views a UGS up to 500m in radius
                                                # more attractive
                           entry_point_merge = 0) # merges closeby fake UGS entry points within X meters 
                                                    # what may be done for performance
print('')
suitible_enh = suitible_enhanced(UGS_entry, 
                                 population_grids, 
                                 road_networks['nodes'], 
                                 cities_adj['City'], 
                                 thresholds)
print('')
subgraphs = obtaining_subgraphs(road_networks['graphs'],
                                population_grids,
                                UGS_entry,
                                road_networks['nodes'],
                                cities_adj['City'],
                                thresholds)
print('')
Dir_Routes = direct_routing (suitible_enh,
                             subgraphs['graphs'],
                             road_networks['edges'],
                             cities_adj['City'],
                            time_sleep = 30)

print('')
grid_scores = grid_score_summary (Dir_Routes, # Shortest routes by the Dijkstra algorithm, with gravity variant distance adj.
                                  cities_adj['City'], 
                                  population_grids, 
                                  ext = '_Johannesburg', # At multiple runs, the extention prevents the summarized file to be overwritten.
                                  save_path = 'D:/Dumps/GEE-WP Scores/Gravity_adj/',
                                  grid_size = 100) # Size of the grid in meters
grid_scores

['South Africa']
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/thumbnails/8b604efbbcc47906bd0ec1f825dd5a07-94db0194128512b4409e41d829b10f5c:getPixels
Please wait ...
Data downloaded to D:\Dumps\GEE_city_grids\ZAF_Johannesburg_2020.tif
get road networks from OSM
Johannesburg done 4.98 mns
 
get urban greenspaces from OSM
Johannesburg done
 
100m resolution grids extraction
Johannesburg 9.64 mns

Johannesburg
0.0 % fake entry points done 0.02  mns
15.1 % fake entry points done 1.64  mns
30.3 % fake entry points done 3.12  mns
45.4 % fake entry points done 4.59  mns
60.5 % fake entry points done 6.05  mns
75.6 % fake entry points done 7.61  mns
90.8 % fake entry points done 9.16  mns
Johannesburg 100 % fake entry points done 10.14  mns
Johannesburg 100 % done 10.18  mns

get (Euclidean) suitible combinations
0.0 % 0.0 mns
11.62 % 1.37 mns
23.25 % 2.78 mns
34.87 % 4.27 mns
46.49 % 5.79 mns
58.11 % 7.35 mns
69.74 % 8.93 mns
8

Unnamed: 0,City,Johannesburg
entrance_300,1 high,0.023651
entrance_300,2 medium,0.011473
entrance_300,3 low,0.029957
entrance_300,4 no,0.934919
entrance_600,1 high,0.033872
entrance_600,2 medium,0.039216
entrance_600,3 low,0.076822
entrance_600,4 no,0.85009
entrance_1000,1 high,0.070556
entrance_1000,2 medium,0.071597


In [None]:
%%time
# Thresholds and cities
# End-to-end run for a single city: download the population raster, extract
# roads and greenspace, build grid/entry-point pairs, route them and summarise.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

# Route thresholds in metres; the WHO guideline speaks of access within 300 m.
# NOTE: grid_score_summary reads `thresholds` as a notebook global, so keep this name.
thresholds = [300, 600, 1000]

# Extract cities list
iso = pd.read_excel('iso_countries.xlsx')
cities = pd.read_excel('cities.xlsx')
cities_adj = cities[cities['City'].isin(['Bangalore'])]
cities_adj = cities_adj.reset_index()

# 1. Required preprocessing for information extraction
warnings.filterwarnings('ignore')

# Predefine in Excel: (1) the city name as "City" and (2) the OSM area that needs to be extracted as "OSM_area",
# e.g. City = "Los Angeles" and OSM_area = "Los Angeles county, Orange county CA"
files = gee_worldpop_extract(cities_adj,iso,'D:/Dumps/GEE_city_grids/')

# Files are downloaded automatically to the specified path. Files are also stored in Google with a download link.

# 2. Information extraction

# Get road networks
road_networks = road_network(cities_adj, # Get the 'all' (drive, walk, bike) network
                              thresholds,
                              undirected = True)
print(' ')
# Extract urban greenspace (UGS)
UGS = urban_greenspace(cities_adj, 
                       thresholds,
                       one_UGS_buf = 25, # buffer (m) within which UGS are seen as one
                       min_UGS_size = 400) # WHO sees this as the minimum UGS size (400 m2)

print(' ')
# Clip cities from countries, format population grids
population_grids = city_grids_format(files,
                                     cities_adj['OSM_area'],
                                     road_networks['nodes'],
                                     UGS,
                                     grid_size = 100) # aggregating upwards to e.g. 200 m, 300 m etc. is possible
print('')
# Get fake entry points (between UGS and buffer limits)
UGS_entry = UGS_fake_entry(UGS, 
                           road_networks['nodes'], 
                           road_networks['graphs'],
                           cities_adj['City'],
                           population_grids,
                           thresholds,
                           UGS_entry_buf = 25, # road nodes within 25 metres are seen as fake entry points
                           walk_radius = 500, # assume that the average person only views a UGS up to 500 m in radius
                                                # as more attractive
                           entry_point_merge = 0) # merges close-by fake UGS entry points within X metres,
                                                    # which may be done for performance
print('')
# Candidate grid / entry-point pairs
suitible_enh = suitible_enhanced(UGS_entry, 
                                 population_grids, 
                                 road_networks['nodes'], 
                                 cities_adj['City'], 
                                 thresholds)
print('')
# Local road subgraph around every entry point
subgraphs = obtaining_subgraphs(road_networks['graphs'],
                                population_grids,
                                UGS_entry,
                                road_networks['nodes'],
                                cities_adj['City'],
                                thresholds)
print('')
# Route every candidate pair over its subgraph
Dir_Routes = direct_routing (suitible_enh,
                             subgraphs['graphs'],
                             road_networks['edges'],
                             cities_adj['City'],
                            time_sleep = 30)

print('')
grid_scores = grid_score_summary (Dir_Routes, # Shortest routes by the Dijkstra algorithm, with gravity variant distance adj.
                                  cities_adj['City'], 
                                  population_grids, 
                                  ext = '_Bangalore', # With multiple runs, the extension prevents the summary file from being overwritten.
                                  save_path = 'D:/Dumps/GEE-WP Scores/Gravity_adj/',
                                  grid_size = 100) # Size of the grid in metres
grid_scores

['India']
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/thumbnails/1260c38ac9a886bd9baa59c0c638f489-898dbdb56af955c30b12aa5c6e5e9c59:getPixels
Please wait ...
Data downloaded to D:\Dumps\GEE_city_grids\IND_Bangalore_2020.tif
get road networks from OSM
Bangalore done 5.07 mns
 
get urban greenspaces from OSM
Bangalore done
 
100m resolution grids extraction
Bangalore 10.12 mns

Bangalore
0.0 % fake entry points done 0.02  mns
6.4 % fake entry points done 1.89  mns
12.7 % fake entry points done 3.79  mns
19.1 % fake entry points done 5.69  mns
25.4 % fake entry points done 7.56  mns
31.8 % fake entry points done 9.53  mns
38.2 % fake entry points done 11.51  mns
44.5 % fake entry points done 13.3  mns
50.9 % fake entry points done 15.2  mns
57.3 % fake entry points done 17.11  mns
63.6 % fake entry points done 19.12  mns
70.0 % fake entry points done 21.32  mns
76.3 % fake entry points done 23.27  mns
82.7 % fake entry poi

In [3]:
def gee_worldpop_extract (city_file, iso, save_path = None):
    """Export the 2020 WorldPop 100 m population raster of every city as a GeoTIFF.

    For each row of ``city_file`` the listed OSM areas are geocoded and dissolved
    into one geometry, the country is taken from the last part of the geocoder's
    display name and mapped to its ISO alpha-3 code, and the matching WorldPop
    image is clipped to the city geometry and exported with
    ``geemap.ee_export_image``.

    Parameters
    ----------
    city_file : pandas.DataFrame
        Needs the columns ``City`` and ``OSM_area``; ``OSM_area`` may name
        several places separated by ``', '``.
    iso : pandas.DataFrame
        Country lookup with the columns ``name`` and ``alpha3``. Not modified.
    save_path : str, optional
        Prefix put in front of every file name (include the trailing
        separator). The directory is created when missing. ``None`` means no
        prefix.

    Returns
    -------
    list of str
        One path per city, in the form ``<save_path><ISO3>_<City>_2020.tif``.
    """
    cities = city_file
    city_names = cities['City'].tolist()

    # Rename on a derived Series so the caller's lookup table stays untouched.
    iso_names = iso['name'].where(iso['name'] != 'Macedonia', 'North Macedonia')

    # Get included city areas (first OSM_area entry registered for each city name)
    OSM_incl = [cities[cities['City'] == city]['OSM_area'].tolist()[0].rsplit(', ') for city in city_names]

    # Geocode every city once; geometry and country are both read from this result.
    dissolved = [city_geo(places).dissolve() for places in OSM_incl]

    # Get the city geoms
    obj = [gdf['geometry'].tolist()[0] for gdf in dissolved]

    # Get the city countries (last component of the geocoder's display name)
    obj_displ = [gdf['display_name'].tolist()[0].rsplit(', ')[-1] for gdf in dissolved]
    print(obj_displ)
    # NOTE(review): the replacement spelling presumably mirrors the entry in the iso table - verify.
    obj_displ = np.where(pd.Series(obj_displ).str.contains("Ivoire"),"CIte dIvoire",obj_displ)

    # Get the country's iso-code
    iso_list = [iso[iso_names == ob]['alpha3'].tolist()[0] for ob in obj_displ]

    # Based on the iso-code return the worldpop 2020
    ee_worldpop = [ee.ImageCollection("WorldPop/GP/100m/pop")\
        .filter(ee.Filter.date('2020'))\
        .filter(ee.Filter.inList('country', [io])).first() for io in iso_list]

    # Clip the countries with the city geoms.
    clipped = [image.clip(shapely.geometry.mapping(geom)) for image, geom in zip(ee_worldpop, obj)]

    # Create path if non-existent
    if save_path is None:
        path = ''
    else:
        path = save_path
        os.makedirs(path, exist_ok = True)

    # Export as TIFF file.
    # Stored in form path + USA_Los Angeles_2020.tif
    filenames = [path + code + '_' + name + '_2020.tif' for code, name in zip(iso_list, city_names)]
    for image, filename in zip(clipped, filenames):
        geemap.ee_export_image(image, filename = filename)

    # Flush before returning; the original call sat after the return and never ran.
    sys.stdout.flush()
    return(filenames)
    
    # Block 2 Road networks
def road_network (cities, thresholds, undirected = False):
    """Download and clean the OSM road network of every city.

    For each row of ``cities`` the networks of all places listed in
    ``OSM_area`` (separated by ``', '``) are downloaded with a buffer of
    ``max(thresholds) + 1000``, merged, optionally made bidirectional, stripped
    of motorways and fast trunk roads, and rebuilt into a graph.

    Parameters
    ----------
    cities : pandas.DataFrame
        Needs the columns ``City`` and ``OSM_area``.
    thresholds : sequence of numbers
        Route thresholds; only the maximum is used here (download buffer).
    undirected : bool, default False
        When true, every one-way edge also gets a reversed copy and all edges
        are flagged ``oneway = False``.

    Returns
    -------
    dict
        ``'graphs'`` (rebuilt graphs), ``'nodes'`` (node tables with
        ``geometry_m`` and ``osmid_var``), ``'edges'`` (slim edge table indexed
        by the ``"from-to"`` key) and ``'edges long'`` (full edge table); each
        is a list with one entry per city.
    """
    # NOTE(review): EPSG:3043 is hard-coded as the metric CRS for every city -
    # confirm it is suitable for the cities being processed.
    print('get road networks from OSM')
    start_time = time.time()
    graphs = list()
    road_nodes = list()
    road_edges = list()
    road_conn = list()

    for city_no, osm_area in enumerate(cities['OSM_area']):
        # Get graph, road nodes and edges
        road_node = pd.DataFrame()
        roads = pd.DataFrame()

        # For each included OSM_area get the roads
        for district in osm_area.rsplit(', '):
            graph = ox.graph_from_place(district, network_type = "all", buffer_dist = (np.max(thresholds)+1000))
            node, edge = ox.graph_to_gdfs(graph)
            road_node = pd.concat([road_node, node], axis = 0)
            roads = pd.concat([roads, edge], axis = 0)

        # Eliminate lists in the df (keep the first element), which otherwise prevent dropping duplicates
        road_edge = pd.DataFrame([[c[0] if isinstance(c,list) else c for c in roads[col]]\
                              for col in roads]).transpose()
        road_edge.columns = roads.columns
        road_edge.index = roads.index
        road_edge = gpd.GeoDataFrame(road_edge, crs = 4326)

        # Return the unique nodes and edges of the (often) adjacent OSM_areas.
        road_node = road_node.drop_duplicates()
        road_edge = road_edge.drop_duplicates()

        # Road nodes format
        road_node = road_node.to_crs(4326)
        road_node['geometry_m'] = gpd.GeoSeries(road_node['geometry'], crs = 4326).to_crs(3043)
        road_node['osmid_var'] = road_node.index
        road_node = gpd.GeoDataFrame(road_node, geometry = 'geometry', crs = 4326)

        # format road edges
        road_edge['geometry_m'] = gpd.GeoSeries(road_edge['geometry'], crs = 4326).to_crs(3043)
        road_edge = road_edge.reset_index()
        road_edge = road_edge.rename(columns={'u':'from', 'v':'to', 'key':'keys'})
        road_edge['key'] = road_edge['from'].astype(str) + '-' + road_edge['to'].astype(str)

        if undirected:
            # Apply one-directional to both for walking
            both = road_edge[road_edge['oneway'] == False]
            one = road_edge[road_edge['oneway'] == True]
            # Reversed copy of every one-way edge: swap the end points, keep all attributes.
            rev = one.copy()
            rev['from'] = one['to']
            rev['to'] = one['from']
            edge_bidir = pd.concat([both, one, rev])
            edge_bidir = edge_bidir.reset_index()
            edge_bidir['oneway'] = False
            # Rebuild the "from-to" key: the reversed copies would otherwise keep
            # the forward key, so a route step travelled against a former one-way
            # edge could not be found in the 'edges' lookup table (and the forward
            # key would occur twice).
            edge_bidir['key'] = edge_bidir['from'].astype(str) + '-' + edge_bidir['to'].astype(str)
        else:
            edge_bidir = road_edge

        # Exclude highways and ramps on edges
        edge_filter = edge_bidir[(edge_bidir['highway'].str.contains('motorway') | 
              (edge_bidir['highway'].str.contains('trunk') & 
               edge_bidir['maxspeed'].astype(str).str.contains(
                   '40 mph|45 mph|50 mph|55 mph|60 mph|65|70|75|80|85|90|95|100|110|120|130|140'))) == False]
        road_edges.append(edge_filter)

        # Exclude isolated nodes (nodes no remaining edge starts or ends at)
        fltrnodes = pd.Series(list(edge_filter['from']) + list(edge_filter['to'])).unique()
        newnodes = road_node[road_node['osmid_var'].isin(fltrnodes)]
        road_nodes.append(newnodes)

        # Get only necessary road connections columns for network performance
        road_con = edge_filter[['osmid','key','length','geometry']]
        road_con = road_con.set_index('key')

        road_conn.append(road_con)

        # formatting to graph again (helper columns are dropped first).
        newnodes = newnodes.loc[:, ~newnodes.columns.isin(['geometry_m', 'osmid_var'])]
        edge_filter = edge_filter.set_index(['from','to','keys'])
        edge_filter = edge_filter.loc[:, ~edge_filter.columns.isin(['geometry_m', 'key'])]

        graph2 = ox.graph_from_gdfs(newnodes, edge_filter)

        graphs.append(graph2)
        print(cities['City'][city_no].rsplit(',')[0], 'done', round((time.time() - start_time) / 60,2),'mns')
    return({'graphs':graphs,'nodes':road_nodes,'edges':road_conn,'edges long':road_edges})
# Block 3 city greenspace
def urban_greenspace (cities, thresholds, one_UGS_buf = 25, min_UGS_size = 400):
    """Collect the urban greenspaces (UGS) of every city from OSM.

    Polygon features carrying one of the greenspace tags are downloaded for the
    places in ``OSM_area`` (buffered by ``max(thresholds)``), features whose
    ``one_UGS_buf`` buffers touch are dissolved into one park, and parks whose
    projected area does not exceed ``min_UGS_size`` are dropped.

    Returns a list with one park table per city; each table carries the
    projected geometry (``geometry_m``) and its area (``park_area``).
    """
    print('get urban greenspaces from OSM')

    # Tags seen as Urban Greenspace (UGS) require the following:
    # 1. The tag represents an area
    # 2. The area is outdoor
    # 3. The area is (semi-)publicly available
    # 4. The area is likely to contain trees, grass and/or greenery
    # 5. The area can reasonably be used for walking or recreational activities
    ugs_tags = {'landuse':['allotments','forest','greenfield','village_green'],
                'leisure':['garden','fitness_station','nature_reserve','park','playground'],
                'natural':'grassland'}

    city_parks = []
    for city_no, osm_area in enumerate(cities['OSM_area']):
        features = ox.geometries_from_place(osm_area.rsplit(', '),
                                            tags = ugs_tags,
                                            buffer_dist = np.max(thresholds))

        # Only areal features can be greenspaces.
        areal = features[features.geom_type.isin(['Polygon', 'MultiPolygon'])]
        greenspace = areal.reset_index()
        warnings.filterwarnings("ignore")

        # Buffer in the metric CRS, then go back to lon/lat.
        buffered = greenspace.to_crs(3043).buffer(one_UGS_buf).to_crs(4326)
        greenspace['geometry_w_buffer'] = gpd.GeoDataFrame(geometry = buffered)
        greenspace['geometry_w_buffer'] = gpd.GeoSeries(greenspace['geometry_w_buffer'], crs = 4326)
        greenspace['geom buffer diff'] = greenspace['geometry_w_buffer'].difference(greenspace['geometry'])

        # Group features whose buffers overlap into connected components
        # https://stackoverflow.com/questions/68036051/geopandas-self-intersection-grouping
        contiguity = libpysal.weights.fuzzy_contiguity(greenspace['geometry_w_buffer'])
        greenspace['components'] = contiguity.component_labels
        merged = greenspace.dissolve('components')

        # Keep only parks larger than the minimum size (area measured in the metric CRS).
        large_enough = merged.to_crs(3043).area > min_UGS_size
        merged = merged[large_enough]
        print(cities['City'][city_no], 'done')

        merged = merged.reset_index()
        merged['geometry_m'] = merged['geometry'].to_crs(3043)
        merged['park_area'] = merged['geometry_m'].area
        city_parks.append(merged)
    return(city_parks)
# Block 4 population grids extraction
def city_grids_format(city_grids, cities_area, road_nodes, UGS, grid_size = 100):
    """Turn every city's population raster into a table of populated grid cells.

    Each raster cell becomes a polygon, cells are clipped to the city boundary,
    aggregated into blocks of ``grid_size / 100`` raster cells per side, and
    linked to the nearest road node.

    Parameters
    ----------
    city_grids : list of str
        Raster paths, one per city (``<prefix><ISO3>_<City>_2020.tif``).
    cities_area : pandas.Series
        OSM areas per city (places separated by ``', '``), indexable by position.
    road_nodes : list
        Road node tables per city with ``geometry``, ``geometry_m`` and ``osmid_var``.
    UGS : list
        Greenspace tables per city.
    grid_size : int, default 100
        Target cell size; it is divided by 100 to get the aggregation factor.

    Returns
    -------
    list
        One table per city with, among others, ``population``, ``area_m``,
        ``centroid``/``centroid_m``, the nearest road node (``grid_osm``,
        ``node_geom``, ``node_geom_m``), the centroid-to-node distance
        (``G-entry cost``) and ``in_out_UGS`` (cell intersects a greenspace).
    """
    start_time = time.time()
    grids = []
    print(str(grid_size) + 'm resolution grids extraction')
    for i in range(len(city_grids)):

        # Open the raster file
        with rasterio.open(city_grids[i]) as src:
            band = src.read() # the population values
            aff = src.transform # the raster bounds and size (affine)

        # Long table with one record per raster cell, ordered row by row.
        # Built in one step instead of concatenating one raster row at a time.
        grid = pd.DataFrame(band[0].T).unstack().reset_index()
        grid.columns = ['row','col','value']

        # Cell edges from the affine transform. aff[4] is used as the y step, so
        # 'miny'/'maxy' are just the two y edges of a cell, not ordered values.
        grid['minx'] = aff[2]+aff[0]*grid['col']
        grid['miny'] = aff[5]+aff[4]*grid['row']
        grid['maxx'] = aff[2]+aff[0]*grid['col']+aff[0]
        grid['maxy'] = aff[5]+aff[4]*grid['row']+aff[4]

        # Create polygon from affine bounds and row/col indices
        grid['geometry'] = [Polygon([(x0, y0), (x1, y0), (x1, y1), (x0, y1)])
                            for x0, y0, x1, y1 in zip(grid['minx'], grid['miny'],
                                                      grid['maxx'], grid['maxy'])]

        # Set the df as geo-df
        grid = gpd.GeoDataFrame(grid, crs = 4326)

        # Key of the aggregated block every raster cell belongs to.
        grid['row3'] = np.floor(grid['row']/(grid_size/100)).astype(int)
        grid['col3'] = np.floor(grid['col']/(grid_size/100)).astype(int)
        grid['dissolve_key'] = grid['row3'].astype(str) +'-'+ grid['col3'].astype(str)

        # Define a city's OSM area as Polygon.
        geo_ls = gpd.GeoSeries(city_geo(cities_area[i].split(', ')).dissolve().geometry)

        # Intersect grids with the city boundary Polygon.
        insec = grid.intersection(geo_ls.tolist()[0])

        # Exclude grids outside the specified city boundaries
        insec = insec[insec.area > 0]

        # Join in other information.
        insec = gpd.GeoDataFrame(geometry = insec, crs = 4326).join(grid.loc[:, grid.columns != 'geometry'])

        # Dissolve into block by block grids
        popgrid = insec[['dissolve_key','geometry','row3','col3']].dissolve('dissolve_key')

        # Get those grids' populations and area. Only blocks with population and (nearly) full blocks
        popgrid['population'] = round(insec.groupby('dissolve_key')['value'].sum()).astype(int)
        popgrid['area_m'] = round(gpd.GeoSeries(popgrid['geometry'], crs = 4326).to_crs(3043).area).astype(int)
        popgrid = popgrid[popgrid['population'] > 0]
        popgrid = popgrid[popgrid['area_m'] / popgrid['area_m'].max() > 0.95]

        # Get centroids and coords
        popgrid['centroid'] = popgrid['geometry'].centroid
        popgrid['centroid_m'] = gpd.GeoSeries(popgrid['centroid'], crs = 4326).to_crs(3043)
        popgrid['grid_lon'] = popgrid['centroid_m'].x
        popgrid['grid_lat'] = popgrid['centroid_m'].y
        popgrid = popgrid.reset_index()

        # Some geometries result in a multipolygon when dissolving (like i.e. 0.05 meters), coords error.
        # Therefore recreate the polygon from the bounding box (computed once).
        cell_bounds = popgrid.bounds
        popgrid['geometry'] = [Polygon([(x0, y1), (x1, y1), (x1, y0), (x0, y0)])
                               for x0, y0, x1, y1 in zip(cell_bounds['minx'], cell_bounds['miny'],
                                                         cell_bounds['maxx'], cell_bounds['maxy'])]

        # Position of the road node nearest to every centroid. The second entry
        # of the spatial-index result may be a single value or an array (e.g. on
        # ties); flattening and taking the first element covers both shapes
        # without a blanket try/except.
        node_sindex = road_nodes[i]['geometry'].sindex
        entry_index = [int(np.ravel(node_sindex.nearest(centroid)[1])[0])
                       for centroid in popgrid['centroid']]

        nearest_index = road_nodes[i].iloc[entry_index]
        popgrid['grid_osm'] = nearest_index.reset_index(drop = True)['osmid_var']
        popgrid['node_geom'] = nearest_index.reset_index(drop = True)['geometry']
        popgrid['node_geom_m'] = nearest_index.reset_index(drop = True)['geometry_m']
        popgrid['G-entry cost'] = popgrid['node_geom_m'].distance(popgrid['centroid_m'])

        UGS_all = UGS[i].dissolve().geometry[0]
        popgrid['in_out_UGS'] = popgrid.intersection(UGS_all).is_empty == False

        grids.append(popgrid)

        # City label from the file name "<ISO3>_<City>_2020.tif"; underscores in
        # the directory part no longer shift (or break) the lookup.
        file_name = city_grids[i].replace('\\', '/').rsplit('/', 1)[-1]
        city_label = '_'.join(file_name.split('_')[1:-1])
        print(city_label, round((time.time() - start_time)/60,2),'mns')
    return(grids)

In [4]:
# Block 5 park entry points
def UGS_fake_entry(UGS, road_nodes, graphs, cities, pop_grids,
                   thresholds, UGS_entry_buf = 25, walk_radius = 500, entry_point_merge = 0):
    """Derive "fake" entry points of every greenspace from nearby road nodes.

    A road node is an entry point of a park when its projected distance to the
    park is greater than 0 and below ``UGS_entry_buf``. For every entry point
    the park area within ``walk_radius`` is measured and converted into size
    inflation factors (relative to the median walkable area) for the gravity
    variants.

    Parameters
    ----------
    UGS, road_nodes : list
        Greenspace and road node tables, one per city.
    graphs, pop_grids, thresholds
        Accepted for a uniform call signature; not used here.
    cities : pandas.Series
        City names, indexable by position.
    UGS_entry_buf : number, default 25
        Maximum node-to-park distance for an entry point.
    walk_radius : number, default 500
        Radius around an entry point in which park area counts as walkable.
    entry_point_merge : number, default 0
        Buffer handed to ``simplify_UGS_entry``; entry points whose buffers
        touch are merged. 0 keeps every entry point.

    Returns
    -------
    list
        One entry-point table per city.
    """
    start_time = time.time()
    ParkRoads = list()
    for j in range(len(cities)):
        print(cities[j].rsplit(',')[0])

        # Reproject both layers once per city rather than once per park.
        nodes_m = road_nodes[j]['geometry'].to_crs(3043)
        parks_m = UGS[j]['geometry'].to_crs(3043)
        n_parks = len(UGS[j])

        near_nodes = []     # per park: the road nodes inside its entry buffer
        park_numbers = []   # park number of every collected node, in the same order
        for i in range(n_parks):
            dist = nodes_m.distance(parks_m[i])
            # dist > 0 leaves out nodes lying inside the park itself.
            buf_nodes = road_nodes[j][(dist < UGS_entry_buf) & (dist > 0)]
            park_numbers.extend(np.repeat(i, len(buf_nodes)))
            near_nodes.append(buf_nodes)
            if i % 100 == 0: print(round(i/n_parks*100,1),'% fake entry points done', 
                                  round((time.time() - start_time) / 60,2),' mns')

        print(cities[j].rsplit(',')[0],'100 % fake entry points done', round((time.time() - start_time) / 60,2),' mns')

        # One concatenation instead of growing the table inside the loop.
        ParkRoad = pd.concat(near_nodes) if near_nodes else pd.DataFrame()

        # Format
        ParkRoad['Park_No'] = park_numbers
        ParkRoad = ParkRoad.reset_index()
        ParkRoad['park_lon'] = ParkRoad['geometry_m'].x
        ParkRoad['park_lat'] = ParkRoad['geometry_m'].y

        # Attach the park geometry; the node geometry becomes 'geometry_x', the park's 'geometry_y'.
        ParkRoad = pd.merge(ParkRoad, UGS[j][['geometry']], left_on = 'Park_No', right_index = True)

        # Get the walkable park size
        ParkRoad['park_size_walkable'] = ParkRoad['geometry_m'].buffer(walk_radius).to_crs(4326).intersection(ParkRoad['geometry_y'])
        ParkRoad['walk_area'] = ParkRoad['park_size_walkable'].to_crs(3043).area
        ParkRoad['park_area'] = ParkRoad['geometry_y'].to_crs(3043).area
        ParkRoad['share_walked'] = ParkRoad['walk_area'] / ParkRoad['park_area']

        # Get size inflation factors for the gravity model
        ParkRoad['size_infl_factor'] = ParkRoad['walk_area'] / ParkRoad['walk_area'].median()
        ParkRoad['size_infl_sqr2'] = ParkRoad['size_infl_factor']**(1/2)
        ParkRoad['size_infl_sqr3'] = ParkRoad['size_infl_factor']**(1/3)
        ParkRoad['size_infl_sqr5'] = ParkRoad['size_infl_factor']**(1/5)
        ParkRoad['raw'] = 1

        # Merge fake UGS entry points if within X meters of each other for better system performance.
        # The caller's setting is forwarded (it used to be overridden with 0); the default still means no merging.
        ParkRoad = simplify_UGS_entry(ParkRoad, entry_point_merge = entry_point_merge)
        # Keep a gap-free 0..n-1 index, because later steps address entry points by position.
        ParkRoad = ParkRoad.reset_index(drop = True)
        ParkRoads.append(ParkRoad)

        print(cities[j].rsplit(',')[0],'100 % done', 
                                  round((time.time() - start_time) / 60,2),' mns')

    return(ParkRoads)
# Block 5.5 (not in use, buffer is 0, thus retains all the park entry points as is)
def simplify_UGS_entry(fake_UGS_entry, entry_point_merge = 0):
    """Merge entry points that lie within ``entry_point_merge`` of each other.

    Every entry point is buffered by ``entry_point_merge``; points whose buffers
    touch form one component, and per component only the point closest to the
    component's centroid is kept.

    Parameters
    ----------
    fake_UGS_entry : table
        Entry points with the columns ``geometry_m`` (projected node geometry)
        and ``geometry_x`` (lon/lat node geometry). Not modified.
    entry_point_merge : number, default 0
        Buffer distance used for grouping.

    Returns
    -------
    table
        The kept rows, with the helper columns ``geometry_m_buffer``,
        ``components``, ``comp_centroid`` and ``centr_dist`` added.
    """
    # Work on a copy so the caller's table does not gain the helper columns.
    ParkComb = fake_UGS_entry.copy()

    # Get the buffer
    ParkComb['geometry_m_buffer'] = ParkComb['geometry_m'].buffer(entry_point_merge)

    # Get and merge components
    M = libpysal.weights.fuzzy_contiguity(ParkComb['geometry_m_buffer'])
    ParkComb['components'] = M.component_labels

    # Take centroid of merged components
    centr = gpd.GeoDataFrame(ParkComb, geometry = 'geometry_x', crs = 4326).dissolve('components')['geometry_x'].centroid
    centr = gpd.GeoDataFrame(centr)
    centr.columns = ['comp_centroid']

    # Get node closest to the centroid of all merged nodes, which accesses the road network.
    ParkComb = pd.merge(ParkComb, centr, left_on = 'components', right_index = True)
    ParkComb['centr_dist'] = ParkComb['geometry_x'].distance(ParkComb['comp_centroid'])
    # idxmin() yields index labels, so select with .loc; .iloc only gave the
    # same rows while the index happened to be 0..n-1 in order.
    ParkComb = ParkComb.loc[ParkComb.groupby('components')['centr_dist'].idxmin()]
    return(ParkComb)

In [5]:
def suitible_enhanced (UGS_entry, pop_grids, road_nodes, cities, thresholds):
    """List the grid / entry-point pairs that are close enough to be routed.

    A grid is paired with an entry point when the straight-line distance
    between the grid's road node and the entry point is below
    ``max(thresholds)`` times the entry point's largest size inflation factor.

    Parameters
    ----------
    UGS_entry : list
        Entry-point tables per city.
    pop_grids : list
        Population grid tables per city.
    road_nodes
        Accepted for a uniform call signature; not used here.
    cities : pandas.Series
        City names, indexable by position.
    thresholds : sequence of numbers
        Route thresholds; only the maximum is used.

    Returns
    -------
    list
        One table of candidate pairs per city, keyed by ``gridpark_no``
        (grid-park) and ``graph_key`` (grid node-entry node).
    """
    start_time = time.time()
    suits_all = []
    for j in range(len(cities)):
        print('get (Euclidean) suitible combinations')
        print('0.0 %', round((time.time() - start_time) / 60,2),'mns')
        UGSe = UGS_entry[j]
        entry_geoms = UGSe.geometry_m
        pop = pop_grids[j]

        cols = ['osmid','Park_No','walk_area','size_infl_sqr2','size_infl_sqr3','size_infl_sqr5']

        # Search radius of every entry point, computed once for the whole table
        # and read by position, like entry_geoms and UGSe.index below.
        max_reach = np.max(UGSe[['raw','size_infl_sqr2','size_infl_sqr3','size_infl_sqr5']], axis = 1)*np.max(thresholds)

        pair_frames = []
        for i in range(len(entry_geoms)):
            entry_point = entry_geoms.iloc[i]
            # .copy(): columns are added below and must not write into a view of `pop`.
            suit_df = pop[pop.node_geom_m.distance(entry_point) < max_reach.iloc[i]].copy()

            suit_df['UGSe_osmid_m'] = entry_point
            suit_df['Grid_No'] = suit_df.index
            suit_df = suit_df[['Grid_No','grid_osm','G-entry cost','in_out_UGS','node_geom_m','UGSe_osmid_m']].reset_index(drop = True)
            suit_df['Park_entry_No'] = UGSe.index[i]
            pair_frames.append(suit_df)
            if (i+1) % 500 == 0: print(round((i+1) / len(entry_geoms)*100,2),'%',
                                       round((time.time() - start_time) / 60,2),'mns')

        # One concatenation instead of growing the table inside the loop.
        suits = pd.concat(pair_frames) if pair_frames else pd.DataFrame()

        # Attach the entry-point attributes and build the lookup keys.
        suits = pd.merge(suits, UGSe[cols], left_on = 'Park_entry_No',right_index = True, how = 'left')
        suits = suits.reset_index(drop = True)
        suits = suits.rename(columns = {'osmid':'Parkroad_osmid','walk_area':'walk_area_m2'})
        suits['gridpark_no'] = suits['Grid_No'].astype(str)+'-'+suits['Park_No'].astype(str)
        suits['graph_key'] = suits['grid_osm'].astype(str)+'-'+suits['Parkroad_osmid'].astype(str)
        suits_all.append(suits)
        print('100 % finding combinations done')
        print(cities[j],len(suits),'suitible combinations')
    return(suits_all)

In [6]:
def obtaining_subgraphs(graphs, pop_grids, UGS_entry, nodes, cities, thresholds, time_sleep = 30):
    """Cut a local road subgraph around every greenspace entry point.

    For each entry point (processed in ``osmid`` order) the road nodes within
    ``max(thresholds)`` times the entry point's largest size inflation factor
    are selected and the city graph is restricted to them.

    Parameters
    ----------
    graphs : list
        Road graphs per city.
    pop_grids
        Accepted for a uniform call signature; not used here.
    UGS_entry : list
        Entry-point tables per city.
    nodes : list
        Road node tables per city (with ``geometry_m``).
    cities : pandas.Series
        City names, indexable by position.
    thresholds : sequence of numbers
        Route thresholds; only the maximum is used.
    time_sleep : number, default 30
        Seconds to pause after every 500 entry points.

    Returns
    -------
    dict
        ``'graphs'``: per city a Series of subgraphs indexed by the entry
        node's osmid; ``'graph nodes'``: per city a table of the selected nodes
        (distance in column ``0``, ``geometry_m``, ``Parkroad_osmid``).
    """
    print('obtain local graphs')
    start_time = time.time()
    subgraphs_all = []
    suits_all = []
    for j in range(len(cities)):
        print(cities[j])
        Graph = graphs[j]
        UGSe = UGS_entry[j].sort_values('osmid')
        road_node = nodes[j]
        node_geoms = road_node.geometry_m
        entry_geoms = UGSe.geometry_m
        osmid = UGSe['osmid']
        max_infl = np.max(UGSe[['raw','size_infl_sqr2','size_infl_sqr3','size_infl_sqr5']], axis = 1)*(np.max(thresholds))

        print('0.0 % done',round((time.time() - start_time) / 60,2),'mns')
        subgraphs = []
        UGSe_ids = []
        node_frames = []
        for i in range(len(entry_geoms)):
            # Distances are computed per entry point, so only one node-length
            # Series is alive at a time instead of one per entry point.
            reach = node_geoms.distance(entry_geoms.iloc[i])
            suit_in = reach[reach <= max_infl.iloc[i]]
            UGSe_ids.append(osmid.iloc[i])
            suit_in = pd.DataFrame(suit_in).join(node_geoms)
            suit_in['Parkroad_osmid'] = osmid.iloc[i]
            subgraphs.append(Graph.subgraph(suit_in.index))
            node_frames.append(suit_in)

            if (i+1) % 500 == 0: 
                print(round((i+1) / len(entry_geoms)*100,2),'% done',
                                        round((time.time() - start_time) / 60,2),'mns')
                time.sleep(time_sleep)
        print('100 % done',round((time.time() - start_time) / 60,2),'mns')

        # One concatenation instead of growing the table inside the loop.
        suits = pd.concat(node_frames) if node_frames else pd.DataFrame()
        subgraphs_all.append(pd.Series(subgraphs, index = UGSe_ids))
        suits_all.append(suits)
    return({'graphs':subgraphs_all,'graph nodes':suits_all})

In [7]:
def distance_fast (Geo_1, Geo_2):
    """Planar Euclidean distance between two geometries or geometry collections.

    ``Geo_1`` and ``Geo_2`` only need ``x`` and ``y`` attributes; these may be
    scalars or aligned pandas Series. The square root is taken vectorised, so
    Series input yields a Series and scalar input yields a scalar.
    """
    dx = Geo_1.x - Geo_2.x
    dy = Geo_1.y - Geo_2.y
    return(np.sqrt(dx**2 + dy**2))

In [8]:
def direct_routing (suitible_comb, graphs, edges, cities, chunk = 20000, time_sleep = 15):
    """Route every candidate grid / entry-point pair over the entry's subgraph.

    Per entry node one single-source Dijkstra run (weight ``'length'``) is made
    on that entry's subgraph; the costs and paths to the grid nodes paired with
    it are kept. Entry nodes are processed in chunks sized so that a chunk
    holds roughly ``chunk`` grid nodes.

    Parameters
    ----------
    suitible_comb : list
        Candidate pair tables per city (needs ``Parkroad_osmid``, ``grid_osm``,
        ``graph_key``, ``in_out_UGS``, ``G-entry cost`` and the
        ``size_infl_sqr*`` columns).
    graphs : list
        Per city a Series of subgraphs indexed by entry node osmid.
    edges : list
        Per city an edge table indexed by the ``"from-to"`` key with a
        ``geometry`` column.
    cities : iterable of str
        City names (printed only).
    chunk : int, default 20000
        Approximate number of grid nodes handled per chunk.
    time_sleep : number, default 15
        Seconds to pause after every chunk.

    Returns
    -------
    list
        One routes table per city with ``route cost``, ``raw_Tcost``,
        ``grav2_Tcost``, ``grav3_Tcost``, ``grav5_Tcost`` and the dissolved
        route ``geometry``; pairs without a route are dropped, pairs whose grid
        lies in a greenspace get cost 0.
    """
    start_time = time.time()
    Routes = []
    for city_no, city in enumerate(cities):
        print(city)

        # reset_index() keeps the original row order in an 'index' column (restored at the end).
        suitible = suitible_comb[city_no].sort_values('Parkroad_osmid').reset_index()
        # Grid nodes to reach per entry node; grids inside a greenspace need no route.
        grouped = suitible[suitible['in_out_UGS'] == False].groupby(['Parkroad_osmid'])['grid_osm'].apply(list)
        sets = grouped.apply(np.unique)

        Conn = edges[city_no]
        SG = graphs[city_no]

        # An entry node can occur several times; keep its first subgraph only.
        SGr = SG.reset_index()
        SG = SGr.iloc[pd.Series(SGr['index'].drop_duplicates()).index].set_index('index')[0]

        # num: entry nodes per chunk. The chunk count follows the entry nodes that
        # actually need routing (`sets`), so no empty trailing chunks are processed.
        num = int(np.ceil(chunk / sets.apply(len).mean()))
        length = int(np.ceil(len(sets) / num))

        Routes_df = pd.DataFrame()
        for l in range(length):
            sets2 = sets[l*num:l*num+num]
            comb = suitible[suitible['Parkroad_osmid'].isin(sets2.index)]
            subgraph = SG[sets2.index]

            ls = []    # entry node of every found route
            ls2 = []   # grid node of every found route
            ls3 = []   # route cost
            lod = []   # per route step: "<grid node>-<entry node>" of its route
            lgk = []   # per route step: "<from>-<to>" edge key
            for i in range(len(sets2)):
                origin = sets2.index[i]
                targets = sets2.iloc[i]
                # path[0]: node -> cost, path[1]: node -> list of nodes on the path
                path = nx.single_source_dijkstra(subgraph.iloc[i], origin, weight = 'length')

                incl = np.isin(list(path[0].keys()), targets)
                incl2 = np.isin(list(path[1].keys()), targets)

                # route cost
                ls.extend(np.repeat(origin, sum(incl)))
                ls2.extend(np.array(list(path[0].keys()))[incl])
                ls3.extend(np.array(list(path[0].values()))[incl])

                # route steps: one record per edge travelled towards a wanted grid node
                for (dest, nodes), wanted in zip(path[1].items(), incl2):
                    if wanted and len(nodes) > 1:
                        route_no = str(dest)+'-'+str(origin)
                        for step_from, step_to in zip(nodes[:-1], nodes[1:]):
                            lgk.append(str(step_from)+'-'+str(step_to))
                            lod.append(route_no)

            dist_df = pd.DataFrame({'UGSe_id':ls,'GrE_id':ls2,'route cost':ls3})
            dist_df['graph_key'] = dist_df['GrE_id'].astype(str)+'-'+dist_df['UGSe_id'].astype(str)

            routes = pd.merge(comb, dist_df, on = 'graph_key', how = 'left')
            routes['route cost'] = np.where(routes['in_out_UGS'],0,routes['route cost'])
            routes = routes[~routes['route cost'].isna()].reset_index(drop = True)

            routes['G-entry cost'] = np.where(routes['in_out_UGS'],0,routes['G-entry cost'])

            # Total cost, raw and deflated by the size inflation factors.
            routes['raw_Tcost'] = routes['route cost']+routes['G-entry cost']
            routes['grav2_Tcost'] = routes['raw_Tcost'] / routes['size_infl_sqr2']
            routes['grav3_Tcost'] = routes['raw_Tcost'] / routes['size_infl_sqr3']
            routes['grav5_Tcost'] = routes['raw_Tcost'] / routes['size_infl_sqr5']

            # Look up the edge geometry of every step and merge the steps per route.
            linestr = pd.DataFrame({'route no':lod,'route step':lgk})
            linestr = pd.merge(linestr, Conn.geometry, left_on = 'route step', right_index = True, how = 'left')
            linestr = gpd.GeoDataFrame(linestr[['route no','geometry']], crs = 4326)

            linestr = linestr.dissolve('route no')
            routes2 = pd.merge(routes, linestr, left_on = 'graph_key', right_index = True, how = 'left')

            Routes_df = pd.concat([Routes_df, routes2])

            print(round(l*num / len(sets)*100,2),'%', 
                  round((time.time() - start_time) / 60,2),'mns')
            time.sleep(time_sleep)

        # Back to the original row order, then drop the helper index.
        Routes_df = Routes_df.sort_values('index')
        Routes_df = Routes_df.set_index('index')
        Routes_df = Routes_df.reset_index(drop = True)

        Routes_df = Routes_df[Routes_df.columns[~Routes_df.columns.isin(['UGSe_id', 'GrE_id','size_infl_sqr2',\
                                                                  'size_infl_sqr3', 'size_infl_sqr5'])]]

        print('100 % done',round((time.time() - start_time) / 60,2),'mns')

        Routes.append(Routes_df)
    return(Routes)

In [9]:
# Block 8 determine best parkentry points from each grid, then calculate grid scores
# and finally aggregate city access in categories (high, medium, low and no access)
def grid_score_summary (routes, cities, pop_grids, ext = '', grid_size = 100, save_path = 'C:/Dumps/GEE-WP Scores/Gravity/'):
    """Score every grid of every city and summarise the population share per access category.

    For each city and each cost variant ('raw', 'grav2', 'grav3', 'grav5') the best
    route per grid/park pair is selected with `best_gridpark_comb`, and for every value
    in the notebook-level `thresholds` list the grid scores are computed with
    `determine_scores`. Per city and variant the grid scores of all thresholds are
    written to a GeoPackage and a CSV; the population shares per access category of
    all cities are written to 'popgrid_access.csv'.

    Parameters
    ----------
    routes : sequence
        routes[n] is the route table of city n (passed on to `best_gridpark_comb`).
    cities : sequence of str
        City names; cities[n] is used in printed progress, in file names and as the
        column label of the returned summary.
    pop_grids : sequence
        pop_grids[n] is the population grid of city n; its 'geometry' column is joined
        onto the grid scores by index.
    ext : str
        Not used by this function; kept for backward compatibility.
    grid_size : int
        Grid size used to name the output folder ('<grid_size>m grids/'); also
        forwarded to `determine_scores` so all outputs end up under the same folder.
    save_path : str
        Root folder for all output files.

    Returns
    -------
    pandas.DataFrame
        One column per city with, per (variant_threshold, access category), the share
        of the population falling in that category.
    """
    start_time = time.time()

    out_root = save_path+str(grid_size)+'m grids/'
    geom_dir = out_root + 'Grid_geoms/'
    csv_dir = out_root + 'Grid_csv/'

    # Cost-variant column prefixes and the matching labels used in progress output
    # and in the summary's row labels.
    l1 = ['raw','grav2','grav3','grav5']
    m1 = ['entrance','gravity**(1/2)','gravity**(1/3)','gravity**(1/5)']

    popg_acc = pd.DataFrame()
    for n in range(len(cities)):
        city = cities[n]
        print(city)

        popgrid_access = pd.DataFrame()
        for abbr, label in zip(l1, m1):
            # Best entry point from each grid to each park for this cost variant.
            var_best_routes = best_gridpark_comb (routes[n], abbr, pop_grids[n])

            gridsc = pd.DataFrame()
            print(label, round((time.time() - start_time) / 60,2), 'mns')

            # For each threshold given, calculate a score.
            # `thresholds` is read from the notebook's global namespace.
            for t in thresholds:
                score = 'tr_'+ str(t)
                # Forward the caller's grid_size (previously hard-coded to 100, which
                # sent the Grid_lines files to a different folder for other grid sizes).
                scores = determine_scores(var_best_routes, pop_grids[n], t, abbr, city,
                                          save_path, grid_size = grid_size)

                # NOTE(review): every 'grid_score' frame carries a 'population' column, so
                # this side-by-side concat repeats it per threshold -- confirm the file
                # writers below handle the duplicate column names as intended.
                gridsc = pd.concat([gridsc, scores['grid_score']], axis = 1)

                # Sum the population living in the grids of each access category.
                popgacc = scores['score_w_route'].groupby(score+'_access')['population'].sum(
                    ).to_frame(label+'_'+str(t))
                popgrid_access = pd.concat([popgrid_access, popgacc],axis=1)

                print('grid ',t)

            gridsc = gridsc.join(pop_grids[n]['geometry'])
            gridsc = gpd.GeoDataFrame(gridsc, geometry = 'geometry', crs = 4326)

            os.makedirs(geom_dir, exist_ok = True)
            gridsc.to_file(geom_dir + 'gridscore_'+ abbr + '_' + city + '.gpkg')

            # Detailed scores to files: number of cities * ways to measure = number of files.
            # Different threshold-scores are in the same dataframe.
            gridsc = gridsc.loc[:, gridsc.columns!='geometry']

            os.makedirs(csv_dir, exist_ok = True)
            gridsc.to_csv(csv_dir + 'gridscore_'+ abbr + '_' + city + '.csv')

        # For each city, divide the population per access group by the total to get its share.
        popgrid_access = popgrid_access / popgrid_access.sum()
        popgrid_access = pd.DataFrame(popgrid_access.unstack())
        popg_acc = pd.concat([popg_acc, popgrid_access], axis = 1)

        print(city,'done', round((time.time() - start_time) / 60,2), 'mns')

    popg_acc.columns = cities
    os.makedirs(out_root, exist_ok = True)
    popg_acc.to_csv(out_root + 'popgrid_access.csv')
    return(popg_acc)


In [10]:
def best_gridpark_comb (routes, var_abbr, pop_grid):
    """Select, per grid/park pair, the route with the lowest total cost and attach grid data.

    Parameters
    ----------
    routes : pandas.DataFrame
        Route table with at least the columns 'gridpark_no', 'Grid_No', 'Park_No',
        'Park_entry_No' and '<var_abbr>_Tcost'.
    var_abbr : str
        Prefix of the cost column to minimise (the column used is var_abbr + '_Tcost').
    pop_grid : pandas.DataFrame
        Grid table with 'population' and 'geometry' columns; its index is matched
        against 'Grid_No'.

    Returns
    -------
    pandas.DataFrame
        The cheapest route per 'gridpark_no', outer-merged with the grid table so grids
        without any route are kept; for those rows 'Park_No' and 'Park_entry_No' are -1.
        The index is reset (the previous index is kept as a column).
    """
    cost_col = str(var_abbr) + '_Tcost'

    # idxmin() yields index *labels*, so select with .loc (label-based) rather than .iloc.
    # A plain local variable is used instead of writing through locals(), which is not
    # guaranteed to persist between calls (and does not on Python 3.13+).
    best_labels = routes.groupby('gridpark_no')[cost_col].idxmin()
    best_routes = routes.loc[best_labels]

    # Get grid information; the outer merge keeps grids that have no route at all.
    best_routes = pd.merge(best_routes, pop_grid[['population','geometry']],
                           left_on = 'Grid_No', right_index = True, how = 'outer')
    best_routes = best_routes.reset_index()

    # Formatting: grids without a route get -1 so the id columns can be integers.
    for id_col in ('Park_No', 'Park_entry_No'):
        best_routes[id_col] = best_routes[id_col].fillna(-1).astype(int)
    return(best_routes)

In [11]:
def determine_scores(var_df, pop_grid, thresholds, var_abbr, city, save_path, grid_size = 100):
    """Compute per-grid access scores for one threshold and write them to a GeoPackage.

    Parameters
    ----------
    var_df : pandas.DataFrame
        Best route per grid/park pair. Columns read here: '<var_abbr>_Tcost',
        'population', 'walk_area_m2', 'Grid_No', 'gridpark_no', 'geometry_x',
        'Park_No' and 'Park_entry_No'.
    pop_grid : pandas.DataFrame
        Grid table; its 'population' column is joined onto the scores by index.
    thresholds : number
        A single threshold value (despite the plural name); routes whose cost is at
        most this value contribute `threshold - cost` to the score.
    var_abbr : str
        Prefix of the cost column to use (var_abbr + '_Tcost'); also used in the file name.
    city : str
        City name, used in the output file name.
    save_path : str
        Root folder; the file is written to '<save_path><grid_size>m grids/Grid_lines/'.
    grid_size : int
        Grid size used to name the output folder.

    Returns
    -------
    dict
        'score_w_route': GeoDataFrame of grid scores with the dissolved route lines as
        geometry; 'grid_score': the same table without the route geometry column.
    """
    t = thresholds
    str2 = str(t)
    score = 'tr_'+ str2

    cost_col = var_abbr + '_Tcost'
    pop_score_col = 'pop' + score
    area_col = 'walk_area_ha' + str2
    density_col = 'walkha_person' + str2

    # Only routes within the threshold get a score. Copy the slice so the column
    # assignments below act on an independent frame (no SettingWithCopyWarning).
    thold = var_df[var_df[cost_col] <= t].copy()
    thold[score] = t - thold[cost_col]
    thold[pop_score_col] = thold[score] * thold['population']
    thold[area_col] = thold['walk_area_m2'] /10000
    # NOTE(review): this is population / hectares although the column name reads like
    # hectares per person -- confirm which one is intended.
    thold[density_col] = thold['population'] / thold[area_col]

    # Join the scored columns back onto the gridpark information (rows outside the
    # threshold get NaN).
    var_df = var_df.join(thold[[score, pop_score_col, area_col, density_col]])

    # Get the grid scores. Columns are selected with a list: selecting several groupby
    # columns with a bare tuple is no longer accepted by pandas 2.x.
    by_grid = var_df.groupby('Grid_No')
    gs = by_grid[[score, pop_score_col, area_col]].sum()
    gs.columns = [score, 'pop_' + score, 'walkha_' + str2]

    gs['walkha_person_' + score] = by_grid[density_col].mean()

    trstr = var_df[var_df[score] > 0]
    gs[score + '_parks'] = trstr.groupby('Grid_No')['gridpark_no'].count()

    # Add the routes as a dissolved line geometry per grid.
    route_lines = gpd.GeoDataFrame(trstr[['Grid_No','geometry_x']],
                                   geometry = 'geometry_x', crs = 4326).dissolve('Grid_No')
    gs[score + '_routes'] = route_lines.geometry

    # Add the parks each grid has access to, together with the closest entry point.
    park_ids = trstr[trstr['Park_No'] >=0].groupby('Grid_No')['Park_No'].apply(list).astype(str)
    entry_ids = trstr[trstr['Park_entry_No'] >=0].groupby('Grid_No')['Park_entry_No'].apply(list).astype(str)
    gs[score+'Park:entry'] = park_ids + ':' + entry_ids

    # Determine the threshold's category score.
    # High >= threshold (perfect score to one park), medium is at least half of that,
    # low is below this and no is no access to a park for a certain grid within the threshold.
    # The default is given as a string so it shares a dtype with the choices; the four
    # conditions already cover every non-missing score.
    gs[score+'_access'] = np.select(
        [gs[score] >= t,
         (gs[score] < t) & (gs[score] >= t/2),
         (gs[score] < t/2) & (gs[score] > 0),
         gs[score] <= 0],
        ['1 high','2 medium','3 low','4 no'],
        default = '4 no')
    gs = gs.join(pop_grid['population'], how = 'outer')

    gs = gpd.GeoDataFrame(gs, geometry = score + '_routes', crs = 4326)

    out_dir = save_path+str(grid_size)+'m grids/Grid_lines/'
    os.makedirs(out_dir, exist_ok = True)

    gs.to_file(out_dir + 'gridscore_'+ var_abbr + '_' + str2 + '_' + city + '.gpkg')

    gsc = gs.loc[:,~gs.columns.isin([score + '_routes'])]

    return({'grid_score':gsc,'score_w_route':gs})