In [2]:
# system packages
import sys
import time
import warnings
import os

# non-geo numeric packages
import numpy as np
import math
from itertools import product, combinations
import pandas as pd

# network and OSM packages
import networkx as nx
import osmnx as ox
city_geo = ox.geocoder.geocode_to_gdf

# Earth engine packages
import ee
import geemap

# General geo-packages
import libpysal
import rasterio
import geopandas as gpd
import shapely
from shapely import geometry
from shapely.geometry import Point, MultiLineString, LineString, Polygon, MultiPolygon

In [3]:
# Authenticate and Initialize Google Earth Engine
ee.Authenticate()
ee.Initialize()

Enter verification code: 4/1AVHEtk575uHD8-D1tg494NBnIX6eXe9QnW-xLoeWVMhBNniS2D26aLNfXSg

Successfully saved authorization token.


In [4]:
# Thresholds and cities
thresholds = [300, 600, 1000] # route threshold in metres. WHO guideline speaks of access within 300m

# Extract cities list
iso = pd.read_excel('iso_countries.xlsx')
cities = pd.read_excel('cities.xlsx')
cities_adj = cities[cities['City'].isin(['Addis Ababa','Dhaka','Shijiazhuang','Santo Domingo'])]
cities_adj = cities_adj.reset_index()

In [28]:
%%time
# 1. Required preprocess for information extraction
warnings.filterwarnings('ignore')

# Predifine in Excel: the (1) city name as "City" and (2) the OSM area that needs to be extracted as "OSM_area"
# i.e. City = "Los Angeles" and OSM_area = "Los Angeles county, Orange county CA"
files = gee_worldpop_extract(cities_adj,iso,'C:/Dumps/GEE_city_grids/')

# Files are downloaded automatically to the specified path. Files are also stored in Google with a downloadlink:

Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/thumbnails/f0de31b02bb07953d66a210523e93d01-76a258af2bfa11770a4695c13a1f5b2c:getPixels
Please wait ...
Data downloaded to C:\Dumps\GEE_city_grids\ETH_Addis Ababa_2020.tif
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/thumbnails/e374c5e555e31457cffe53390382762f-45c7cc7aa94fed3f50d21c2f19e15a2e:getPixels
Please wait ...
Data downloaded to C:\Dumps\GEE_city_grids\BGD_Dhaka_2020.tif
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/thumbnails/aa1d5bb09020fa6804d00e3e57c83081-aba785176e22ca563efe5ea4e8d70269:getPixels
Please wait ...
Data downloaded to C:\Dumps\GEE_city_grids\DOM_Santo Domingo_2020.tif
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/thumbnails/c00d2dec9576c7941a5faf8cf3e6eb04

In [35]:
%%time
# 2. Information extraction

# Clip cities from countries, format population grids
population_grids = city_grids_format(files,
                                     cities_adj['OSM_area'],
                                     grid_size = 100) # aggregating upwards to i.e. 200m, 300m etc. is possible
print(' ')

# Get road networks
road_networks = road_networks(cities_adj, # Get 'all' (drive,walk,bike) network
                              thresholds,
                              undirected = True)
print(' ')

# Extract urban greenspace (UGS)
UGS = urban_greenspace(cities_adj, 
                       thresholds,
                       one_UGS_buf = 25, # buffer at which UGS is seen as one
                       min_UGS_size = 400) # WHO sees this as minimum UGS size (400m2)

100m resolution grids extraction
Addis Ababa 0.42 mns
Dhaka 0.68 mns
Santo Domingo 1.78 mns
Shijiazhuang 2.14 mns
 
get road networks from OSM
Addis Ababa done 1.66 mns
Dhaka done 2.68 mns
Santo Domingo done 4.66 mns
Shijiazhuang done 5.04 mns
 
get urban greenspaces from OSM
Addis Ababa done
Dhaka done
Santo Domingo done
Shijiazhuang done
Wall time: 7min 14s


In [36]:
%%time
# 3. Preprocess information for route finding

# Get fake entry points (between UGS and buffer limits)
UGS_entry = UGS_fake_entry(UGS, 
                           road_networks['nodes'], 
                           cities_adj['City'], 
                           UGS_entry_buf = 25, # road nodes within 25 meters are seen as fake entry points
                           walk_radius = 500, # assume that the average person only views a UGS up to 500m in radius
                                                # more attractive
                           entry_point_merge = 0) # merges closeby fake UGS entry points within X meters 
                                                    # what may be done for performance
print(' ')
# Checks all potential suitible combinations (points that fall within max threshold Euclidean distance from the ego)
suitible = suitible_combinations(UGS_entry, 
                                 population_grids, 
                                 road_networks['nodes'], 
                                 thresholds,
                                 cities_adj['City'],
                                 chunk_size = 10000000) # calculating per chunk of num UGS entry points * num pop_grids
                                                        # Preventing normal PC meltdown, set lower if PC gets stuck
print(' ')
# Checks if grids are already in a UGS
suitible_InOut_UGS = grids_in_UGS (suitible, UGS, population_grids)

get fake UGS entry points
Addis Ababa 0.0 % done 0.01  mns
Addis Ababa 73.5 % done 0.42  mns
Addis Ababa 100 % done 0.56  mns
Dhaka 0.0 % done 0.57  mns
Dhaka 19.4 % done 0.81  mns
Dhaka 38.8 % done 1.04  mns
Dhaka 58.1 % done 1.3  mns
Dhaka 77.5 % done 1.55  mns
Dhaka 96.9 % done 1.81  mns
Dhaka 100 % done 1.85  mns
Santo Domingo 0.0 % done 1.85  mns
Santo Domingo 25.6 % done 2.22  mns
Santo Domingo 51.2 % done 2.52  mns
Santo Domingo 76.7 % done 2.84  mns
Santo Domingo 100 % done 3.16  mns
Shijiazhuang 0.0 % done 3.16  mns
Shijiazhuang 100 % done 3.25  mns
 
get potential (Euclidean) suitible combinations
Addis Ababa
chunk 1 / 7 131913 suitible comb.
chunk 2 / 7 18619 suitible comb.
chunk 3 / 7 42384 suitible comb.
chunk 4 / 7 17050 suitible comb.
chunk 5 / 7 40319 suitible comb.
chunk 6 / 7 251030 suitible comb.
chunk 7 / 7 202336 suitible comb.
total combinations within distance 703651
0.0 % gridentry done 0.01  mns
35.5 % gridentry done 0.4  mns
71.1 % gridentry done 0.77  mns
100

In [37]:
%%time
# 4. Finding shortest routes.

Routes = route_finding (road_networks['graphs'], # graphs of the road networks
               suitible_InOut_UGS, # potential suitible routes with grid-UGS comb. separated in or out UGS.
               road_networks['nodes'], 
               road_networks['edges'], 
               cities_adj['City'], 
               block_size = 250000, # Chunk to spread dataload.
               nn_iter = 10) # max amount of nearest nodes to be found (both for UGS entry and grid-centroid road entries)

comb. by city
Addis Ababa 703651
Dhaka 106106
Santo Domingo 319591
Shijiazhuang 49555
 
Addis Ababa 1 / 3 range 0 - 250000
0.0 % done 0.08 mns
1.77 % done 0.54 mns
3.55 % done 1.33 mns
5.32 % done 1.84 mns
7.09 % done 2.19 mns
8.87 % done 2.68 mns
10.64 % done 2.83 mns
12.41 % done 3.04 mns
14.19 % done 3.2 mns
15.96 % done 3.32 mns
17.73 % done 3.46 mns
19.5 % done 3.67 mns
21.28 % done 3.81 mns
23.05 % done 3.92 mns
24.82 % done 4.39 mns
26.6 % done 4.68 mns
28.37 % done 4.99 mns
index 166318 No route
index 166319 No route
index 166320 No route
index 166321 No route
index 166322 No route
index 166323 No route
index 166324 No route
index 166325 No route
index 166326 No route
index 166327 No route
index 166328 No route
index 166329 No route
index 166330 No route
index 166331 No route
index 166332 No route
index 166333 No route
index 166334 No route
index 166335 No route
index 166336 No route
index 166337 No route
index 166338 No route
index 166339 No route
index 166340 No route
index 1

In [38]:
%%time
# 5. summarize scores
grid_scores = grid_score_summary (Routes['route summary'], # Shortest routes by the Dijkstra algorithm, with gravity variant distance adj.
                                  cities_adj['City'], 
                                  population_grids, 
                                  ext = '', # At multiple runs, the extention prevents the summarized file to be overwritten.
                                  save_path = 'C:/Dumps/GEE-WP Scores/Gravity/
                                  grid_size = 100) # Size of the grid in meters
grid_scores

Addis Ababa
entrance 0.09 mns
grid  300
grid  600
grid  1000
gravity**(1/2) 2.08 mns
grid  300
grid  600
grid  1000
gravity**(1/3) 4.41 mns
grid  300
grid  600
grid  1000
gravity**(1/5) 6.33 mns
grid  300
grid  600
grid  1000
Addis Ababa done 8.04 mns
Dhaka
entrance 8.09 mns
grid  300
grid  600
grid  1000
gravity**(1/2) 9.07 mns
grid  300
grid  600
grid  1000
gravity**(1/3) 10.19 mns
grid  300
grid  600
grid  1000
gravity**(1/5) 11.23 mns
grid  300
grid  600
grid  1000
Dhaka done 12.15 mns
Santo Domingo
entrance 12.21 mns
grid  300
grid  600
grid  1000
gravity**(1/2) 13.59 mns
grid  300
grid  600
grid  1000
gravity**(1/3) 15.12 mns
grid  300
grid  600
grid  1000
gravity**(1/5) 16.56 mns
grid  300
grid  600
grid  1000
Santo Domingo done 17.87 mns
Shijiazhuang
entrance 17.9 mns
grid  300
grid  600
grid  1000
gravity**(1/2) 19.31 mns
grid  300
grid  600
grid  1000
gravity**(1/3) 20.72 mns
grid  300
grid  600
grid  1000
gravity**(1/5) 22.11 mns
grid  300
grid  600
grid  1000
Shijiazhuang d

Unnamed: 0,City,Addis Ababa,Dhaka,Santo Domingo,Shijiazhuang
entrance_300,1 high,0.054715,0.018963,0.048196,0.017725
entrance_300,2 medium,0.028417,0.038374,0.061172,0.020078
entrance_300,3 low,0.037736,0.080649,0.092689,0.034942
entrance_300,4 no,0.879132,0.862014,0.797944,0.927256
entrance_600,1 high,0.072605,0.063743,0.111227,0.019429
entrance_600,2 medium,0.057362,0.10538,0.120699,0.053866
entrance_600,3 low,0.08857,0.152223,0.147924,0.061785
entrance_600,4 no,0.781464,0.678655,0.62015,0.86492
entrance_1000,1 high,0.107542,0.165167,0.207606,0.022579
entrance_1000,2 medium,0.098166,0.139371,0.156812,0.092906


In [10]:
def gee_worldpop_extract (city_file, iso, save_path = None):
    
    cities = city_file
    
    # Get included city areas
    OSM_incl = [cities[cities['City'] == city]['OSM_area'].tolist()[0].rsplit(', ') for city in cities['City'].tolist()]

    # Get the city geoms
    obj = [city_geo(city).dissolve()['geometry'].tolist()[0] for city in OSM_incl]

    # Get the city countries
    obj_displ = [city_geo(city).dissolve()['display_name'].tolist()[0].rsplit(', ')[-1]for city in OSM_incl]
    obj_displ = np.where(pd.Series(obj_displ).str.contains("Ivoire"),"CIte dIvoire",obj_displ)

    # Get the country's iso-code
    iso_list = [iso[iso['name'] == ob]['alpha3'].tolist()[0] for ob in obj_displ]

    # Based on the iso-code return the worldpop 2020
    ee_worldpop = [ee.ImageCollection("WorldPop/GP/100m/pop")\
        .filter(ee.Filter.date('2020'))\
        .filter(ee.Filter.inList('country', [io])).first() for io in iso_list]

    # Clip the countries with the city geoms.
    clipped = [ee_worldpop[i].clip(shapely.geometry.mapping(obj[i])) for i in range(0,len(obj))]

    # Create path if non-existent
    if save_path == None:
        path = ''
    else:
        path = save_path
        if not os.path.exists(path):
                    os.makedirs(path)

    # Export as TIFF file.
    # Stored in form path + USA_Los Angeles_2020.tif
    filenames = [path+iso_list[i]+'_'+cities['City'][i]+'_2020.tif' for i in range(len(obj))]
    [geemap.ee_export_image(clipped[i], filename = filenames[i]) for i in range(0,len(obj))]
    return(filenames)
    sys.stdout.flush()

In [11]:
# Block 2 population grids extraction
def city_grids_format(city_grids, cities_area, grid_size = 100):
    start_time = time.time()
    grids = []
    print(str(grid_size) + 'm resolution grids extraction')
    for i in range(len(city_grids)):
        
        # Open the raster file
        with rasterio.open(city_grids[i]) as src:
            band= src.read() # the population values
            aff = src.transform # the raster bounds and size (affine)
        
        # Get the rowwise arrays, get a 2D dataframe
        grid = pd.DataFrame()
        for b in enumerate(band[0]):
            grid = pd.concat([grid, pd.Series(b[1],name=b[0])],axis=1)
        grid= grid.unstack().reset_index()
        
        # Unstack df to columns
        grid.columns = ['row','col','value']
        grid['minx'] = aff[2]+aff[0]*grid['col']
        grid['miny'] = aff[5]+aff[4]*grid['row']
        grid['maxx'] = aff[2]+aff[0]*grid['col']+aff[0]
        grid['maxy'] = aff[5]+aff[4]*grid['row']+aff[4]
        
        # Create polygon from affine bounds and row/col indices
        grid['geometry'] = [Polygon([(grid.minx[i],grid.miny[i]),
                                   (grid.maxx[i],grid.miny[i]),
                                   (grid.maxx[i],grid.maxy[i]),
                                   (grid.minx[i],grid.maxy[i])])\
                          for i in range(len(grid))]
        
        # Set the df as geo-df
        grid = gpd.GeoDataFrame(grid, crs = 4326) 

        # Get dissolvement_key for dissolvement. 
        grid['row3'] = np.floor(grid['row']/(grid_size/100)).astype(int)
        grid['col3'] = np.floor(grid['col']/(grid_size/100)).astype(int)
        grid['dissolve_key'] = grid['row3'].astype(str) +'-'+ grid['col3'].astype(str)
        
        # Define a city's OSM area as Polygon.
        geo_ls = gpd.GeoSeries(city_geo(cities_area[i].split(', ')).dissolve().geometry)
        
        # Intersect grids with the city boundary Polygon.
        insec = grid.intersection(geo_ls.tolist()[0])
        
        # Exclude grids outside the specified city boundaries
        insec = insec[insec.area > 0]
        
        # Join in other information.
        insec = gpd.GeoDataFrame(geometry = insec, crs = 4326).join(grid.loc[:, grid.columns != 'geometry'])
        
        # Dissolve into block by block grids
        popgrid = insec[['dissolve_key','geometry','row3','col3']].dissolve('dissolve_key')
        
        # Get those grids populations and area. Only blocks with population and full blocks
        popgrid['population'] = round(insec.groupby('dissolve_key')['value'].sum()).astype(int)
        popgrid['area_m'] = round(gpd.GeoSeries(popgrid['geometry'], crs = 4326).to_crs(3043).area).astype(int)
        popgrid = popgrid[popgrid['population'] > 0]
        popgrid = popgrid[popgrid['area_m'] / popgrid['area_m'].max() > 0.95]

        # Get centroids and coords
        popgrid['centroid'] = popgrid['geometry'].centroid
        popgrid['centroid_m'] = gpd.GeoSeries(popgrid['centroid'], crs = 4326).to_crs(3043)
        popgrid['grid_lon'] = popgrid['centroid_m'].x
        popgrid['grid_lat'] = popgrid['centroid_m'].y
        popgrid = popgrid.reset_index()

        minx = popgrid.bounds['minx']
        maxx = popgrid.bounds['maxx']
        miny = popgrid.bounds['miny']
        maxy = popgrid.bounds['maxy']

        # Some geometries result in a multipolygon when dissolving (like i.e. 0.05 meters), coords error.
        # Therefore recreate the polygon.
        Poly = []
        for k in range(len(popgrid)):
            Poly.append(Polygon([(minx[k],maxy[k]),(maxx[k],maxy[k]),(maxx[k],miny[k]),(minx[k],miny[k])]))
        popgrid['geometry'] = Poly

        grids.append(popgrid)

        print(city_grids[i].rsplit('_')[3], round((time.time() - start_time)/60,2),'mns')
    return(grids)

In [12]:
# Block 3 Road networks
def road_networks (cities, thresholds, undirected = False):
    print('get road networks from OSM')
    start_time = time.time()
    graphs = list()
    road_nodes = list()
    road_edges = list()
    road_conn = list()

    for i in enumerate(cities['OSM_area']):
        # Get graph, road nodes and edges
        road_node = pd.DataFrame()
        roads = pd.DataFrame()
        
        # For each included OSM_area get the roads
        for district in i[1].rsplit(', '):
            graph = ox.graph_from_place(district, network_type = "all", buffer_dist = (np.max(thresholds)+1000))
            node, edge = ox.graph_to_gdfs(graph)
            road_node = pd.concat([road_node, node], axis = 0)
            roads = pd.concat([roads, edge], axis = 0)
        
        # Eliminate lists in the df which prevents drop of duplicate columns
        road_edge = pd.DataFrame([[c[0] if isinstance(c,list) else c for c in roads[col]]\
                              for col in roads]).transpose()
        road_edge.columns = roads.columns
        road_edge.index = roads.index
        road_edge = gpd.GeoDataFrame(road_edge, crs = 4326)
        
        # Return the unique nodes and edges of the (often) adjacent OSM_areas.
        road_node = road_node.drop_duplicates()
        road_edge = road_edge.drop_duplicates()
        
        # Road nodes format
        road_node = road_node.to_crs(4326)
        road_node['geometry_m'] = gpd.GeoSeries(road_node['geometry'], crs = 4326).to_crs(3043)
        road_node['osmid_var'] = road_node.index
        road_node = gpd.GeoDataFrame(road_node, geometry = 'geometry', crs = 4326)

        # format road edges
        road_edge['geometry_m'] = gpd.GeoSeries(road_edge['geometry'], crs = 4326).to_crs(3043)
        road_edge = road_edge.reset_index()
        road_edge.rename(columns={'u':'from', 'v':'to', 'key':'keys'}, inplace=True)
        road_edge['key'] = road_edge['from'].astype(str) + '-' + road_edge['to'].astype(str)
        
        if undirected == True:
            # Apply one-directional to both for walking
            both = road_edge[road_edge['oneway'] == False]
            one = road_edge[road_edge['oneway'] == True]
            rev = pd.DataFrame()
            rev[['from','to']] = one[['to','from']]
            rev = pd.concat([rev,one.iloc[:,2:]],axis = 1)
            edge_bidir = pd.concat([both, one, rev])
            edge_bidir = edge_bidir.reset_index()
            edge_bidir['oneway'] = False
        else:
            edge_bidir = road_edge

        # Exclude highways and ramps on edges    
        edge_filter = edge_bidir[(edge_bidir['highway'].str.contains('motorway') | 
              (edge_bidir['highway'].str.contains('trunk') & 
               edge_bidir['maxspeed'].astype(str).str.contains(
                   '40 mph|45 mph|50 mph|55 mph|60 mph|65|70|75|80|85|90|95|100|110|120|130|140'))) == False]
        road_edges.append(edge_filter)

        # Exclude isolated nodes
        fltrnodes = pd.Series(list(edge_filter['from']) + list(edge_filter['to'])).unique()
        newnodes = road_node[road_node['osmid_var'].isin(fltrnodes)]
        road_nodes.append(newnodes)

        # Get only necessary road connections columns for network performance
        road_con = edge_filter[['osmid','key','length','geometry']]
        road_con = road_con.set_index('key')

        road_conn.append(road_con)

        # formatting to graph again.
        newnodes = newnodes.loc[:, ~newnodes.columns.isin(['geometry_m', 'osmid_var'])]
        edge_filter = edge_filter.set_index(['from','to','keys'])
        edge_filter = edge_filter.loc[:, ~edge_filter.columns.isin(['geometry_m', 'key'])]

        graph2 = ox.graph_from_gdfs(newnodes, edge_filter)

        graphs.append(graph2)
        print(cities['City'][i[0]].rsplit(',')[0], 'done', round((time.time() - start_time) / 60,2),'mns')
    return({'graphs':graphs,'nodes':road_nodes,'edges':road_conn,'edges long':road_edges})

In [13]:
# Block 4 city greenspace
def urban_greenspace (cities, thresholds, one_UGS_buf = 25, min_UGS_size = 400):
    print('get urban greenspaces from OSM')
    parks_in_range = list()
    for i in enumerate(cities['OSM_area']):
        # Tags seen as Urban Greenspace (UGS) require the following:
        # 1. Tag represent an area
        # 2. The area is outdoor
        # 3. The area is (semi-)publically available
        # 4. The area is likely to contain trees, grass and/or greenery
        # 5. The area can reasonable be used for walking or recreational activities
        tags = {'landuse':['allotments','forest','greenfield','village_green'],\
                'leisure':['garden','fitness_station','nature_reserve','park','playground'],\
                'natural':'grassland'}
        gdf = ox.geometries_from_place(i[1].rsplit(', '),tags = tags,buffer_dist = np.max(thresholds))
        gdf = gdf[(gdf.geom_type == 'Polygon') | (gdf.geom_type == 'MultiPolygon')]
        greenspace = gdf.reset_index()    
        warnings.filterwarnings("ignore")

        green_buffer = gpd.GeoDataFrame(geometry = greenspace.to_crs(3043).buffer(one_UGS_buf).to_crs(4326))
        greenspace['geometry_w_buffer'] = green_buffer
        greenspace['geometry_w_buffer'] = gpd.GeoSeries(greenspace['geometry_w_buffer'], crs = 4326)
        greenspace['geom buffer diff'] = greenspace['geometry_w_buffer'].difference(greenspace['geometry'])

        # This function group components in itself that overlap (with the buffer set of 25 metres)
        # https://stackoverflow.com/questions/68036051/geopandas-self-intersection-grouping
        W = libpysal.weights.fuzzy_contiguity(greenspace['geometry_w_buffer'])
        greenspace['components'] = W.component_labels
        parks = greenspace.dissolve('components')

        # Exclude parks below 0.04 ha.
        parks = parks[parks.to_crs(3043).area > min_UGS_size]
        print(cities['City'][i[0]], 'done')
        parks = parks.reset_index()
        parks['geometry_m'] = parks['geometry'].to_crs(3043)
        parks['park_area'] = parks['geometry_m'].area
        parks_in_range.append(parks)
    return(parks_in_range)

In [14]:
# Block 5 park entry points
def UGS_fake_entry(UGS, road_nodes, cities, UGS_entry_buf = 25, walk_radius = 500, entry_point_merge = 0):
    print('get fake UGS entry points')
    start_time = time.time()
    ParkRoads = list()
    for j in range(len(cities)):
        ParkRoad = pd.DataFrame()
        mat = list()
        # For all
        for i in range(len(UGS[j])):
            dist = road_nodes[j]['geometry'].to_crs(3043).distance(UGS[j]['geometry'].to_crs(
                3043)[i])
            buf_nodes = road_nodes[j][(dist < UGS_entry_buf) & (dist > 0)]
            mat.append(list(np.repeat(i, len(buf_nodes))))
            ParkRoad = pd.concat([ParkRoad, buf_nodes])
            if i % 100 == 0: print(cities[j].rsplit(',')[0], round(i/len(UGS[j])*100,1),'% done', 
                                  round((time.time() - start_time) / 60,2),' mns')
        # Park no list conversion
        mat_u = [i for b in map(lambda x:[x] if not isinstance(x, list) else x, mat) for i in b]

        # Format
        ParkRoad['Park_No'] = mat_u
        ParkRoad = ParkRoad.reset_index()
        ParkRoad['park_lon'] = ParkRoad['geometry_m'].x
        ParkRoad['park_lat'] = ParkRoad['geometry_m'].y
        
        # Get the road nodes intersecting with the parks' buffer
        ParkRoad = pd.merge(ParkRoad, UGS[j][['geometry']], left_on = 'Park_No', right_index = True)

        # Get the walkable park size
        ParkRoad['park_size_walkable'] = ParkRoad['geometry_m'].buffer(walk_radius).to_crs(4326).intersection(ParkRoad['geometry_y'])
        ParkRoad['walk_area'] = ParkRoad['park_size_walkable'].to_crs(3043).area
        ParkRoad['park_area'] = ParkRoad['geometry_y'].to_crs(3043).area
        ParkRoad['share_walked'] = ParkRoad['walk_area'] / ParkRoad['park_area']
        
        # Get size inflation factors for the gravity model
        ParkRoad['size_infl_factor'] = ParkRoad['walk_area'] / ParkRoad['walk_area'].median()
        ParkRoad['size_infl_sqr2'] = ParkRoad['size_infl_factor']**(1/2)
        ParkRoad['size_infl_sqr3'] = ParkRoad['size_infl_factor']**(1/3)
        ParkRoad['size_infl_sqr5'] = ParkRoad['size_infl_factor']**(1/5)
                
        # Merge fake UGS entry points if within X meters of each other for better system performance
        # Standard no merging
        ParkRoad = simplify_UGS_entry(ParkRoad, entry_point_merge = 0)
                
        ParkRoads.append(ParkRoad)

        print(cities[j].rsplit(',')[0],'100 % done', 
                                  round((time.time() - start_time) / 60,2),' mns')
        
    return(ParkRoads)

In [15]:
# Block 5.5 (not in use, buffer is 0, thus retains all the park entry points as is)
def simplify_UGS_entry(fake_UGS_entry, entry_point_merge = 0):
    # Get buffer of nodes close to each other.
    # Get the buffer
    ParkComb = fake_UGS_entry
    ParkComb['geometry_m_buffer'] = ParkComb['geometry_m'].buffer(entry_point_merge)

    # Get and merge components
    M = libpysal.weights.fuzzy_contiguity(ParkComb['geometry_m_buffer'])
    ParkComb['components'] = M.component_labels

    # Take centroid of merged components
    centr = gpd.GeoDataFrame(ParkComb, geometry = 'geometry_x', crs = 4326).dissolve('components')['geometry_x'].centroid
    centr = gpd.GeoDataFrame(centr)
    centr.columns = ['comp_centroid']

    # Get node closest to the centroid of all merged nodes, which accesses the road network.
    ParkComb = pd.merge(ParkComb, centr, left_on = 'components', right_index = True)
    ParkComb['centr_dist'] = ParkComb['geometry_x'].distance(ParkComb['comp_centroid'])
    ParkComb = ParkComb.iloc[ParkComb.groupby('components')['centr_dist'].idxmin()]
    return(ParkComb)

In [16]:
# Block 6 grid-parkentry combinations within euclidean threshold distance
def suitible_combinations(UGS_entry, pop_grids, road_nodes, thresholds, cities, chunk_size = 10000000):
    print('get potential (Euclidean) suitible combinations')
    start_time = time.time()
    RoadComb = list()
    for l in range(len(cities)):
        #blockA = block_combinations
        print(cities[l])
        len1 = len(pop_grids[l])
        len2 = len(UGS_entry[l])

        # Reduce the size of combinations per iteration
        len4 = 1
        len5 = len1 * len2
        blockC = len5
        while blockC > chunk_size:
            blockC = len5 / len4
            #print(blockC, len4)
            len4 = len4+1

        # Amount of grids taken per iteration block
        block = round(len1 / len4)

        output = pd.DataFrame()
        # Checking all the combinations at once is too performance intensive, it is broken down per 1000 (or what you want)
        for i in range(len4):
            # Check all grid-park combinations per block
            l1, l2 = range(i*block,(i+1)*block), range(0,len2)
            listed = pd.DataFrame(list(product(l1, l2)))

            # Merge grid and park information
            grid_merged = pd.merge(listed, 
                                   pop_grids[l][['grid_lon','grid_lat','centroid','centroid_m']],
                                   left_on = 0, right_index = True)
            node_merged = pd.merge(grid_merged, 
                                   UGS_entry[l][['Park_No','osmid','geometry_x','geometry_y','geometry_m','park_lon','park_lat',
                                       'size_infl_sqr2','size_infl_sqr3','size_infl_sqr5','share_walked','park_area','walk_area']], 
                                   left_on = 1, right_index = True)

            # Preset index for merging
            node_merged['key'] = range(0,len(node_merged))
            node_merged = node_merged.set_index('key')
            node_merged = node_merged.loc[:, ~node_merged.columns.isin(['index'])]

            # Create lists for better computational performance
            glon = list(node_merged['grid_lon'])
            glat = list(node_merged['grid_lat'])
            plon = list(node_merged['park_lon'])
            plat = list(node_merged['park_lat'])
            infl2 = list(node_merged['size_infl_sqr2'])
            infl3 = list(node_merged['size_infl_sqr3'])
            infl5 = list(node_merged['size_infl_sqr5'])

            # Get the euclidean distances
            mat = list()
            mat2 = list()
            mat3 = list()
            mat4 = list()
            for j in range(len(node_merged)):
                mat.append(math.sqrt(abs(plon[j] - glon[j])**2 + abs(plat[j] - glat[j])**2))
                mat2.append(math.sqrt(abs(plon[j] - glon[j])**2 + abs(plat[j] - glat[j])**2) / infl2[j])
                mat3.append(math.sqrt(abs(plon[j] - glon[j])**2 + abs(plat[j] - glat[j])**2) / infl3[j])
                mat4.append(math.sqrt(abs(plon[j] - glon[j])**2 + abs(plat[j] - glat[j])**2) / infl5[j])

            # Check if distances are within 1000m and join remaining info and concat in master df per 1000.
            mat_df = pd.DataFrame(mat3)[(np.array(mat) <= np.max(thresholds)) | 
                                        (np.array(mat2) <= np.max(thresholds)) | 
                                        (np.array(mat3) <= np.max(thresholds)) | 
                                        (np.array(mat4) <= np.max(thresholds))]

            # join the other gravity euclidean scores and other information
            mat_df = mat_df.join(pd.DataFrame(mat), lsuffix='_infl', rsuffix='_entr', how = 'left')
            mat_df = mat_df.join(pd.DataFrame(mat2), lsuffix='_entry', rsuffix='_pwr', how = 'left')
            mat_df = mat_df.join(pd.DataFrame(mat4), lsuffix='_pwr', rsuffix='_root', how = 'left')
            mat_df.columns = ['size_infl_eucl2','raw euclidean','size_infl_eucl3','size_infl_eucl5']    
            mat_df = mat_df.join(node_merged)

            output = pd.concat([output, mat_df])

            print('chunk',(i+1),'/',len4,len(mat_df),'suitible comb.')
        # Renaming columns
        print('total combinations within distance',len(output))

        output.columns = ['size_infl_eucl3','raw euclidean','size_infl_eucl2','size_infl_eucl5',
                          'Grid_No','Park_entry_No','grid_lon','grid_lat','Grid_coords_centroid','Grid_m_centroid',
                          'Park_No','Parkroad_osmid','Park_geom','Parkroad_coords_centroid','Parkroad_m_centroid',
                          'park_lon','park_lat','size_infl_sqr2','size_infl_sqr3','size_infl_sqr5',
                          'parkshare_walked','park_area','walk_area_m2']

        output = output[['raw euclidean','size_infl_eucl2','size_infl_eucl3','size_infl_eucl5',
                         'Grid_No','Park_entry_No','Grid_coords_centroid','Grid_m_centroid',
                          'Park_No','Parkroad_osmid','Park_geom','Parkroad_coords_centroid','Parkroad_m_centroid',
                         'walk_area_m2','size_infl_sqr2','size_infl_sqr3','size_infl_sqr5']]

        # Reinstate geographic elements
        output = gpd.GeoDataFrame(output, geometry = 'Grid_coords_centroid', crs = 4326)
        output['Grid_m_centroid'] = gpd.GeoSeries(output['Grid_m_centroid'], crs = 3043)
        output['Parkroad_coords_centroid'] = gpd.GeoSeries(output['Parkroad_coords_centroid'], crs = 4326)
        output['Parkroad_m_centroid'] = gpd.GeoSeries(output['Parkroad_m_centroid'], crs = 3043)

        # Get the nearest entrance point for the grid centroids
        output = gridroad_entry(output, road_nodes[l])

        print('100 % gridentry done', round((time.time() - start_time) / 60,2),' mns')
        RoadComb.append(output)
    return (RoadComb)

In [17]:
def gridroad_entry (suitible_comb, road_nodes):    
    start_time = time.time()
    mat5 = list()
    for i in range(len(suitible_comb)):
        try:
            nearest = int(road_nodes['geometry'].sindex.nearest(suitible_comb['Grid_coords_centroid'].iloc[i])[1])
            mat5.append(road_nodes['osmid_var'].iloc[nearest])
        except: 
            # sometimes two nodes are the exact same distance, then the first in the list is taken.
            nearest = int(road_nodes['geometry'].sindex.nearest(suitible_comb['Grid_coords_centroid'].iloc[i])[1][0])
            mat5.append(road_nodes['osmid_var'].iloc[nearest])
        if i % 250000 == 0: print(round(i/len(suitible_comb)*100,1),'% gridentry done', round((time.time() - start_time) / 60,2),' mns')
    # format resulting dataframe
    suitible_comb['grid_osm'] = mat5
    suitible_comb = pd.merge(suitible_comb, road_nodes['geometry'], left_on = 'grid_osm', right_index = True)
    suitible_comb['geometry_m'] = gpd.GeoSeries(suitible_comb['geometry'], crs = 4326).to_crs(3043)
    suitible_comb = suitible_comb.reset_index()
    return(suitible_comb)

In [1]:
def grids_in_UGS (suitible_comb, UGS, pop_grid): 
    print('grids in UGS')
    start_time = time.time()
    RoadInOut = list()
    for i in range(len(suitible_comb)):
        gridUGS = pop_grid[i]['centroid'].intersection(UGS[i].dissolve().geometry[0]).is_empty == False
        gridUGS.name = 'in_out_UGS'
        merged = pd.merge(suitible_comb[i], gridUGS, left_on = 'Grid_No', right_index = True)
        RoadInOut.append(merged)
        print(i)
    return(RoadInOut) 

In [19]:
# Block 7 calculate route networks of all grid-parkentry combinations within euclidean threshold distance
def route_finding (graphs, combinations, road_nodes, road_edges, cities, block_size = 250000, nn_iter = 10):
    warnings.filterwarnings("ignore")
    start_time = time.time()
    
    print('comb. by city')
    for n in enumerate(cities): # Know how much comb. need to be calculcated.
        print(n[1], len(combinations[n[0]]))
    print(' ')
    
    Routes = list()
    Routes_detail = list()
    for j in range(len(cities)):
        suit_raw = combinations[j]

        In_UGS = suit_raw[suit_raw['in_out_UGS'] == True] # Check if a grid centroid is in an UGS
        suitible = suit_raw[suit_raw['in_out_UGS'] == False].reset_index(drop = True) # recreate a subsequential index
        
        len2 = int(np.ceil(len(suitible)/block_size)) # get number of blocks (chunks)
        Route_parts = pd.DataFrame()
        Route_dparts = pd.DataFrame()

        # Divide in chunks of block for computational load
        for k in range(len2):    
            suitible_chunk = suitible.iloc[k*block_size:k*block_size+block_size] # Get block ids

            parknode = list(suitible_chunk['Parkroad_osmid']) # UGS road entry ids
            gridnode = list(suitible_chunk['grid_osm']) # grid centroid road entry ids

            s_mat = list([]) # osmid from
            s_mat1 = list([]) # osmid to
            s_mat2 = list([]) # route id
            s_mat3 = list([]) # step id
            s_mat4 = list([]) # way calculated
            s_mat5 = list([]) # way calculated id
            mat_nn = [] # sums number of routes containing nearest nodes.
            len1 = len(suitible_chunk)

            print(cities[j].rsplit(',')[0], k+1,'/',len2, 
                  'range',k*block_size,'-',k*block_size+np.where(k*block_size+block_size >= len1,len1,block_size))
            
            for i in range(len(suitible_chunk)):
                try:
                    shortest = nx.shortest_path(graphs[j], gridnode[i], parknode[i], 'travel_dist', method = 'dijkstra')
                    s_mat.append(shortest)
                    shortest_to = list(shortest[1:len(shortest)])
                    shortest_to.append(-1)
                    s_mat1.append(shortest_to)
                    s_mat2.append(list(np.repeat(i+block_size*k, len(shortest))))
                    s_mat3.append(list(np.arange(0, len(shortest))))
                    s_mat4.append('normal way')
                    s_mat5.append(1)
                except:
                    try:
                        # Check the reverse
                        shortest = nx.shortest_path(graphs[j], parknode[i], gridnode[i], 'travel_dist', method = 'dijkstra')
                        s_mat.append(shortest)
                        shortest_to = list(shortest[1:len(shortest)])
                        shortest_to.append(-1)
                        s_mat1.append(shortest_to)
                        s_mat2.append(list(np.repeat(i+block_size*k, len(shortest))))
                        s_mat3.append(list(np.arange(0, len(shortest))))
                        s_mat4.append('reverse way')
                        s_mat5.append(0)
                    except:
                        # Otherwise the nearest node is taken, which is iterated X times at max, check assumptions, block #0 
                        nn_route_finding(graphs[j], suitible_chunk, road_nodes[j],
                                   s_mat, s_mat1, s_mat2, s_mat3, s_mat4, s_mat5, mat_nn, # matrice info see above
                                   it = i, block = k, block_size = block_size, 
                                         nn_iter = 10) # max nearest nodes to be found
                        
                if i % 10000 == 0: print(round((i+block_size*k)/len(suitible)*100,2),'% done',
                                         round((time.time() - start_time) / 60,2),'mns')
            print(len(mat_nn),'nearest nodes found')

            print(round((i+block_size*k)/len(suitible)*100,2),'% pathfinding done', round((time.time() - start_time) / 60,2),'mns')
            
            # Formats route information by route and step (detailed)
            routes = route_formatting(s_mat, s_mat1, s_mat2, s_mat3, road_edges[j])
            print('formatting done', round((time.time() - start_time) / 60,2), 'mns')
            
            # Summarizes information by route
            routes2 = route_summarization(routes, suitible_chunk, road_nodes[j], s_mat4, s_mat5)
            print('dissolving done', round((time.time() - start_time) / 60,2), 'mns')
            
            # Concats chunk with others already calculated
            Route_parts = pd.concat([Route_parts, routes2])
            Route_dparts = pd.concat([Route_dparts, routes])

        # Format grids in UGS to enable smooth df concat
        In_UGS = In_UGS.set_geometry(In_UGS['Grid_coords_centroid'])
        In_UGS = In_UGS[['geometry','Grid_No','grid_osm','Park_No','Park_entry_No','Parkroad_osmid',
                                   'Grid_m_centroid','walk_area_m2','size_infl_sqr2','size_infl_sqr3','size_infl_sqr5',
                                   'raw euclidean','geometry_m']]

        In_UGS['realG_osmid'] = suit_raw['Parkroad_osmid']
        In_UGS['realP_osmid'] = suit_raw['grid_osm']
        In_UGS['way_calc'] = 'grid in UGS'

        Route_parts = pd.concat([Route_parts,In_UGS])
        Route_parts = Route_parts.reset_index(drop = True)

        Route_parts['gridpark_no'] = Route_parts['Grid_No'].astype(str) +'-'+ Route_parts['Park_No'].astype(str)

        # All fill value 0 because no routes are calculated for grid centroids in UGSs
        to_fill = ['way-id','route_cost','steps','real_G-entry','raw_Tcost','grav2_Tcost','grav3_Tcost','grav5_Tcost']                                   
        Route_parts[to_fill] = Route_parts[to_fill].fillna(0)  

        Routes.append(Route_parts)
        Routes_detail.append(Route_dparts)
    return({'route summary':Routes,'route detail':Routes_detail})

In [20]:
def nn_route_finding (Graph, comb, nodes, 
                mat_from, mat_to, mat_route, mat_step, mat_way, mat_wbin, mat_nn, 
                it, block, block_size = 250000, nn_iter = 10):
    # Order in route for nearest node:
    # 1. gridnode to nearest to the original failed parknode
    # 2. The reverse of 1.
    # 3. nearest gridnode to the failed one and route to park
    # 4. The reverse of 3.
    
    len3 = 0
    alt_route = list([])
    
    gosm = comb['grid_osm'] # grid osmids (origin)
    posm = comb['Parkroad_osmid'] # UGS osmids (destination)
    node = nodes['geometry'] # road node geoms
    node_osm = nodes['osmid_var'] # road node osmids
    
    while len3 < nn_iter and len(alt_route) < 1: # continue if no more than 10 nearest nodes or if a route is found
        
        len3 = len3 +1
        # Finds nearest node per iteration.
        nn = nn_finding(gosm, posm, node, node_osm, it, len3)
        
         # routing within graph and current and found nearest nodes of grids and UGS
        nn_routing(Graph, nn['curr_park'], nn['near_park'], nn['curr_grid'], nn['near_grid'],
                        mat_way, mat_wbin, alt_route, len3)
        
    if len(alt_route) == 0:
        alt = alt_route 
    else: 
        alt = alt_route[0]
    len4 = len(alt)
    if len4 > 0: # If a route is found append
        mat_nn.append(it+block_size*block)
        mat_from.append(alt)
        shortest_to = list(alt[1:len(alt)])
        shortest_to.append(-1)
        mat_to.append(shortest_to)
        mat_route.append(list(np.repeat(it+block_size*block,len4)))
        mat_step.append(list(np.arange(0, len4)))
    else: # if no route is found fill values.
        mat_from.append(-1)
        mat_to.append(-1)
        mat_route.append(it+block_size*block)
        mat_step.append(-1)
        mat_way.append('no way')
        mat_wbin.append(2)
        print('index',it+block_size*block,'No route')

In [21]:
def nn_finding (grid_osmid, UGS_osmid, node_geom, node_osmid, it, nn_i):
    # Grid nearest
    g_geom = node_geom[node_osmid == int(grid_osmid[it:it+1])] # Get current grid road entry geometry
    g_nearest = pd.DataFrame((abs(float(g_geom.x) - node_geom.x)**2 # Find nearest.
                              +abs(float(g_geom.y) - node_geom.y)**2)**(1/2)
                            ).join(node_osmid).sort_values(0)

    g_grid = g_nearest.iloc[nn_i,1] # Take '1' because 0 will get the current node with distance 0.
    g_park = list(UGS_osmid)[it]

    p_geom = node_geom[node_osmid == int(UGS_osmid[it:it+1])] # Get current UGS raod entry geometry
    p_nearest = pd.DataFrame((abs(float(p_geom.x) - node_geom.x)**2 # Find nearest
                              +abs(float(p_geom.y) - node_geom.y)**2)**(1/2)
                            ).join(node_osmid).sort_values(0)

    p_grid = list(grid_osmid)[it]
    p_park = p_nearest.iloc[nn_i,1] # Take '1' because 0 will get the current node with distance 0.
    
    return({'curr_park':p_grid, 'near_park':p_park, 'curr_grid':g_park, 'near_grid':g_grid}) # return as dict

In [22]:
def nn_routing (Graph, curr_park, near_park, curr_grid, near_grid, mat_way, mat_wbin, found_route, nn_i):
    try: # First try from current grid to nearest UGS id.
        found_route.append(nx.shortest_path(Graph, curr_park, near_park, 
                                          'travel_dist', method = 'dijkstra'))
        mat_way.append(str(nn_i)+'grid > n-park')
        mat_wbin.append(1)
    except:
        try: # Else try the reverse.
            found_route.append(nx.shortest_path(Graph, near_park, curr_park, 
                                              'travel_dist', method = 'dijkstra'))
            mat_way.append(str(nn_i)+'n-park > grid')
            mat_wbin.append(0)
        except:
            try: # If no success try from current UGS id to nearest grid id
                found_route.append(nx.shortest_path(Graph, near_grid, curr_grid, 
                                                  'travel_dist', method = 'dijkstra'))
                mat_way.append(str(nn_i)+'n-grid > park')
                mat_wbin.append(1)
            except:
                try: # Else try the reverse
                    found_route.append(nx.shortest_path(Graph, curr_grid, near_grid, 
                                                      'travel_dist', method = 'dijkstra'))
                    mat_way.append(str(nn_i)+'park > n-grid')
                    mat_wbin.append(0)
                except: # if no routes are found pass.
                    pass

In [23]:
def route_formatting(mat_from, mat_to, mat_route, mat_step, road_edges):
    # Unpack lists
    s_mat_u = [i for b in map(lambda x:[x] if not isinstance(x, list) else x, mat_from) for i in b]
    s_mat_u1 = [i for b in map(lambda x:[x] if not isinstance(x, list) else x, mat_to) for i in b]
    s_mat_u2 = [i for b in map(lambda x:[x] if not isinstance(x, list) else x, mat_route) for i in b]
    s_mat_u3 = [i for b in map(lambda x:[x] if not isinstance(x, list) else x, mat_step) for i in b]

    # Format df
    routes = pd.DataFrame([s_mat_u,s_mat_u1,s_mat_u2,s_mat_u3]).transpose()
    routes.columns = ['from','to','route','step']
    mat_key = list([])
    for n in range(len(routes)):
        mat_key.append(str(int(s_mat_u[n])) + '-' + str(int(s_mat_u1[n])))
    routes['key'] = mat_key
    routes = routes.set_index('key')

    # Add route information
    routes = routes.join(road_edges, how = 'left')
    routes = gpd.GeoDataFrame(routes, geometry = 'geometry', crs = 4326)
    routes = routes.sort_values(by = ['route','step'])
    return(routes)

In [24]:
def route_summarization(routes, suitible_comb, road_nodes, mat_way, mat_wbin):
    # dissolve route
    routes2 = routes[['route','geometry']].dissolve('route')

    # get used grid- and parkosm. Differs at NN-route.
    route_reset = routes.reset_index()
    origin = route_reset['from'].iloc[list(route_reset.groupby('route')['step'].idxmin()),]
    origin = origin.reset_index().iloc[:,-1]
    dest = route_reset['from'].iloc[list(route_reset.groupby('route')['step'].idxmax()),]
    dest = dest.reset_index().iloc[:,-1]

    # grid > park = 1, park > grid = 0, no way = 2, detailed way in way_calc.
    routes2['way-id'] = mat_wbin
    routes2['realG_osmid'] = np.where(routes2['way-id'] == 1, origin, dest)
    routes2['realP_osmid'] = np.where(routes2['way-id'] == 1, dest, origin)
    routes2['way_calc'] = mat_way

    # get route cost, steps, additional information.
    routes2['route_cost'] = routes.groupby('route')['length'].sum()
    routes2['steps'] = routes.groupby('route')['step'].max()
    routes2['index'] = suitible_comb.index
    routes2 = routes2.set_index(['index'])
    routes2.index = routes2.index.astype(int)
    routes2 = pd.merge(routes2, suitible_comb[['Grid_No','grid_osm','Park_No','Park_entry_No','Parkroad_osmid',
                                          'Grid_m_centroid','walk_area_m2','size_infl_sqr2','size_infl_sqr3',
                                          'size_infl_sqr5','raw euclidean']],
                                            left_index = True, right_index = True)
    routes2 = pd.merge(routes2, road_nodes['geometry_m'], how = 'left', left_on = 'realG_osmid', right_index = True)
    # calculate distance of used road-entry for grid-centroid.
    routes2['real_G-entry'] = round(gpd.GeoSeries(routes2['Grid_m_centroid'], crs = 3043).distance(routes2['geometry_m']),3)
                                    
    # Calculcate total route cost for the four gravity variants
    routes2['raw_Tcost'] = routes2['route_cost'] + routes2['real_G-entry']
    routes2['grav2_Tcost'] = (routes2['route_cost'] + routes2['real_G-entry']) / routes2['size_infl_sqr2']
    routes2['grav3_Tcost'] = (routes2['route_cost'] + routes2['real_G-entry']) / routes2['size_infl_sqr3']
    routes2['grav5_Tcost'] = (routes2['route_cost'] + routes2['real_G-entry']) / routes2['size_infl_sqr5']
    return(routes2)

In [25]:
# Block 8 determine best parkentry points from each grid, then calculate grid scores
# and finally aggregate city access in categories (high, medium, low and no access)
def grid_score_summary (routes, cities, pop_grids, ext = '', grid_size = 100, save_path = 'C:/Dumps/GEE-WP Scores/Gravity/'):
    start_time = time.time()
    popg_acc = pd.DataFrame()
    grid_scores = list([])
    gridpark = list([])
    for n in range(len(cities)):    
        print(cities[n])

        # For the four distance decay variants regarding park size.
        l1 = list(['raw','grav2','grav3','grav5'])
        m1 = list(['entrance','gravity**(1/2)','gravity**(1/3)','gravity**(1/5)'])
        grid_score = list([])
        gridparks = list([])
        gridpark.append(gridparks)
        popgrid_access = pd.DataFrame()
        for i in range(len(l1)):
            # Get the lowest indices grouped by a key consisting of grid no and park no (best entry point from a grid to a park)
            var_best_routes = best_gridpark_comb (routes[n], l1[i], pop_grids[n])

            grdsc = pd.DataFrame()
            gridsc = pd.DataFrame()
            print(m1[i], round((time.time() - start_time) / 60,2), 'mns')

            # For each threshold given, calculate a score
            for k in range(len(thresholds)):
                
                t = thresholds[k]
                score = 'tr_'+ str(t)
                scores = determine_scores(var_best_routes, pop_grids[n], thresholds[k], l1[i], cities[n], 
                                          save_path, grid_size = 100)
                
                grdsc = pd.concat([grdsc, scores['score_w_route']], axis = 1)
                gridsc = pd.concat([gridsc, scores['grid_score']])
                                
                # Group according to the categories just created and sum the populations living in those grids
                popgacc = pd.DataFrame()
                popgacc[m1[i]+'_'+str(t)] = scores['score_w_route'].groupby(score+'_access')['population'].sum()
                popgrid_access = pd.concat([popgrid_access, popgacc],axis=1)   

                print('grid ',t)

            grid_score.append(grdsc)

            gridsc = gridsc.join(pop_grids[n]['geometry'])
            gridsc = gpd.GeoDataFrame(gridsc, geometry = 'geometry', crs = 4326)

            if not os.path.exists(save_path+str(grid_size)+'m grids/Grid_geoms/'):
                os.makedirs(save_path+str(grid_size)+'m grids/Grid_geoms/')

            gridsc.to_file(save_path+str(grid_size)+'m grids/Grid_geoms/gridscore_'+ l1[i] + '_' + cities[n] + '.gpkg')

            # Detailed scores to files number of cities * ways to measure = number of files.
            # Different threshold-scores are in the same dataframe
            gridsc = gridsc.loc[:, gridsc.columns!='geometry']

            if not os.path.exists(save_path+str(grid_size)+'m grids/Grid_csv/'):
                os.makedirs(save_path+str(grid_size)+'m grids/Grid_csv/')

            gridsc.to_csv(save_path+str(grid_size)+'m grids/Grid_csv/gridscore_'+ l1[i] + '_' + cities[n] + '.csv')
            gridparks.append(var_best_routes)

        grid_scores.append(grid_score)

        # For each city, divide the population access by group by the total to get its share.
        popgrid_access = popgrid_access / popgrid_access.sum()
        popgrid_access = pd.DataFrame(popgrid_access.unstack())
        popg_acc = pd.concat([popg_acc, popgrid_access], axis = 1)

        print(cities[n],'done', round((time.time() - start_time) / 60,2), 'mns')
    popg_acc.columns = cities
    popg_acc.to_csv(save_path+str(grid_size)+'m grids/popgrid_access.csv')
    return(popg_acc)    


In [26]:
def best_gridpark_comb (routes, var_abbr, pop_grid):
    Rclean = routes[routes['way_calc'] != 'no way'].reset_index()
    str1 = 'gridpark_' + var_abbr
    locals()[str1] = Rclean.iloc[Rclean.groupby('gridpark_no')[(str(var_abbr) +'_Tcost')].idxmin()]  

    # Get grid information
    locals()[str1] = pd.merge(locals()[str1], pop_grid[['population','geometry']],
                            left_on = 'Grid_No', right_index = True, how = 'outer')
    locals()[str1] = locals()[str1].reset_index()

    # formatting
    locals()[str1]['Park_No'] = locals()[str1]['Park_No'].fillna(-1)
    locals()[str1]['Park_No'] = locals()[str1]['Park_No'].astype(int)
    locals()[str1]['Park_entry_No'] = locals()[str1]['Park_entry_No'].fillna(-1)
    locals()[str1]['Park_entry_No'] = locals()[str1]['Park_entry_No'].astype(int)
    return(locals()[str1])

In [27]:
def determine_scores(var_df, pop_grid, thresholds, var_abbr, city, save_path, grid_size = 100):
    t = thresholds
    str2 = str(t)
    score = 'tr_'+ str2

    #Only get routes within the threshold given (it loops over every threshold) and calculate the scores
    thold = var_df[var_df[var_abbr + '_Tcost'] <= t]
    thold[score] = t - thold[var_abbr + '_Tcost']
    thold['pop' + score] = thold[score] * thold['population']
    thold['walk_area_ha' + str2] = var_df['walk_area_m2'] /10000
    thold['walkha_person' + str2] = thold['population'] / thold['walk_area_ha' + str2]

    # Join the gridpark information from before.
    var_df = var_df.join(thold[[score,'pop' + score,'walk_area_ha' + str2, 'walkha_person' + str2]])
    # get the grid_scores
    gs = pd.DataFrame()
    gs[[score,'pop_' + score,'walkha_' + str2]] = var_df.groupby(
            'Grid_No')[score,'pop' + score, 'walk_area_ha' + str2].sum()

    gs['walkha_person_' + score] = var_df.groupby('Grid_No')['walkha_person' + str2].mean()

    trstr = var_df[var_df[score] > 0]
    gs[score + '_parks'] = trstr.groupby('Grid_No')['gridpark_no'].count()

    # Add the routes as a dissolved line_geom
    gs[score + '_routes'] = gpd.GeoDataFrame(trstr[['Grid_No','geometry_x']],
                                                  geometry = 'geometry_x', crs = 4326).dissolve('Grid_No')

    # Add parks which grids have access to with its closest access point
    gs[score+'Park:entry'] = trstr[trstr['Park_No'] >=0].groupby('Grid_No')['Park_No'].apply(list).astype(str
    ) + ':' + trstr[trstr['Park_entry_No'] >=0].groupby('Grid_No')['Park_entry_No'].apply(list).astype(str)
                
    # determine the thresholds category-score. 
    # High >= threshold (perfect score to one park), medium is above half perfect, 
    # low is below this and no is no access to a park for a certain grid within the threshold given
    gs[score+'_access'] = np.select([gs[score] >= t, (gs[score] < t) & (
    gs[score]>= t/2), (gs[score] < t/2) & (gs[score]> 0), gs[score] <= 0],
          ['1 high','2 medium','3 low','4 no'])
    gs = gs.join(pop_grid['population'], how = 'outer')
            
    gs = gpd.GeoDataFrame(gs, geometry = score + '_routes', crs = 4326)
            
    if not os.path.exists(save_path+str(grid_size)+'m grids/Grid_lines/'):
        os.makedirs(save_path+str(grid_size)+'m grids/Grid_lines/')
                
    gs.to_file(save_path+str(grid_size)+'m grids/Grid_lines/gridscore_'+ var_abbr + '_' + str2 + '_' + city + '.gpkg')
            
    gsc = gs.loc[:,~gs.columns.isin([score + '_routes'])]

    return({'grid_score':gsc,'score_w_route':gs})