In [1]:
# Import packages
import geopandas as gpd
import numpy as np
import pandas as pd
import libpysal
import networkx as nx
import osmnx as ox
import time
import os
from shapely import geometry
from shapely.geometry import Point, MultiLineString, LineString, Polygon, MultiPolygon
from shapely.ops import nearest_points, polygonize
import shapely
from itertools import product, combinations
import math
import warnings
import socket
from wpgpDownload.utils.dl import wpFtp
from wpgpDownload.utils.isos import Countries
from wpgpDownload.utils.convenience_functions import download_country_covariates as dl
from wpgpDownload.utils.wpcsv import Product
import georasters as gr
from wpgpDownload.utils.convenience_functions import refresh_csv

In [9]:
# Block 0: cities to analyse and modelling assumptions

# Wall-clock reference taken when this configuration cell is run.
start = time.time()

# Place names; they are passed to the OSM geocoder and to the OSMnx download helpers below.
cities = ['Dhaka Metropolitan']

# Idea: convert to dask-pandas and dask-geopandas for speed
# https://towardsdatascience.com/pandas-with-dask-for-an-ultra-fast-notebook-e2621c3769f
# Or use Koalas (Spark-like pandas)

# Assumptions
thresholds = [300, 600, 1000] # route thresholds in metres. The WHO guideline speaks of access within 300 m

In [10]:
# 1. Required preprocessing for information extraction

# Silence warnings. Note: this filter ignores ALL warnings for the rest of the session,
# not only deprecation warnings.
warnings.filterwarnings("ignore")

# Get the city boundaries
bound_df = ox.geocoder.geocode_to_gdf(cities) # gets city boundaries from OSM

# Get the unique ISO codes of the selected cities (so each country raster is loaded only once)
unique_iso = iso_countries(bound_df, # Finding the country of the bounded city
                           cities)
print(' ')

print('downloaded:')
# Get the country rasters (downloaded automatically when no local file is found)
# NOTE(review): hard-coded absolute Windows path; consider a configurable data directory.
raster = countries_grids(unique_iso,
                         r'D:\Dumps\WorldPoP_Grids') # custom path, where grid files can be stored without downloading

if prefer dwnl from terminal: 
wpgpDownload download -i BGD --id 5004
 
downloaded:
BGD downloaded 0.01 mns


In [11]:
# 2. Information extraction

# Clip cities from countries, format population grids
population_grids = city_grids_format(bound_df, # city boundaries
                                     unique_iso,
                                     raster, # country raster
                                     cities, 
                                     grid_size = 100)
print(' ')

# Get road networks
# NOTE(review): this assignment rebinds the name `road_networks` from the function to its
# result, so the cell cannot be re-run without first re-running the cell that defines the function.
road_networks = road_networks(cities, # Get 'all' (drive,walk,bike) network
                                 thresholds,
                                 undirected = True)

# road_networks() returns a dict with the keys 'graphs', 'nodes', 'edges' and 'edges long'.
# The expression below is not the last one in the cell, so its value is not displayed.
road_networks.keys()

print(' ')
# Extracting UGS (urban green spaces)
UGS = urban_greenspace(cities, 
                       thresholds,
                       one_UGS_buf = 25, # buffer (m) within which nearby UGS are treated as one
                       min_UGS_size = 400) # WHO sees this as the minimum UGS size (400 m2)

100m resolution grids extraction
Dhaka Metropolitan 0.13 mns
 
get road networks from OSM
Dhaka Metropolitan done 0.86 mns
 
get urban greenspaces from OSM
Dhaka Metropolitan done


In [32]:
# 3. Preprocess information for route finding

# Get fake entry points (road nodes between the UGS edge and the buffer limit)
UGS_entry = UGS_fake_entry(UGS, 
                           road_networks['nodes'], 
                           cities, 
                           UGS_entry_buf = 25, # road nodes within 25 metres of a UGS are treated as fake entry points
                           walk_radius = 500, # assume the average person only experiences a UGS up to 500 m around
                                                # the entry point; a larger reachable area counts as more attractive
                           entry_point_merge = 0) # merges nearby fake UGS entry points within X metres,
                                                    # which may be done for performance
print(' ')
# Find all potentially suitable combinations (entry points within the max threshold Euclidean distance of a grid)
suitible = suitible_combinations(UGS_entry, 
                                 population_grids, 
                                 road_networks['nodes'], 
                                 thresholds,
                                 cities,
                                 chunk_size = 10000000) # processed in chunks of (num UGS entry points * num pop grids)
                                                        # to limit memory use; set lower if the machine stalls
print(' ')
# Flag grid cells whose centroid already lies inside a UGS
suitible_InOut_UGS = grids_in_UGS (suitible, UGS, population_grids)

get fake UGS entry points
Dhaka Metropolitan 0.0 % done 0.0  mns
Dhaka Metropolitan 59.5 % done 0.21  mns
Dhaka Metropolitan 100 % done 0.36  mns
 
get potential (Euclidean) suitible combinations
Dhaka Metropolitan
chunk 1 / 3 21890 suitible comb.
chunk 2 / 3 25070 suitible comb.
chunk 3 / 3 5533 suitible comb.
total combinations within distance 52493
0.0 % gridentry done 0.0  mns
100 % gridentry done 1.1  mns
 
Check grids within UGS
0 0.0  mns
100 0.25  mns


In [36]:
# Ad-hoc check: mean area of the 'park_size_walkable' geometries of the first city, measured in EPSG:3043.
UGS_entry[0]['park_size_walkable'].to_crs(3043).area.mean()

77999.2338892479

In [33]:
# 4. Finding shortest routes between grid cells and UGS entry points.

Routes = route_finding (road_networks['graphs'], # graphs of the road networks
               suitible_InOut_UGS, # candidate grid-UGS combinations, flagged as inside or outside a UGS
               road_networks['nodes'], 
               road_networks['edges'], 
               cities, 
               block_size = 250000, # number of combinations routed per chunk, to spread the data load
               nn_iter = 10) # max number of nearest nodes to try (both for UGS entries and grid-centroid road entries)

comb. by city
Dhaka Metropolitan 52493
 
Dhaka Metropolitan 1 / 1 range 0 - 50265
0.0 % done 0.0 mns
19.89 % done 0.06 mns
39.79 % done 0.13 mns
59.68 % done 0.21 mns
79.58 % done 0.4 mns
99.47 % done 0.47 mns
0 nearest nodes found
100.0 % pathfinding done 0.48 mns
formatting done 0.71 mns
dissolving done 0.98 mns


In [37]:
# 5. Summarize scores per grid cell
grid_scores = grid_score_summary (Routes['route summary'], # per-route summaries produced by route_finding
                                  cities, 
                                  population_grids, 
                                  ext = '', # with multiple runs, the extension prevents the summary file from being overwritten
                                  grid_size = 100) # Size of the grid in metres
grid_scores

Dhaka Metropolitan
entrance 0.13 mns


KeyboardInterrupt: 

In [2]:
def iso_countries(bounds, cities):
    """Return the unique country codes of the geocoded cities.

    The country is taken as the text after the last comma of each entry in
    ``bounds['display_name']`` and looked up in the ``Countries`` table of
    ``wpgpDownload``. For every newly seen country the matching manual
    ``wpgpDownload download`` command for the 2020 population raster is printed.

    Parameters
    ----------
    bounds : table with a 'display_name' column (as produced by
        ``ox.geocoder.geocode_to_gdf`` in the driver cell).
    cities : unused; kept so existing calls keep working.

    Returns
    -------
    list
        Country codes (second column of the ``Countries`` table) in order of
        first appearance, without duplicates.

    Raises
    ------
    ValueError
        If a country name is not present in the ``Countries`` table.
    """
    country_table = pd.DataFrame(Countries)
    iso_codes = []
    print('if prefer dwnl from terminal: ')

    for display_name in bounds['display_name']:
        # strip() instead of dropping one character: also works when the
        # display name has no comma or the separator has no space after it.
        country = display_name.rsplit(',')[-1].strip()
        match = country_table[country_table['name'] == country]
        if match.empty:
            raise ValueError("country '%s' (from display name '%s') not found in wpgpDownload Countries"
                             % (country, display_name))
        iso = match.iloc[0, 1]

        # Only handle each country once, so its raster is loaded once
        if iso not in iso_codes:
            iso_codes.append(iso)

            # Look up the product id of the 2020 population raster (for manual download)
            products = Product(iso)
            results = products.description_contains('people per grid-cell 2020')
            product_ids = [str(p.idx).split('/')[0] for p in results]
            if product_ids:
                print('wpgpDownload download -i', iso, '--id', product_ids[0])
            else:
                print('no 2020 population product listed for', iso)

    return(iso_codes)

In [3]:
def countries_grids(iso_countries, download_dir = ' '):
    """Load the 2020 population raster of each country, downloading it if needed.

    For every code the file ``<iso>_ppp_2020.tif`` is looked up first in
    ``download_dir`` and then in the current working directory. If neither
    exists, the ``wpgpDownload`` command-line tool is used to list the
    available datasets, pick the first listing line containing '2020' and
    download it; the file is then read from the current working directory.

    Parameters
    ----------
    iso_countries : iterable of str
        Country codes as returned by ``iso_countries()``.
    download_dir : str
        Directory that may already contain the raster files.

    Returns
    -------
    list
        One raster (as returned by ``gr.from_file``) per country code.

    Raises
    ------
    RuntimeError
        If the dataset listing contains no line with '2020'.
    """
    # Local import: the notebook's import cell does not import subprocess,
    # so the former `spr.run(...)` calls raised NameError.
    import subprocess as spr

    start_time = time.time()
    blocks = []
    for iso in iso_countries:
        filename = iso.lower() + '_ppp_2020.tif'
        # os.path.join keeps the lookup working on non-Windows systems too
        path1 = os.path.join(os.getcwd(), filename)
        path2 = os.path.join(download_dir, filename)

        if os.path.exists(path2):
            # First choice: the manually specified directory
            block = gr.from_file(path2)
        elif os.path.exists(path1):
            # Second choice: the current working directory
            block = gr.from_file(path1)
        else:
            # Otherwise list the datasets through the terminal tool.
            # iso comes from the wpgpDownload country table, not from free user input.
            p1 = spr.run('wpgpDownload download -i '+ iso+ ' -f people --datasets',
                         shell = True,
                         capture_output = True)
            datasets = p1.stdout.decode().rsplit('\n')

            # The first 2020 entry is taken (presumably the general population
            # raster rather than a demographic subset - TODO confirm).
            ds = None
            for line in datasets:
                if '2020' in line:
                    ds = line.rsplit('\t')[0]
                    print(ds)
                    break
            if ds is None:
                raise RuntimeError('no 2020 population dataset listed by wpgpDownload for ' + str(iso))

            # Download the selected file through the terminal tool and read it
            dwnl = 'wpgpDownload download -i '+iso+' --id '+str(ds)
            spr.run(dwnl, shell = True)
            block = gr.from_file(path1)

        blocks.append(block)
        print(iso,'downloaded', round((time.time() - start_time)/60,2),'mns')
    return(blocks)

In [4]:
# Block 2 population grids extraction
def city_grids_format(bounds, iso_countries, country_grids, cities, grid_size = 100):
    """Clip each city out of its country raster and build population grid cells.

    Parameters
    ----------
    bounds : table with 'display_name' and 'geometry' columns, one row per city.
    iso_countries : list of country codes, parallel to ``country_grids``.
    country_grids : list of country rasters.
    cities : list of city names (used for the loop length and the progress print).
    grid_size : target cell size; raster rows/columns are grouped by
        ``grid_size/100`` (presumably the source raster has ~100 m cells - TODO confirm).

    Returns
    -------
    list
        One frame per city with population, area, centroid and a rectangular
        cell geometry built from each cell's bounds.
    """
    t0 = time.time()
    city_grids = []
    print(str(grid_size) + 'm resolution grids extraction')
    cells_per_block = grid_size/100

    for city_idx in range(len(cities)):
        # Find the country code of this city and the position of its raster
        country_table = pd.DataFrame(Countries)
        country_name = bounds['display_name'][city_idx].rsplit(',')[-1][1:]
        iso = country_table[country_name == country_table['name']].iloc[0,1]
        raster_pos = [pos for pos, code in enumerate(iso_countries) if code == iso][0]

        # Clip the city from the country raster and convert it to a GeoDataFrame
        cells = country_grids[raster_pos].clip(bounds['geometry'][city_idx])
        cells = cells[0].to_geopandas()

        # Key that groups raster cells into blocks of the requested size
        cells['row3'] = np.floor(cells['row']/cells_per_block).astype(int)
        cells['col3'] = np.floor(cells['col']/cells_per_block).astype(int)
        cells['dissolve_key'] = cells['row3'].astype(str) +'-'+ cells['col3'].astype(str)

        # Merge the raster cells of each block into one geometry
        blocks_gdf = cells[['dissolve_key','geometry','row3','col3']].dissolve('dissolve_key')

        # Population and area per block; keep populated blocks of (nearly) full size only
        block_population = cells.groupby('dissolve_key')['value'].sum()
        blocks_gdf['population'] = round(block_population).astype(int)
        block_area = gpd.GeoSeries(blocks_gdf['geometry'], crs = 4326).to_crs(3043).area
        blocks_gdf['area_m'] = round(block_area).astype(int)
        blocks_gdf = blocks_gdf[blocks_gdf['population'] > 0]
        blocks_gdf = blocks_gdf[blocks_gdf['area_m'] / blocks_gdf['area_m'].max() > 0.95]

        # Centroids in geographic and in projected (EPSG:3043) coordinates
        blocks_gdf['centroid'] = blocks_gdf['geometry'].centroid
        blocks_gdf['centroid_m'] = gpd.GeoSeries(blocks_gdf['centroid'], crs = 4326).to_crs(3043)
        blocks_gdf['grid_lon'] = blocks_gdf['centroid_m'].x
        blocks_gdf['grid_lat'] = blocks_gdf['centroid_m'].y
        blocks_gdf = blocks_gdf.reset_index()

        # Dissolving sometimes yields multipolygons (tiny coordinate gaps);
        # replace every geometry by the rectangle spanned by its bounds.
        extent = blocks_gdf.bounds
        minx, maxx = extent['minx'], extent['maxx']
        miny, maxy = extent['miny'], extent['maxy']
        blocks_gdf['geometry'] = [
            Polygon([(minx[k],maxy[k]),(maxx[k],maxy[k]),(maxx[k],miny[k]),(minx[k],miny[k])])
            for k in range(len(blocks_gdf))
        ]

        city_grids.append(blocks_gdf)

        print(cities[city_idx].rsplit(',')[0], round((time.time() - t0)/60,2),'mns')
    return(city_grids)

In [5]:
# Block 3 Road networks
def road_networks (cities, thresholds, undirected = False):
    """Download and filter the OSM road network of every city.

    For each city the 'all' network is fetched with a buffer of
    ``max(thresholds) + 1000`` around the place, converted to node/edge
    frames, optionally made bidirectional, stripped of motorways and fast
    trunk roads, and converted back to a graph.

    Parameters
    ----------
    cities : iterable of place names understood by ``ox.graph_from_place``.
    thresholds : distances; only the maximum is used (for the download buffer).
    undirected : when True, every one-way edge is duplicated with 'from'/'to'
        swapped and all edges are marked ``oneway = False``.

    Returns
    -------
    dict with one list entry per city under each key:
        'graphs'     - graph rebuilt from the filtered nodes and edges
        'nodes'      - filtered node frame (with 'geometry_m' and 'osmid_var')
        'edges'      - slim edge frame ('osmid', 'length', 'geometry') indexed by 'key'
        'edges long' - full filtered edge frame
    """
    print('get road networks from OSM')
    start_time = time.time()
    graphs = list()
    road_nodes = list()
    road_edges = list()
    road_conn = list()

    for i in cities:
        # Get graph, road nodes and edges
        graph = ox.graph_from_place(i, network_type = "all", buffer_dist = (np.max(thresholds)+1000))
        #graphs.append(graph)

        road_node, road_edge = ox.graph_to_gdfs(graph)

        # Road nodes format: keep EPSG:4326 as active geometry, add a projected copy
        # (EPSG:3043) and expose the node index as the column 'osmid_var'
        road_node = road_node.to_crs(4326)
        road_node['geometry_m'] = gpd.GeoSeries(road_node['geometry'], crs = 4326).to_crs(3043)
        road_node['osmid_var'] = road_node.index
        road_node = gpd.GeoDataFrame(road_node, geometry = 'geometry', crs = 4326)

        # Format road edges: index levels become the columns 'from', 'to', 'keys';
        # 'key' is the string "<from>-<to>"
        road_edge = road_edge.to_crs(4326)
        road_edge['geometry_m'] = gpd.GeoSeries(road_edge['geometry'], crs = 4326).to_crs(3043)
        road_edge = road_edge.reset_index()
        road_edge.rename(columns={'u':'from', 'v':'to', 'key':'keys'}, inplace=True)
        road_edge['key'] = road_edge['from'].astype(str) + '-' + road_edge['to'].astype(str)
        
        if undirected == True:
            # Apply one-directional to both for walking
            both = road_edge[road_edge['oneway'] == False]
            one = road_edge[road_edge['oneway'] == True]
            # Reversed copies of the one-way edges: 'from' and 'to' are swapped,
            # all remaining columns are copied unchanged.
            # NOTE(review): the copied columns include 'key' and the geometries, so a
            # reversed edge keeps the forward "<from>-<to>" key and the forward geometry.
            rev = pd.DataFrame()
            rev[['from','to']] = one[['to','from']]
            rev = pd.concat([rev,one.iloc[:,2:]],axis = 1)
            edge_bidir = pd.concat([both, one, rev])
            # reset_index() without drop keeps the old index as an extra 'index' column
            edge_bidir = edge_bidir.reset_index()
            edge_bidir['oneway'] = False
        else:
            edge_bidir = road_edge

        # Exclude motorways, and trunk roads whose maxspeed text matches one of the listed speeds
        edge_filter = edge_bidir[(edge_bidir['highway'].str.contains('motorway') | 
              (edge_bidir['highway'].str.contains('trunk') & 
               edge_bidir['maxspeed'].astype(str).str.contains(
                   '40 mph|45 mph|50 mph|55 mph|60 mph|65|70|75|80|85|90|95|100|110|120|130|140'))) == False]
        road_edges.append(edge_filter)

        # Exclude isolated nodes: keep only nodes that still appear in a remaining edge
        fltrnodes = pd.Series(list(edge_filter['from']) + list(edge_filter['to'])).unique()
        newnodes = road_node[road_node['osmid_var'].isin(fltrnodes)]
        road_nodes.append(newnodes)

        # Get only necessary road connections columns for network performance
        road_con = edge_filter[['osmid','key','length','geometry']]
        road_con = road_con.set_index('key')

        road_conn.append(road_con)

        # Formatting to graph again: drop the helper columns and restore the
        # (from, to, keys) edge index before rebuilding the graph
        newnodes = newnodes.loc[:, ~newnodes.columns.isin(['geometry_m', 'osmid_var'])]
        edge_filter = edge_filter.set_index(['from','to','keys'])
        edge_filter = edge_filter.loc[:, ~edge_filter.columns.isin(['geometry_m', 'key'])]

        graph2 = ox.graph_from_gdfs(newnodes, edge_filter)

        graphs.append(graph2)
        print(i.rsplit(',')[0], 'done', round((time.time() - start_time) / 60,2),'mns')
    return({'graphs':graphs,'nodes':road_nodes,'edges':road_conn,'edges long':road_edges})

In [13]:
# Block 4 city greenspace
def urban_greenspace (cities, thresholds, one_UGS_buf = 25, min_UGS_size = 400):
    """Fetch parks from OSM and merge parks that lie close to each other.

    Parameters
    ----------
    cities : iterable of place names understood by ``ox.geometries_from_place``.
    thresholds : distances; only the maximum is used (as download buffer).
    one_UGS_buf : buffer applied (in EPSG:3043) around each park; parks whose
        buffers touch are dissolved into one green space.
    min_UGS_size : green spaces whose EPSG:3043 area is not above this value are dropped.

    Returns
    -------
    list
        One frame of merged green spaces per city, with an extra projected
        geometry column 'geometry_m'.
    """
    print('get urban greenspaces from OSM')
    city_parks = []
    for city in cities:
        # Parks tagged leisure=park; keep areal geometries only
        osm_parks = ox.geometries_from_place(city, tags={'leisure':'park'}, buffer_dist = np.max(thresholds))
        is_areal = (osm_parks.geom_type == 'Polygon') | (osm_parks.geom_type == 'MultiPolygon')
        greenspace = osm_parks[is_areal].reset_index()
        warnings.filterwarnings("ignore")

        # Buffered copy of every park, plus the ring between buffer and park
        buffered = gpd.GeoDataFrame(geometry = greenspace.to_crs(3043).buffer(one_UGS_buf).to_crs(4326))
        greenspace['geometry_w_buffer'] = buffered
        greenspace['geometry_w_buffer'] = gpd.GeoSeries(greenspace['geometry_w_buffer'], crs = 4326)
        greenspace['geom buffer diff'] = greenspace['geometry_w_buffer'].difference(greenspace['geometry'])

        # Group parks whose buffers overlap into connected components and dissolve them
        # https://stackoverflow.com/questions/68036051/geopandas-self-intersection-grouping
        contiguity = libpysal.weights.fuzzy_contiguity(greenspace['geometry_w_buffer'])
        greenspace['components'] = contiguity.component_labels
        merged = greenspace.dissolve('components')

        # Drop green spaces that are too small
        large_enough = merged.to_crs(3043).area > min_UGS_size
        merged = merged[large_enough]
        print(city, 'done')
        merged = merged.reset_index()
        merged['geometry_m'] = merged['geometry'].to_crs(3043)
        city_parks.append(merged)
    return(city_parks)

In [14]:
# Block 5 park entry points
def UGS_fake_entry(UGS, road_nodes, cities, UGS_entry_buf = 25, walk_radius = 500, entry_point_merge = 0):
    """Derive 'fake' entry points: road nodes lying just outside each green space.

    For every green space, road nodes closer than ``UGS_entry_buf`` (and not at
    distance 0) in EPSG:3043 are taken as entry points. For each entry point
    the part of the green space within ``walk_radius`` is computed and turned
    into size-inflation factors (relative to the median walkable area).

    Parameters
    ----------
    UGS : list of green-space frames, one per city (column 'geometry').
    road_nodes : list of road-node frames, one per city
        (columns 'geometry' and 'geometry_m').
    cities : list of city names (loop length and progress prints).
    UGS_entry_buf : maximum node-to-green-space distance for an entry point.
    walk_radius : radius around the entry point used for the walkable area.
    entry_point_merge : distance passed on to ``simplify_UGS_entry``; it was
        previously ignored (always 0). The default 0 keeps the old behaviour.

    Returns
    -------
    list
        One entry-point frame per city, re-indexed 0..n-1.
    """
    print('get fake UGS entry points')
    start_time = time.time()
    ParkRoads = list()
    for j in range(len(cities)):
        # Project once per city instead of once per green space
        nodes_m = road_nodes[j]['geometry'].to_crs(3043)
        parks_m = UGS[j]['geometry'].to_crs(3043)
        n_parks = len(UGS[j])

        # Collect the matching nodes per green space and concatenate once.
        # The leading empty frame keeps the result defined when there are no green spaces.
        pieces = [pd.DataFrame()]
        park_numbers = []
        for i in range(n_parks):
            dist = nodes_m.distance(parks_m[i])
            buf_nodes = road_nodes[j][(dist < UGS_entry_buf) & (dist > 0)]
            park_numbers.extend([i] * len(buf_nodes))
            pieces.append(buf_nodes)
            if i % 100 == 0: print(cities[j].rsplit(',')[0], round(i/n_parks*100,1),'% done',
                                  round((time.time() - start_time) / 60,2),' mns')
        ParkRoad = pd.concat(pieces)

        # Format
        ParkRoad['Park_No'] = park_numbers
        ParkRoad = ParkRoad.reset_index()
        ParkRoad['park_lon'] = ParkRoad['geometry_m'].x
        ParkRoad['park_lat'] = ParkRoad['geometry_m'].y

        # Attach the green-space geometry: after the merge 'geometry_x' is the
        # road node and 'geometry_y' the green space
        ParkRoad = pd.merge(ParkRoad, UGS[j][['geometry']], left_on = 'Park_No', right_index = True)

        # Get the walkable park size
        ParkRoad['park_size_walkable'] = ParkRoad['geometry_m'].buffer(walk_radius).to_crs(4326).intersection(ParkRoad['geometry_y'])
        ParkRoad['walk_area'] = ParkRoad['park_size_walkable'].to_crs(3043).area
        ParkRoad['park_area'] = ParkRoad['geometry_y'].to_crs(3043).area
        ParkRoad['share_walked'] = ParkRoad['walk_area'] / ParkRoad['park_area']

        # Get size inflation factors for the gravity model
        ParkRoad['size_infl_factor'] = ParkRoad['walk_area'] / ParkRoad['walk_area'].median()
        ParkRoad['size_infl_sqr2'] = ParkRoad['size_infl_factor']**(1/2)
        ParkRoad['size_infl_sqr3'] = ParkRoad['size_infl_factor']**(1/3)
        ParkRoad['size_infl_sqr5'] = ParkRoad['size_infl_factor']**(1/5)

        # Merge fake UGS entry points that lie within entry_point_merge of each
        # other (0 = no merging). The index is reset afterwards so positions and
        # labels agree even when entry points were merged away.
        ParkRoad = simplify_UGS_entry(ParkRoad, entry_point_merge = entry_point_merge)
        ParkRoad = ParkRoad.reset_index(drop = True)

        ParkRoads.append(ParkRoad)

        print(cities[j].rsplit(',')[0],'100 % done',
                                  round((time.time() - start_time) / 60,2),' mns')

    return(ParkRoads)

In [15]:
# Block 5.5 (not in use, buffer is 0, thus retains all the park entry points as is)
def simplify_UGS_entry(fake_UGS_entry, entry_point_merge = 0):
    """Collapse entry points that lie within ``entry_point_merge`` of each other.

    Entry points whose buffers (around 'geometry_m') touch are grouped into
    components; per component only the entry point closest to the centroid of
    the component's 'geometry_x' geometries is kept.

    Parameters
    ----------
    fake_UGS_entry : entry-point frame with the columns 'geometry_m' and 'geometry_x'.
    entry_point_merge : buffer distance applied to 'geometry_m'.

    Returns
    -------
    A new frame with one row per component and the extra columns
    'geometry_m_buffer', 'components', 'comp_centroid' and 'centr_dist'.
    The input frame is left unmodified.
    """
    # Work on a copy so the caller's frame does not silently gain helper columns
    ParkComb = fake_UGS_entry.copy()
    ParkComb['geometry_m_buffer'] = ParkComb['geometry_m'].buffer(entry_point_merge)

    # Get and merge components
    M = libpysal.weights.fuzzy_contiguity(ParkComb['geometry_m_buffer'])
    ParkComb['components'] = M.component_labels

    # Take centroid of merged components
    centr = gpd.GeoDataFrame(ParkComb, geometry = 'geometry_x', crs = 4326).dissolve('components')['geometry_x'].centroid
    centr = gpd.GeoDataFrame(centr)
    centr.columns = ['comp_centroid']

    # Get node closest to the centroid of all merged nodes, which accesses the road network.
    ParkComb = pd.merge(ParkComb, centr, left_on = 'components', right_index = True)
    ParkComb['centr_dist'] = ParkComb['geometry_x'].distance(ParkComb['comp_centroid'])
    # idxmin() returns index labels, so select by label (.loc), not by position
    ParkComb = ParkComb.loc[ParkComb.groupby('components')['centr_dist'].idxmin()]
    return(ParkComb)

In [16]:
# Block 6 grid-parkentry combinations within euclidean threshold distance
def suitible_combinations(UGS_entry, pop_grids, road_nodes, thresholds, cities, chunk_size = 10000000):
    """Find grid / entry-point pairs within the largest threshold (Euclidean).

    All combinations of grid cells and green-space entry points are evaluated
    chunk by chunk. A pair is kept when its raw Euclidean distance, or that
    distance divided by one of the size-inflation factors, does not exceed
    ``max(thresholds)``. The nearest road node of each kept grid centroid is
    added by ``gridroad_entry``.

    Parameters
    ----------
    UGS_entry : list of entry-point frames, one per city (index 0..n-1).
    pop_grids : list of population-grid frames, one per city (index 0..n-1).
    road_nodes : list of road-node frames, one per city.
    thresholds : distances; only the maximum is used.
    cities : list of city names.
    chunk_size : approximate maximum number of combinations per chunk.

    Returns
    -------
    list
        One frame of candidate combinations per city.

    Notes
    -----
    * The grid cells per chunk are now rounded up; rounding to nearest could
      leave the last grid cells without any combination.
    * NOTE(review): the output column 'Park_geom' is filled from 'geometry_x'
      and 'Parkroad_coords_centroid' from 'geometry_y' of the entry-point
      frame. Given how UGS_fake_entry builds that frame, these look like the
      road-node point and the green-space polygon respectively - confirm the
      labels are as intended.
    """
    print('get potential (Euclidean) suitible combinations')
    start_time = time.time()
    max_dist = np.max(thresholds)
    grid_cols = ['grid_lon','grid_lat','centroid','centroid_m']
    entry_cols = ['Park_No','osmid','geometry_x','geometry_y','geometry_m','park_lon','park_lat',
                  'size_infl_sqr2','size_infl_sqr3','size_infl_sqr5','share_walked','park_area','walk_area']
    RoadComb = list()
    for l in range(len(cities)):
        print(cities[l])
        len1 = len(pop_grids[l])
        len2 = len(UGS_entry[l])

        # Number of chunks: increase until the combinations per chunk fit chunk_size
        len4 = 1
        len5 = len1 * len2
        blockC = len5
        while blockC > chunk_size:
            blockC = len5 / len4
            len4 = len4+1

        # Grid cells per chunk, rounded up so that the chunks cover every grid cell
        block = math.ceil(len1 / len4)

        # Leading empty frame mirrors the former incremental concat start value
        chunks = [pd.DataFrame()]
        for i in range(len4):
            # All grid / entry-point index pairs of this chunk
            l1, l2 = range(i*block,(i+1)*block), range(0,len2)
            listed = pd.DataFrame(list(product(l1, l2)))

            # Merge grid and park information (inner joins drop indices past the end)
            grid_merged = pd.merge(listed,
                                   pop_grids[l][grid_cols],
                                   left_on = 0, right_index = True)
            node_merged = pd.merge(grid_merged,
                                   UGS_entry[l][entry_cols],
                                   left_on = 1, right_index = True)

            # Preset index for merging
            node_merged['key'] = range(0,len(node_merged))
            node_merged = node_merged.set_index('key')
            node_merged = node_merged.loc[:, ~node_merged.columns.isin(['index'])]

            # Euclidean distances (projected coordinates), raw and size-adjusted
            dx = node_merged['park_lon'].to_numpy(dtype = float) - node_merged['grid_lon'].to_numpy(dtype = float)
            dy = node_merged['park_lat'].to_numpy(dtype = float) - node_merged['grid_lat'].to_numpy(dtype = float)
            raw = np.sqrt(dx**2 + dy**2)
            eucl2 = raw / node_merged['size_infl_sqr2'].to_numpy(dtype = float)
            eucl3 = raw / node_merged['size_infl_sqr3'].to_numpy(dtype = float)
            eucl5 = raw / node_merged['size_infl_sqr5'].to_numpy(dtype = float)

            # Keep the pairs for which at least one distance variant is within range
            within = ((raw <= max_dist) | (eucl2 <= max_dist) |
                      (eucl3 <= max_dist) | (eucl5 <= max_dist))

            # Column order matters: the final renaming below is positional
            dist_df = pd.DataFrame({'size_infl_eucl3': eucl3,
                                    'raw euclidean': raw,
                                    'size_infl_eucl2': eucl2,
                                    'size_infl_eucl5': eucl5})
            mat_df = dist_df[within].join(node_merged)

            chunks.append(mat_df)

            print('chunk',(i+1),'/',len4,len(mat_df),'suitible comb.')

        output = pd.concat(chunks)
        print('total combinations within distance',len(output))

        # Renaming columns (positional)
        output.columns = ['size_infl_eucl3','raw euclidean','size_infl_eucl2','size_infl_eucl5',
                          'Grid_No','Park_entry_No','grid_lon','grid_lat','Grid_coords_centroid','Grid_m_centroid',
                          'Park_No','Parkroad_osmid','Park_geom','Parkroad_coords_centroid','Parkroad_m_centroid',
                          'park_lon','park_lat','size_infl_sqr2','size_infl_sqr3','size_infl_sqr5',
                          'parkshare_walked','park_area','walk_area_m2']

        output = output[['raw euclidean','size_infl_eucl2','size_infl_eucl3','size_infl_eucl5',
                         'Grid_No','Park_entry_No','Grid_coords_centroid','Grid_m_centroid',
                          'Park_No','Parkroad_osmid','Park_geom','Parkroad_coords_centroid','Parkroad_m_centroid',
                         'walk_area_m2','size_infl_sqr2','size_infl_sqr3','size_infl_sqr5']]

        # Reinstate geographic elements
        output = gpd.GeoDataFrame(output, geometry = 'Grid_coords_centroid', crs = 4326)
        output['Grid_m_centroid'] = gpd.GeoSeries(output['Grid_m_centroid'], crs = 3043)
        output['Parkroad_coords_centroid'] = gpd.GeoSeries(output['Parkroad_coords_centroid'], crs = 4326)
        output['Parkroad_m_centroid'] = gpd.GeoSeries(output['Parkroad_m_centroid'], crs = 3043)

        # Get the nearest entrance point for the grid centroids
        output = gridroad_entry(output, road_nodes[l])

        print('100 % gridentry done', round((time.time() - start_time) / 60,2),' mns')
        RoadComb.append(output)
    return (RoadComb)

In [17]:
def gridroad_entry (suitible_comb, road_nodes):    
    """Attach the nearest road node to every grid centroid of the combinations.

    Parameters
    ----------
    suitible_comb : frame with a 'Grid_coords_centroid' geometry column; the
        column 'grid_osm' is added to it in place.
    road_nodes : road-node frame with the columns 'geometry' and 'osmid_var'.

    Returns
    -------
    The combinations merged with the geometry of the nearest road node
    ('geometry'), its EPSG:3043 projection ('geometry_m'), and a reset index.
    """
    t0 = time.time()
    n_comb = len(suitible_comb)
    nearest_osmids = []
    for row in range(n_comb):
        centroid = suitible_comb['Grid_coords_centroid'].iloc[row]
        try:
            node_pos = int(road_nodes['geometry'].sindex.nearest(centroid)[1])
            nearest_osmids.append(road_nodes['osmid_var'].iloc[node_pos])
        except: 
            # sometimes two nodes are at exactly the same distance; the first one listed is taken.
            node_pos = int(road_nodes['geometry'].sindex.nearest(centroid)[1][0])
            nearest_osmids.append(road_nodes['osmid_var'].iloc[node_pos])
        if row % 250000 == 0:
            print(round(row/n_comb*100,1),'% gridentry done', round((time.time() - t0) / 60,2),' mns')

    # Format the resulting frame
    suitible_comb['grid_osm'] = nearest_osmids
    with_node = pd.merge(suitible_comb, road_nodes['geometry'], left_on = 'grid_osm', right_index = True)
    with_node['geometry_m'] = gpd.GeoSeries(with_node['geometry'], crs = 4326).to_crs(3043)
    with_node = with_node.reset_index()
    return(with_node)

In [18]:
# Check grids in or out of UGS
def grids_in_UGS (suitible_comb, UGS, pop_grid): 
    """Flag the combinations whose grid centroid lies inside the green space.

    Parameters
    ----------
    suitible_comb : list of combination frames per city
        (columns 'Grid_No' and 'Park_No').
    UGS : list of green-space frames per city (column 'geometry').
    pop_grid : list of population-grid frames per city (column 'centroid').

    Returns
    -------
    list
        The combination frames with an added boolean column 'in_out_UGS'
        (True when the centroid intersects the green space).
    """
    t0 = time.time()
    flagged_per_city = list()
    for city_idx in range(len(suitible_comb)):
        park_geoms = UGS[city_idx]['geometry']
        centroids = pop_grid[city_idx]['centroid']
        inside_flags = list()
        print('Check grids within UGS')
        # One row of flags per green space: is each grid centroid inside it?
        for park_idx, park_geom in enumerate(park_geoms):
            inside_flags.append(centroids.intersection(park_geom).is_empty == False)
            if park_idx % 100 == 0:
                print(park_idx, round((time.time() - t0) / 60,2),' mns')

        # Rows are green spaces, columns are grids; unstack gives a (grid, park) index
        flag_matrix = pd.DataFrame(np.array(inside_flags))
        flag_long = pd.DataFrame(flag_matrix.unstack())
        flag_long.columns = ['in_out_UGS']
        flagged = pd.merge(suitible_comb[city_idx], flag_long,
                           left_on = ['Grid_No','Park_No'], right_index = True, how = 'left')
        flagged_per_city.append(flagged)
    return(flagged_per_city)    

In [19]:
# Block 7 calculate route networks of all grid-parkentry combinations within euclidean threshold distance
def route_finding (graphs, combinations, road_nodes, road_edges, cities, block_size = 250000, nn_iter = 10):
    """Compute a route for every candidate grid / entry-point combination.

    Combinations flagged ``in_out_UGS == True`` are not routed; they are
    appended to the summary with way_calc 'grid in UGS'. All others are routed
    chunk by chunk: first from the grid node to the entry node, then in the
    reverse direction, and finally through ``nn_route_finding`` using nearby
    nodes.

    Parameters
    ----------
    graphs : list of road graphs, one per city.
    combinations : list of combination frames per city (needs 'in_out_UGS',
        'Parkroad_osmid', 'grid_osm' and the columns selected below).
    road_nodes : list of road-node frames per city.
    road_edges : list of edge frames per city (passed to ``route_formatting``).
    cities : list of city names.
    block_size : number of combinations routed per chunk.
    nn_iter : maximum number of nearest-node alternatives; it is now passed on
        to ``nn_route_finding`` (it was previously ignored, always 10).

    Returns
    -------
    dict
        'route summary' : list with one summary frame per city
        'route detail'  : list with one step-level frame per city
    """
    warnings.filterwarnings("ignore")
    start_time = time.time()
    
    print('comb. by city')
    for n in enumerate(cities): # Show how many combinations need to be calculated.
        print(n[1], len(combinations[n[0]]))
    print(' ')
    
    Routes = list()
    Routes_detail = list()
    for j in range(len(cities)):
        suit_raw = combinations[j]

        In_UGS = suit_raw[suit_raw['in_out_UGS'] == True] # grid centroids inside a UGS
        suitible = suit_raw[suit_raw['in_out_UGS'] == False].reset_index(drop = True) # recreate a sequential index
        
        len2 = int(np.ceil(len(suitible)/block_size)) # number of blocks (chunks)
        Route_parts = pd.DataFrame()
        Route_dparts = pd.DataFrame()

        # Divide in chunks of block_size for computational load
        for k in range(len2):    
            suitible_chunk = suitible.iloc[k*block_size:k*block_size+block_size]

            parknode = list(suitible_chunk['Parkroad_osmid']) # UGS road entry ids
            gridnode = list(suitible_chunk['grid_osm']) # grid centroid road entry ids

            s_mat = list([]) # osmid from
            s_mat1 = list([]) # osmid to
            s_mat2 = list([]) # route id
            s_mat3 = list([]) # step id
            s_mat4 = list([]) # way calculated
            s_mat5 = list([]) # way calculated id
            mat_nn = [] # ids of routes that needed a nearest-node alternative
            len1 = len(suitible_chunk)

            print(cities[j].rsplit(',')[0], k+1,'/',len2, 
                  'range',k*block_size,'-',k*block_size+np.where(k*block_size+block_size >= len1,len1,block_size))
            
            for i in range(len(suitible_chunk)):
                # Try grid -> park first, then park -> grid
                attempts = ((gridnode[i], parknode[i], 'normal way', 1),
                            (parknode[i], gridnode[i], 'reverse way', 0))
                for origin, target, way_label, way_id in attempts:
                    try:
                        # NOTE(review): no visible code sets a 'travel_dist' edge attribute;
                        # networkx counts a missing weight attribute as 1 per edge (fewest
                        # hops instead of shortest distance). Confirm whether 'length' was intended.
                        shortest = nx.shortest_path(graphs[j], origin, target, 'travel_dist', method = 'dijkstra')
                    except Exception:
                        # Exception (not a bare except) so that KeyboardInterrupt still stops the run
                        continue
                    s_mat.append(shortest)
                    shortest_to = list(shortest[1:len(shortest)])
                    shortest_to.append(-1) # the last step has no successor
                    s_mat1.append(shortest_to)
                    s_mat2.append(list(np.repeat(i+block_size*k, len(shortest))))
                    s_mat3.append(list(np.arange(0, len(shortest))))
                    s_mat4.append(way_label)
                    s_mat5.append(way_id)
                    break
                else:
                    # No route in either direction: try the nearest nodes, at most nn_iter times
                    nn_route_finding(graphs[j], suitible_chunk, road_nodes[j],
                               s_mat, s_mat1, s_mat2, s_mat3, s_mat4, s_mat5, mat_nn, # see the lists above
                               it = i, block = k, block_size = block_size, 
                                     nn_iter = nn_iter)
                        
                if i % 10000 == 0: print(round((i+block_size*k)/len(suitible)*100,2),'% done',
                                         round((time.time() - start_time) / 60,2),'mns')
            print(len(mat_nn),'nearest nodes found')

            print(round((i+block_size*k)/len(suitible)*100,2),'% pathfinding done', round((time.time() - start_time) / 60,2),'mns')
            
            # Formats route information by route and step (detailed)
            routes = route_formatting(s_mat, s_mat1, s_mat2, s_mat3, road_edges[j])
            print('formatting done', round((time.time() - start_time) / 60,2), 'mns')
            
            # Summarizes information by route
            routes2 = route_summarization(routes, suitible_chunk, road_nodes[j], s_mat4, s_mat5)
            print('dissolving done', round((time.time() - start_time) / 60,2), 'mns')
            
            # Concats chunk with others already calculated
            Route_parts = pd.concat([Route_parts, routes2])
            Route_dparts = pd.concat([Route_dparts, routes])

        # Format grids in UGS to enable smooth df concat
        In_UGS = In_UGS.set_geometry(In_UGS['Grid_coords_centroid'])
        In_UGS = In_UGS[['geometry','Grid_No','grid_osm','Park_No','Park_entry_No','Parkroad_osmid',
                                   'Grid_m_centroid','walk_area_m2','size_infl_sqr2','size_infl_sqr3','size_infl_sqr5',
                                   'raw euclidean','geometry_m']]

        # NOTE(review): 'realG_osmid' is filled from the park osmid and 'realP_osmid'
        # from the grid osmid - confirm this crossing is intended.
        In_UGS['realG_osmid'] = suit_raw['Parkroad_osmid']
        In_UGS['realP_osmid'] = suit_raw['grid_osm']
        In_UGS['way_calc'] = 'grid in UGS'

        Route_parts = pd.concat([Route_parts,In_UGS])
        Route_parts = Route_parts.reset_index(drop = True)

        Route_parts['gridpark_no'] = Route_parts['Grid_No'].astype(str) +'-'+ Route_parts['Park_No'].astype(str)

        # Fill with 0 because no routes are calculated for grid centroids in UGSs
        to_fill = ['way-id','route_cost','steps','real_G-entry','raw_Tcost','grav2_Tcost','grav3_Tcost','grav5_Tcost']                                   
        Route_parts[to_fill] = Route_parts[to_fill].fillna(0)  

        Routes.append(Route_parts)
        Routes_detail.append(Route_dparts)
    return({'route summary':Routes,'route detail':Routes_detail})

In [20]:
def nn_route_finding (Graph, comb, nodes, 
                mat_from, mat_to, mat_route, mat_step, mat_way, mat_wbin, mat_nn, 
                it, block, block_size = 250000, nn_iter = 10):
    """Retry a grid/park pair through nearby road nodes and record the outcome.

    For each attempt (1 .. nn_iter) the attempt-th nearest road node of both
    endpoints is looked up with `nn_finding`, after which `nn_routing` tries,
    in this order:
      1. grid node -> node nearest to the park node
      2. the reverse of 1.
      3. node nearest to the grid node -> park node
      4. the reverse of 3.
    The search stops at the first attempt that yields a path.

    Parameters
    ----------
    Graph : graph handed to `nn_routing` for the path search.
    comb : table with the columns 'grid_osm' (origins) and 'Parkroad_osmid'
        (destinations); row `it` is the pair being retried.
    nodes : table with the columns 'geometry' and 'osmid_var' of the road nodes.
    mat_from, mat_to, mat_route, mat_step, mat_way, mat_wbin, mat_nn : lists
        that are appended to in place (see below).
    it : position of the pair inside the current chunk.
    block, block_size : chunk number and chunk size; the global route id is
        `it + block_size * block`.
    nn_iter : maximum number of nearest-node attempts.

    Side effects
    ------------
    Route found: the route id goes to `mat_nn`, the node path to `mat_from`,
    the path shifted by one (closed with -1) to `mat_to`, the repeated route id
    to `mat_route` and the step numbers to `mat_step`; `nn_routing` has already
    filled `mat_way` / `mat_wbin`.
    No route: -1 placeholders are appended, `mat_way` gets 'no way',
    `mat_wbin` gets 2 and a message is printed.
    """
    grid_ids = comb['grid_osm']        # grid osmids (origin)
    park_ids = comb['Parkroad_osmid']  # UGS osmids (destination)
    node_geoms = nodes['geometry']     # road node geoms
    node_ids = nodes['osmid_var']      # road node osmids

    candidates = []
    attempt = 0
    # Keep widening the search until a path shows up or the attempts run out.
    while attempt < nn_iter and not candidates:
        attempt += 1
        nearest = nn_finding(grid_ids, park_ids, node_geoms, node_ids, it, attempt)
        nn_routing(Graph, nearest['curr_park'], nearest['near_park'],
                   nearest['curr_grid'], nearest['near_grid'],
                   mat_way, mat_wbin, candidates, attempt)

    route_id = it + block_size * block
    path = candidates[0] if candidates else candidates
    n_nodes = len(path)

    if n_nodes == 0:
        # Nothing found: write placeholders so every pair keeps one entry.
        mat_from.append(-1)
        mat_to.append(-1)
        mat_route.append(route_id)
        mat_step.append(-1)
        mat_way.append('no way')
        mat_wbin.append(2)
        print(route_id,'No route between grid and park-entry and their both',nn_iter,'alternatives')
        return

    mat_nn.append(route_id)
    mat_from.append(path)
    # 'to' is the path shifted one step ahead; the final node has no successor.
    mat_to.append(list(path[1:]) + [-1])
    mat_route.append(list(np.repeat(route_id, n_nodes)))
    mat_step.append(list(np.arange(0, n_nodes)))

In [21]:
def nn_finding (grid_osmid, UGS_osmid, node_geom, node_osmid, it, nn_i):
    """Look up the `nn_i`-th nearest road node of a grid node and of a park node.

    Parameters
    ----------
    grid_osmid, UGS_osmid : pandas Series of road-node ids for the grid side
        and the park (UGS) side; position `it` selects the pair.
    node_geom : point geometries of all road nodes; must expose `.x` / `.y`
        and support boolean-mask selection.
    node_osmid : named pandas Series with the id of every road node, sharing
        its index with `node_geom`.
    it : position of the pair in `grid_osmid` / `UGS_osmid`.
    nn_i : rank of the neighbour to return. Rank 0 is the node itself
        (distance 0), so callers start at 1.

    Returns
    -------
    dict with four node ids. NOTE: the key names are historical and do not
    match their content:
      'curr_park' -> the current GRID node,
      'near_park' -> the nn_i-th nearest node to the current park node,
      'curr_grid' -> the current PARK node,
      'near_grid' -> the nn_i-th nearest node to the current grid node.

    Raises
    ------
    KeyError if an id is not present in `node_osmid`.
    IndexError if `it` or `nn_i` is out of range.

    Distances are plain Euclidean distances in the coordinates of `node_geom`.
    If an id occurs more than once in `node_osmid`, the first match is used.
    """
    node_x = node_geom.x
    node_y = node_geom.y

    def _plain(value):
        # Hand back Python scalars, like iterating the Series would.
        return value.item() if isinstance(value, np.generic) else value

    def _nth_nearest(anchor_osmid):
        # Rank all road nodes by distance to the node carrying `anchor_osmid`.
        anchor = node_geom[node_osmid == int(anchor_osmid)]
        anchor_x = anchor.x
        if len(anchor_x) == 0:
            raise KeyError('road node ' + str(anchor_osmid) + ' not found in node_osmid')
        # Extract scalars explicitly; float()/int() on a Series is deprecated in pandas.
        ax = float(anchor_x.iloc[0])
        ay = float(anchor.y.iloc[0])
        dist = ((ax - node_x)**2 + (ay - node_y)**2)**(1/2)
        ranked = pd.DataFrame({0: dist}).join(node_osmid).sort_values(0)
        return ranked.iloc[nn_i, 1]

    grid_node = _plain(grid_osmid.iloc[it])
    park_node = _plain(UGS_osmid.iloc[it])

    g_grid = _nth_nearest(grid_node)  # neighbour of the grid road entry
    p_park = _nth_nearest(park_node)  # neighbour of the UGS road entry

    return({'curr_park':grid_node, 'near_park':p_park, 'curr_grid':park_node, 'near_grid':g_grid}) # return as dict

In [22]:
def nn_routing (Graph, curr_park, near_park, curr_grid, near_grid, mat_way, mat_wbin, found_route, nn_i):
    """Try up to four alternative shortest paths and record the first that exists.

    The attempts run in a fixed order; the labels written to `mat_way` show
    which node plays which role (the parameter names are historical):
      1. curr_park -> near_park   label 'grid > n-park',  direction flag 1
      2. near_park -> curr_park   label 'n-park > grid',  direction flag 0
      3. near_grid -> curr_grid   label 'n-grid > park',  direction flag 1
      4. curr_grid -> near_grid   label 'park > n-grid',  direction flag 0

    Parameters
    ----------
    Graph : networkx graph; paths are weighted by the 'travel_dist' edge attribute.
    curr_park, near_park, curr_grid, near_grid : node ids, see the list above.
    mat_way : list receiving the label, prefixed with `nn_i`.
    mat_wbin : list receiving the direction flag (1 or 0).
    found_route : list receiving the node path.
    nn_i : nearest-node rank of this attempt, used in the label only.

    On success exactly one entry is appended to each of the three lists. When
    none of the four paths exists, nothing is appended. Only "no path" and
    "node not in graph" count as a failed attempt; any other error (and Ctrl-C)
    propagates instead of being silently swallowed.
    """
    attempts = (
        (curr_park, near_park, 'grid > n-park', 1),
        (near_park, curr_park, 'n-park > grid', 0),
        (near_grid, curr_grid, 'n-grid > park', 1),
        (curr_grid, near_grid, 'park > n-grid', 0),
    )
    for source, target, label, direction in attempts:
        try:
            path = nx.shortest_path(Graph, source, target,
                                    weight = 'travel_dist', method = 'dijkstra')
        except (nx.NetworkXNoPath, nx.NodeNotFound):
            continue  # this alternative is not routable, try the next one
        found_route.append(path)
        mat_way.append(str(nn_i) + label)
        mat_wbin.append(direction)
        return

In [23]:
def route_formatting(mat_from, mat_to, mat_route, mat_step, road_edges):
    """Turn the per-route routing lists into one row per route step.

    Parameters
    ----------
    mat_from, mat_to, mat_route, mat_step : lists with one entry per route.
        An entry is either a list (one value per step) or a single scalar
        placeholder; both forms are flattened into one long sequence.
    road_edges : table joined on its index to the '<from>-<to>' key of each
        step; it has to provide a 'geometry' column.

    Returns
    -------
    GeoDataFrame (crs 4326) indexed by 'key', with the columns 'from', 'to',
    'route', 'step' plus the columns of `road_edges`, sorted by route and step.
    """
    def _flatten(entries):
        # Lists contribute all their items, anything else counts as one item.
        flat = []
        for entry in entries:
            if isinstance(entry, list):
                flat.extend(entry)
            else:
                flat.append(entry)
        return flat

    from_nodes = _flatten(mat_from)
    to_nodes = _flatten(mat_to)
    route_ids = _flatten(mat_route)
    step_nos = _flatten(mat_step)

    # One row per step.
    routes = pd.DataFrame([from_nodes, to_nodes, route_ids, step_nos]).transpose()
    routes.columns = ['from','to','route','step']

    # Edge key '<from>-<to>' used to look up the road edge of every step.
    routes['key'] = [str(int(from_nodes[n])) + '-' + str(int(to_nodes[n]))
                     for n in range(len(routes))]
    routes = routes.set_index('key')

    # Attach the edge attributes and order the steps within each route.
    routes = routes.join(road_edges, how = 'left')
    routes = gpd.GeoDataFrame(routes, geometry = 'geometry', crs = 4326)
    return(routes.sort_values(by = ['route','step']))

In [24]:
def route_summarization(routes, suitible_comb, road_nodes, mat_way, mat_wbin):
    """Collapse the step-level routes into one summary row per route.

    Parameters
    ----------
    routes : step-level GeoDataFrame with at least the columns 'route',
        'step', 'from', 'length' and 'geometry'.
    suitible_comb : grid/park combinations of the current chunk; its index
        becomes the index of the result and its descriptive columns are merged in.
    road_nodes : road nodes with a 'geometry_m' column, looked up by node id
        through its index.
    mat_way : per-route description of how the route was found.
    mat_wbin : per-route direction code (1 = grid > park, 0 = park > grid,
        2 = no way).

    Returns
    -------
    One row per route with the dissolved geometry, the node ids actually used
    on the grid and park side, route cost, number of steps, the distance
    between grid centroid and the used grid-side node, and the four total
    cost variants (raw and divided by the three size-influence factors).
    """
    # Merge the step geometries of every route into one geometry.
    summary = routes[['route','geometry']].dissolve('route')

    # First and last node of every route (they differ from the requested
    # nodes when a nearest-node route was used).
    flat = routes.reset_index()
    step_by_route = flat.groupby('route')['step']
    first_rows = list(step_by_route.idxmin())
    last_rows = list(step_by_route.idxmax())
    start_nodes = flat['from'].iloc[first_rows].reset_index(drop = True)
    end_nodes = flat['from'].iloc[last_rows].reset_index(drop = True)

    # grid > park = 1, park > grid = 0, no way = 2, detailed way in way_calc.
    summary['way-id'] = mat_wbin
    grid_first = summary['way-id'] == 1
    summary['realG_osmid'] = np.where(grid_first, start_nodes, end_nodes)
    summary['realP_osmid'] = np.where(grid_first, end_nodes, start_nodes)
    summary['way_calc'] = mat_way

    # Route cost and number of steps.
    per_route = routes.groupby('route')
    summary['route_cost'] = per_route['length'].sum()
    summary['steps'] = per_route['step'].max()

    # Re-index on the combination table and pull in its descriptive columns.
    summary['index'] = suitible_comb.index
    summary = summary.set_index(['index'])
    summary.index = summary.index.astype(int)
    comb_columns = ['Grid_No','grid_osm','Park_No','Park_entry_No','Parkroad_osmid',
                    'Grid_m_centroid','walk_area_m2','size_infl_sqr2','size_infl_sqr3',
                    'size_infl_sqr5','raw euclidean']
    summary = pd.merge(summary, suitible_comb[comb_columns],
                       left_index = True, right_index = True)
    summary = pd.merge(summary, road_nodes['geometry_m'], how = 'left',
                       left_on = 'realG_osmid', right_index = True)

    # Distance between the grid centroid and the road node used for the grid.
    centroids = gpd.GeoSeries(summary['Grid_m_centroid'], crs = 3043)
    summary['real_G-entry'] = round(centroids.distance(summary['geometry_m']),3)

    # Total cost: unweighted, and divided by each size-influence factor.
    summary['raw_Tcost'] = summary['route_cost'] + summary['real_G-entry']
    gravity_columns = {'grav2_Tcost': 'size_infl_sqr2',
                       'grav3_Tcost': 'size_infl_sqr3',
                       'grav5_Tcost': 'size_infl_sqr5'}
    for cost_col, factor_col in gravity_columns.items():
        summary[cost_col] = (summary['route_cost'] + summary['real_G-entry']) / summary[factor_col]
    return(summary)

In [25]:
# Block 8 determine best parkentry points from each grid, then calculate grid scores
# and finally aggregate city access in categories (high, medium, low and no access)
def grid_score_summary (routes, cities, pop_grids, ext = '', grid_size = 100):
    """Score every grid cell per city and summarise population access shares.

    For each city and each of the four cost variants ('raw', 'grav2', 'grav3',
    'grav5') the best entry per grid/park pair is selected with
    `best_gridpark_comb`, then `determine_scores` is run for every value in
    the notebook-level `thresholds` list.

    Parameters
    ----------
    routes : list with one route-summary table per city.
    cities : list of city names; used in file names and as result columns.
    pop_grids : list with one population grid per city ('population' and
        'geometry' columns are used).
    ext : unused, kept for backward compatibility.
    grid_size : grid resolution in metres; selects the output folder and is
        passed on to `determine_scores`.

    Returns
    -------
    DataFrame with one column per city holding, per variant/threshold and
    access category, the share of the population in that category.

    Side effects
    ------------
    Writes under 'D:/Dumps/Scores output WP2-OSM/<grid_size>m grids/':
      Grid_geoms/gridscore_<variant>_<city>.gpkg,
      Grid_csv/gridscore_<variant>_<city>.csv and popgrid_access.csv.
    All paths are absolute on drive D: so that the folders that get created
    are the same folders the files are written to.
    """
    start_time = time.time()

    out_root = 'D:/Dumps/Scores output WP2-OSM/'+str(grid_size)+'m grids/'
    geom_dir = out_root + 'Grid_geoms/'
    csv_dir = out_root + 'Grid_csv/'
    for folder in (geom_dir, csv_dir):
        os.makedirs(folder, exist_ok = True)

    # The four distance decay variants regarding park size, with their labels.
    l1 = ['raw','grav2','grav3','grav5']
    m1 = ['entrance','gravity**(1/2)','gravity**(1/3)','gravity**(1/5)']

    popg_acc = pd.DataFrame()
    for n in range(len(cities)):
        city = cities[n]
        print(city)

        popgrid_access = pd.DataFrame()
        for var_abbr, label in zip(l1, m1):
            # Best entry point from a grid to a park for this cost variant.
            var_best_routes = best_gridpark_comb (routes[n], var_abbr, pop_grids[n])

            gridsc = pd.DataFrame()
            print(label, round((time.time() - start_time) / 60,2), 'mns')

            # For each threshold given, calculate a score
            for t in thresholds:
                score = 'tr_'+ str(t)
                scores = determine_scores(var_best_routes, pop_grids[n], t, var_abbr, city,
                                          grid_size = grid_size)

                gridsc = pd.concat([gridsc, scores['grid_score']])

                # Sum the population living in the grids of every access category.
                popgacc = pd.DataFrame()
                popgacc[label+'_'+str(t)] = scores['score_w_route'].groupby(score+'_access')['population'].sum()
                popgrid_access = pd.concat([popgrid_access, popgacc],axis=1)

                print('grid ',t)

            gridsc = gridsc.join(pop_grids[n]['geometry'])
            gridsc = gpd.GeoDataFrame(gridsc, geometry = 'geometry', crs = 4326)
            gridsc.to_file(geom_dir + 'gridscore_'+ var_abbr + '_' + city + '.gpkg')

            # Detailed scores to files: number of cities * ways to measure = number of files.
            # Different threshold-scores are in the same dataframe
            gridsc = gridsc.loc[:, gridsc.columns!='geometry']
            gridsc.to_csv(csv_dir + 'gridscore_'+ var_abbr + '_' + city + '.csv')

        # For each city, divide the population per category by the total to get its share.
        popgrid_access = popgrid_access / popgrid_access.sum()
        popgrid_access = pd.DataFrame(popgrid_access.unstack())
        popg_acc = pd.concat([popg_acc, popgrid_access], axis = 1)

        print(city,'done', round((time.time() - start_time) / 60,2), 'mns')
    popg_acc.columns = cities
    popg_acc.to_csv(out_root + 'popgrid_access.csv')
    return(popg_acc)


In [26]:
def best_gridpark_comb (routes, var_abbr, pop_grid):
    """Keep the cheapest route per grid/park pair and attach the grid population.

    Parameters
    ----------
    routes : route summary with at least the columns 'way_calc',
        'gridpark_no', 'Grid_No', 'Park_No', 'Park_entry_No' and
        '<var_abbr>_Tcost'.
    var_abbr : cost variant, e.g. 'raw' or 'grav2'.
    pop_grid : population grid indexed by grid number, with the columns
        'population' and 'geometry'.

    Returns
    -------
    DataFrame with, per grid/park pair, the row with the lowest
    '<var_abbr>_Tcost' (rows marked 'no way' are ignored), outer-merged with
    `pop_grid` so that grids without any route are kept. For those grids
    'Park_No' and 'Park_entry_No' are -1; both columns are integers.
    """
    cost_col = str(var_abbr) + '_Tcost'

    # Only routes that were actually found can be the best one.
    Rclean = routes[routes['way_calc'] != 'no way'].reset_index()
    best_rows = Rclean.groupby('gridpark_no')[cost_col].idxmin()
    best = Rclean.loc[best_rows]

    # Get grid information; 'outer' keeps the grids that have no route at all.
    best = pd.merge(best, pop_grid[['population','geometry']],
                    left_on = 'Grid_No', right_index = True, how = 'outer')
    best = best.reset_index()

    # formatting: grids without a park get -1 so the columns can be integers
    for col in ('Park_No', 'Park_entry_No'):
        best[col] = best[col].fillna(-1).astype(int)
    return(best)

In [27]:
def determine_scores(var_df, pop_grid, thresholds, var_abbr, city, grid_size = 100):
    """Score every grid for one distance threshold and write the result to disk.

    NOTE: a second `determine_scores` is defined further down in this notebook
    (it writes to the 'm grids local' folder); whichever cell ran last is the
    definition in effect.

    Parameters
    ----------
    var_df : best route per grid/park pair, with at least the columns
        'Grid_No', 'gridpark_no', 'Park_No', 'Park_entry_No', 'population',
        'walk_area_m2', 'geometry_x' and '<var_abbr>_Tcost'.
    pop_grid : population grid indexed by grid number ('population' is used).
    thresholds : a single threshold value (despite the plural name).
    var_abbr : cost variant, e.g. 'raw' or 'grav2'.
    city : city name, used in the output file name.
    grid_size : grid resolution in metres, used in the output folder name.

    Returns
    -------
    dict with
      'score_w_route' : GeoDataFrame per grid, geometry = dissolved routes,
      'grid_score'    : the same table without the route geometry.

    A route within the threshold scores `threshold - cost`; the grid score is
    the sum over its parks. Access category: '1 high' for a grid score >= t,
    '2 medium' for >= t/2, '3 low' for > 0 and '4 no' otherwise.

    Side effects
    ------------
    Writes 'D:/Dumps/Scores output WP2-OSM/<grid_size>m grids/Grid_lines/
    gridscore_<var_abbr>_<threshold>_<city>.gpkg'.
    """
    t = thresholds
    str2 = str(t)
    score = 'tr_'+ str2
    cost_col = var_abbr + '_Tcost'
    pop_score = 'pop' + score
    area_col = 'walk_area_ha' + str2
    density_col = 'walkha_person' + str2

    # Only routes within the threshold get a score. Work on a copy so the
    # new columns are not written onto a slice of var_df.
    thold = var_df[var_df[cost_col] <= t].copy()
    thold[score] = t - thold[cost_col]
    thold[pop_score] = thold[score] * thold['population']
    thold[area_col] = var_df['walk_area_m2'] /10000
    # As computed this is population per hectare of walkable area.
    thold[density_col] = thold['population'] / thold[area_col]

    # Join the gridpark information from before.
    var_df = var_df.join(thold[[score, pop_score, area_col, density_col]])

    # Grid scores; the columns are selected with a list because tuple
    # selection on a GroupBy is no longer supported by pandas.
    by_grid = var_df.groupby('Grid_No')
    gs = pd.DataFrame(by_grid[[score, pop_score, area_col]].sum())
    gs.columns = [score, 'pop_' + score, 'walkha_' + str2]

    gs['walkha_person_' + score] = by_grid[density_col].mean()

    trstr = var_df[var_df[score] > 0]
    gs[score + '_parks'] = trstr.groupby('Grid_No')['gridpark_no'].count()

    # Add the routes as a dissolved line_geom
    dissolved = gpd.GeoDataFrame(trstr[['Grid_No','geometry_x']],
                                 geometry = 'geometry_x', crs = 4326).dissolve('Grid_No')
    gs[score + '_routes'] = dissolved.geometry

    # Add parks which grids have access to with its closest access point
    park_lists = trstr[trstr['Park_No'] >=0].groupby('Grid_No')['Park_No'].apply(list).astype(str)
    entry_lists = trstr[trstr['Park_entry_No'] >=0].groupby('Grid_No')['Park_entry_No'].apply(list).astype(str)
    gs[score+'Park:entry'] = park_lists + ':' + entry_lists

    # determine the thresholds category-score.
    # High >= threshold (perfect score to one park), medium is above half perfect,
    # low is below this and no is no access to a park for a certain grid within the threshold given.
    # The explicit string default keeps choices and default of one dtype.
    gs[score+'_access'] = np.select(
        [gs[score] >= t,
         (gs[score] < t) & (gs[score]>= t/2),
         (gs[score] < t/2) & (gs[score]> 0),
         gs[score] <= 0],
        ['1 high','2 medium','3 low','4 no'],
        default = '4 no')
    gs = gs.join(pop_grid['population'], how = 'outer')

    gs = gpd.GeoDataFrame(gs, geometry = score + '_routes', crs = 4326)

    # Absolute path on drive D: (not relative to the current folder of that drive).
    out_dir = 'D:/Dumps/Scores output WP2-OSM/'+str(grid_size)+'m grids/Grid_lines/'
    os.makedirs(out_dir, exist_ok = True)
    gs.to_file(out_dir + 'gridscore_'+ var_abbr + '_' + str2 + '_' + city + '.gpkg')

    gsc = gs.loc[:,~gs.columns.isin([score + '_routes'])]

    return({'grid_score':gsc,'score_w_route':gs})

In [28]:
# OPTIONAL
# Block 8 determine best parkentry points from each grid, then calculate grid scores
# and finally aggregate city access in categories (high, medium, low and no access)
def grid_score_summary_ranks (routes, UGS_entry, cities, pop_grids, ext = '', grid_size = 100, ranks = 1):
    """Ranked variant of the grid scoring: score grids per city and summarise access shares.

    For each city and each of the four cost variants ('raw', 'grav2', 'grav3',
    'grav5') the routes are selected with `best_gridpark_comb_ranks`, then
    `determine_scores` is run for every value in the notebook-level
    `thresholds` list.

    Parameters
    ----------
    routes : list with one route-summary table per city.
    UGS_entry : list with one park-entry table per city, passed on to
        `best_gridpark_comb_ranks`.
    cities : list of city names; used in file names and as result columns.
    pop_grids : list with one population grid per city ('population' and
        'geometry' columns are used).
    ext : unused, kept for backward compatibility.
    grid_size : grid resolution in metres; selects the output folder and is
        passed on to `determine_scores`.
    ranks : passed on to `best_gridpark_comb_ranks`.

    Returns
    -------
    DataFrame with one column per city holding, per variant/threshold and
    access category, the share of the population in that category.

    Side effects
    ------------
    Writes under 'D:/Dumps/Scores output WP2-OSM/<grid_size>m grids/':
      Grid_geoms/gridscore_<variant>_<city>.gpkg,
      Grid_csv/gridscore_<variant>_<city>.csv and popgrid_access.csv.
    """
    start_time = time.time()

    out_root = 'D:/Dumps/Scores output WP2-OSM/'+str(grid_size)+'m grids/'
    geom_dir = out_root + 'Grid_geoms/'
    csv_dir = out_root + 'Grid_csv/'
    for folder in (geom_dir, csv_dir):
        os.makedirs(folder, exist_ok = True)

    # The four distance decay variants regarding park size, with their labels.
    l1 = ['raw','grav2','grav3','grav5']
    m1 = ['entrance','gravity**(1/2)','gravity**(1/3)','gravity**(1/5)']

    popg_acc = pd.DataFrame()
    for n in range(len(cities)):
        city = cities[n]
        print(city)

        popgrid_access = pd.DataFrame()
        for var_abbr, label in zip(l1, m1):
            # Ranked selection of routes per grid for this cost variant. The
            # result gets its own name so the helper function is not shadowed.
            var_best_routes = best_gridpark_comb_ranks (routes[n], UGS_entry[n], var_abbr,
                                                        pop_grids[n], ranks = ranks)

            gridsc = pd.DataFrame()
            print(label, round((time.time() - start_time) / 60,2), 'mns')

            # For each threshold given, calculate a score
            for t in thresholds:
                score = 'tr_'+ str(t)
                scores = determine_scores(var_best_routes, pop_grids[n], t, var_abbr, city,
                                          grid_size = grid_size)

                gridsc = pd.concat([gridsc, scores['grid_score']])

                # Sum the population living in the grids of every access category.
                popgacc = pd.DataFrame()
                popgacc[label+'_'+str(t)] = scores['score_w_route'].groupby(score+'_access')['population'].sum()
                popgrid_access = pd.concat([popgrid_access, popgacc],axis=1)

                print('grid ',t)

            gridsc = gridsc.join(pop_grids[n]['geometry'])
            gridsc = gpd.GeoDataFrame(gridsc, geometry = 'geometry', crs = 4326)
            gridsc.to_file(geom_dir + 'gridscore_'+ var_abbr + '_' + city + '.gpkg')

            # Detailed scores to files: number of cities * ways to measure = number of files.
            # Different threshold-scores are in the same dataframe
            gridsc = gridsc.loc[:, gridsc.columns!='geometry']
            gridsc.to_csv(csv_dir + 'gridscore_'+ var_abbr + '_' + city + '.csv')

        # For each city, divide the population per category by the total to get its share.
        popgrid_access = popgrid_access / popgrid_access.sum()
        popgrid_access = pd.DataFrame(popgrid_access.unstack())
        popg_acc = pd.concat([popg_acc, popgrid_access], axis = 1)

        print(city,'done', round((time.time() - start_time) / 60,2), 'mns')
    popg_acc.columns = cities
    popg_acc.to_csv(out_root + 'popgrid_access.csv')
    return(popg_acc)


In [29]:
# OPTIONAL
def best_gridpark_comb_ranks (routes, UGS_entry, var_abbr, pop_grid, ranks = 1):
    """Select routes with `ranked_route_scoring` and attach the grid population.

    Parameters
    ----------
    routes : route summary of one city, passed on to `ranked_route_scoring`.
    UGS_entry : park-entry table of that city, passed on as well.
    var_abbr : cost variant, e.g. 'raw' or 'grav2'.
    pop_grid : population grid indexed by grid number, with the columns
        'population' and 'geometry'.
    ranks : passed on to `ranked_route_scoring`.

    Returns
    -------
    The ranked routes outer-merged with `pop_grid` on 'Grid_No', so grids
    without a route are kept; for those 'Park_No' and 'Park_entry_No' are -1.
    Both columns are integers.

    Relies on the notebook-level `cities` list and on `ranked_route_scoring`,
    which is defined elsewhere in the notebook; its result must contain the
    columns 'Grid_No', 'Park_No' and 'Park_entry_No'.
    """
    ranked = ranked_route_scoring (routes, UGS_entry, cities, var_abbr, ranks)

    # Get grid information; 'outer' keeps the grids that have no route at all.
    ranked = pd.merge(ranked, pop_grid[['population','geometry']],
                      left_on = 'Grid_No', right_index = True, how = 'outer')
    ranked = ranked.reset_index()

    # formatting: grids without a park get -1 so the columns can be integers
    for col in ('Park_No', 'Park_entry_No'):
        ranked[col] = ranked[col].fillna(-1).astype(int)
    return(ranked)

In [None]:
def determine_scores(var_df, pop_grid, thresholds, var_abbr, city, grid_size = 100):
    """Score every grid for one distance threshold and write the result to the 'local' output folder.

    NOTE: this cell redefines `determine_scores`; once it has run it replaces
    the earlier definition in this notebook, which writes to 'm grids'
    instead of 'm grids local'.

    Parameters
    ----------
    var_df : best route per grid/park pair, with at least the columns
        'Grid_No', 'gridpark_no', 'Park_No', 'Park_entry_No', 'population',
        'walk_area_m2', 'geometry_x' and '<var_abbr>_Tcost'.
    pop_grid : population grid indexed by grid number ('population' is used).
    thresholds : a single threshold value (despite the plural name).
    var_abbr : cost variant, e.g. 'raw' or 'grav2'.
    city : city name, used in the output file name.
    grid_size : grid resolution in metres, used in the output folder name.

    Returns
    -------
    dict with
      'score_w_route' : GeoDataFrame per grid, geometry = dissolved routes,
      'grid_score'    : the same table without the route geometry.

    A route within the threshold scores `threshold - cost`; the grid score is
    the sum over its parks. Access category: '1 high' for a grid score >= t,
    '2 medium' for >= t/2, '3 low' for > 0 and '4 no' otherwise.

    Side effects
    ------------
    Writes 'D:/Dumps/Scores output WP2-OSM/<grid_size>m grids local/Grid_lines/
    gridscore_<var_abbr>_<threshold>_<city>.gpkg'.
    """
    t = thresholds
    str2 = str(t)
    score = 'tr_'+ str2
    cost_col = var_abbr + '_Tcost'
    pop_score = 'pop' + score
    area_col = 'walk_area_ha' + str2
    density_col = 'walkha_person' + str2

    # Only routes within the threshold get a score. Work on a copy so the
    # new columns are not written onto a slice of var_df.
    thold = var_df[var_df[cost_col] <= t].copy()
    thold[score] = t - thold[cost_col]
    thold[pop_score] = thold[score] * thold['population']
    thold[area_col] = var_df['walk_area_m2'] /10000
    # As computed this is population per hectare of walkable area.
    thold[density_col] = thold['population'] / thold[area_col]

    # Join the gridpark information from before.
    var_df = var_df.join(thold[[score, pop_score, area_col, density_col]])

    # Grid scores; the columns are selected with a list because tuple
    # selection on a GroupBy is no longer supported by pandas.
    by_grid = var_df.groupby('Grid_No')
    gs = pd.DataFrame(by_grid[[score, pop_score, area_col]].sum())
    gs.columns = [score, 'pop_' + score, 'walkha_' + str2]

    gs['walkha_person_' + score] = by_grid[density_col].mean()

    trstr = var_df[var_df[score] > 0]
    gs[score + '_parks'] = trstr.groupby('Grid_No')['gridpark_no'].count()

    # Add the routes as a dissolved line_geom
    dissolved = gpd.GeoDataFrame(trstr[['Grid_No','geometry_x']],
                                 geometry = 'geometry_x', crs = 4326).dissolve('Grid_No')
    gs[score + '_routes'] = dissolved.geometry

    # Add parks which grids have access to with its closest access point
    park_lists = trstr[trstr['Park_No'] >=0].groupby('Grid_No')['Park_No'].apply(list).astype(str)
    entry_lists = trstr[trstr['Park_entry_No'] >=0].groupby('Grid_No')['Park_entry_No'].apply(list).astype(str)
    gs[score+'Park:entry'] = park_lists + ':' + entry_lists

    # determine the thresholds category-score.
    # High >= threshold (perfect score to one park), medium is above half perfect,
    # low is below this and no is no access to a park for a certain grid within the threshold given.
    # The explicit string default keeps choices and default of one dtype.
    gs[score+'_access'] = np.select(
        [gs[score] >= t,
         (gs[score] < t) & (gs[score]>= t/2),
         (gs[score] < t/2) & (gs[score]> 0),
         gs[score] <= 0],
        ['1 high','2 medium','3 low','4 no'],
        default = '4 no')
    gs = gs.join(pop_grid['population'], how = 'outer')

    gs = gpd.GeoDataFrame(gs, geometry = score + '_routes', crs = 4326)

    # Absolute path on drive D: (not relative to the current folder of that drive).
    out_dir = 'D:/Dumps/Scores output WP2-OSM/'+str(grid_size)+'m grids local/Grid_lines/'
    os.makedirs(out_dir, exist_ok = True)
    gs.to_file(out_dir + 'gridscore_'+ var_abbr + '_' + str2 + '_' + city + '.gpkg')

    gsc = gs.loc[:,~gs.columns.isin([score + '_routes'])]

    return({'grid_score':gsc,'score_w_route':gs})

In [32]:
# Report the wall-clock time elapsed since `start` was set in Block 0, in minutes.
print(round((time.time() - start) / 60,2),'mns')

99.65 mns
