### 1. Initialization

1.1 Importing packages

In [1]:
# System packages
import sys
import time
import warnings
import os
import fiona
from collections import Counter

# Non-geo numeric packages
import numpy as np
import math
from itertools import product, combinations
import pandas as pd

# Network and OSM packages
import networkx as nx
import osmnx as ox
city_geo = ox.geocoder.geocode_to_gdf

# Earth engine packages
import ee
import geemap

# General geo-packages
from rasterstats import zonal_stats
from pyproj import CRS
import libpysal
import rasterio
import rioxarray
import geopandas as gpd
import shapely
from shapely import geometry
from shapely.geometry import Point, MultiLineString, LineString, Polygon, MultiPolygon

# Multiprocessing packages
import multiprocessing
from joblib import Parallel, delayed


import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In a future release, GeoPandas will switch to using Shapely by default. If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas as gpd


1.2 Importing and initializing data

In [2]:
# Import population grid of respective urban area
popgrid = gpd.read_file('popgridmanchesternew.gpkg')

In [3]:
# Define local parameters for the EPSG and area for respective urban area
epsg = "EPSG:27700"
area = "Greater Manchester"

In [3]:
# Add grid_lat/lon from EPSG 4326 for OSMNX before reverting it to local ESPG
popgrid = popgrid.to_crs("EPSG:4326")
popgrid['centroid'] = popgrid.geometry.centroid
popgrid['grid_lon_4326'] = popgrid['centroid'].x
popgrid['grid_lat_4326'] = popgrid['centroid'].y
popgrid = popgrid.to_crs(epsg)


  popgridmanchester['centroid'] = popgridmanchester.geometry.centroid


In [4]:
# Create df with lat/lon values to get their nearest nodes for truncation
popgrid = gpd.GeoDataFrame(popgrid, geometry='geometry')
popgrid = popgrid.reset_index()

# Create 'center' variable for the ox.graph_from_point()
popgrid['center'] = list(zip(popgrid.grid_lat_4326, popgrid.grid_lon_4326))

### 2. Distance Calculations

2.1 Either load or download street network of the respective urban area

In [6]:
# Optional: Load in street network graph if local file is already available
Gproj = ox.load_graphml('network.graphml')

In [None]:
# Otherwise: Download street network of the urban area from osmnx and project to local EPSG
G = ox.graph_from_place(area, network_type='all', simplify=True)
Gproj = ox.project_graph(G, to_crs=epsg)

# Save to graphml
ox.io.save_graphml(Gproj, 'network.graphml')

2.2 Calculate the nearest node to each grid centroid with a threshold of 1000 meters

In [7]:
# List of grid centroid nearest nodes and their distances
nearest_nodes, dist = ox.distance.nearest_nodes(Gproj, popgrid['grid_lon'],
                                                popgrid['grid_lat'], return_dist=True)

# Apply vectorization to get the nearest grid nodes with a threshold
mask = np.vectorize(isinstance)(nearest_nodes, int)
nearest_nodes_g = np.where(mask, nearest_nodes, np.nan)
dist_g = np.where(mask, dist, np.nan)
valid_nodes_grid = np.array([nearest_nodes_g, dist_g], np.float64)
valid_nodes_grid = np.where(valid_nodes_grid[1]<1000, valid_nodes_grid[0], np.nan)

2.3 Calculate the nearest node to each POI, allowing distance calculations between grid centroids and POI using the street network

2.3.1 Bus Stops

In [None]:
# Extracting bus stops
bus_stops = ox.geometries_from_place(area, tags={'highway': 'bus_stop'})
bus_stops = bus_stops.to_crs(epsg)
bus_stops = bus_stops[['geometry']]
bus_stops = bus_stops.reset_index()

# Initiate function
def calculate_shortest_path(row):
    try:
        return nx.shortest_path_length(Gproj, valid_nodes_grid[row], valid_nodes_bus_stops[row], weight='length')
    except (nx.NetworkXNoPath, nx.NodeNotFound):
        return None

# Function that is called in the multiprocessing joblib Parallel function
def get_nearest_polygon(row):
    return bus_stops.distance(row, align=True).sort_values().index[0]

# Apply multiprocessing using joblib to get the nearest bus stop to each grid polygon
if __name__ == '__main__':
    # initialization with less threads and one row due to kernel crashing otherwise
    polygon_index = Parallel(n_jobs=2, prefer="threads")(delayed(get_nearest_polygon)(row)
                                                         for row in popgrid['geometry'][0:1])
    # all cores and rows
    polygon_index = Parallel(n_jobs=-1, prefer="threads")(delayed(get_nearest_polygon)(row) 
                                                          for row in popgrid['geometry'])

nearest_bus_stops = bus_stops.loc[polygon_index].geometry.centroid

# List of bus stop nearest nodes and their distances
nearest_nodes_bus_stops, dist_bus_stops = ox.distance.nearest_nodes(Gproj, nearest_bus_stops.geometry.x,
                                                nearest_bus_stops.geometry.y, return_dist=True)

# Apply vectorization to get the nearest grid nodes with a threshold
mask = np.vectorize(isinstance)(nearest_nodes_bus_stops, int)
nearest_nodes_b = np.where(mask, nearest_nodes_bus_stops, np.nan)
dist_b = np.where(mask, dist_bus_stops, np.nan)
valid_nodes_bus_stops = np.array([nearest_nodes_b, dist_b], np.float64)
valid_nodes_bus_stops = np.where(valid_nodes_bus_stops[1]<1000, valid_nodes_bus_stops[0], np.nan)

# Get the shortest distance by calling the function one cell above that uses nx.shortest_path_length()
popgrid['dist_to_bus_stops'] = [calculate_shortest_path(x) for x in popgrid.index]

2.3.2 Train Stations

In [10]:
# Extracting train stations
train_stations = ox.geometries_from_place(area, tags={'railway': 'station'})
train_stations = train_stations.to_crs(epsg)
train_stations = train_stations[['geometry']]
train_stations = train_stations.reset_index()

# Initiate function
def calculate_shortest_path(row):
    try:
        return nx.shortest_path_length(Gproj, valid_nodes_grid[row], valid_nodes_train_stations[row], weight='length')
    except (nx.NetworkXNoPath, nx.NodeNotFound):
        return None
    
# Function that is called in the multiprocessing joblib Parallel function
def get_nearest_polygon(row):
    return train_stations.distance(row, align=True).sort_values().index[0]

# Apply multiprocessing using joblib to get the nearest bus stop to each grid polygon
if __name__ == '__main__':
    # initialization with less threads and one row due to kernel crashing otherwise
    polygon_index = Parallel(n_jobs=2, prefer="threads")(delayed(get_nearest_polygon)(row)
                                                         for row in popgrid['geometry'][0:1])
    # all cores and rows
    polygon_index = Parallel(n_jobs=-1, prefer="threads")(delayed(get_nearest_polygon)(row) 
                                                          for row in popgrid['geometry'])

nearest_train_stations = train_stations.loc[polygon_index].geometry.centroid

# List of bus stop nearest nodes and their distances
nearest_nodes_train_stations, dist_train_stations = ox.distance.nearest_nodes(Gproj, nearest_train_stations.geometry.x,
                                                nearest_train_stations.geometry.y, return_dist=True)

# Apply vectorization to get the nearest grid nodes with a threshold
mask = np.vectorize(isinstance)(nearest_nodes_train_stations, int)
nearest_nodes_b = np.where(mask, nearest_nodes_train_stations, np.nan)
dist_b = np.where(mask, dist_train_stations, np.nan)
valid_nodes_train_stations = np.array([nearest_nodes_b, dist_b], np.float64)
valid_nodes_train_stations = np.where(valid_nodes_train_stations[1]<1000, valid_nodes_train_stations[0], np.nan)

# Get the shortest distance by calling the function one cell above that uses nx.shortest_path_length()
popgrid['dist_to_train_stations'] = [calculate_shortest_path(x) for x in popgrid.index]

2.3.3 Restaurants

In [18]:
# Extracting restaurants
restaurants = ox.geometries_from_place(area, tags={'amenity': ['bar', 'pub', 'restaurant', 'cafe']})
restaurants = restaurants.to_crs(epsg)
restaurants = restaurants[['geometry']]
restaurants = restaurants.reset_index()

# Initiate function
def calculate_shortest_path(row):
    try:
        return nx.shortest_path_length(Gproj, valid_nodes_grid[row], valid_nodes_restaurants[row], weight='length')
    except (nx.NetworkXNoPath, nx.NodeNotFound):
        return None

# Function that is called in the multiprocessing joblib Parallel function
def get_nearest_polygon(row):
    return restaurants.distance(row, align=True).sort_values().index[0]

# Apply multiprocessing using joblib to get the nearest bus stop to each grid polygon
if __name__ == '__main__':
    # initialization with less threads and one row due to kernel crashing otherwise
    polygon_index = Parallel(n_jobs=2, prefer="threads")(delayed(get_nearest_polygon)(row)
                                                         for row in popgrid['geometry'][0:1])
    # all cores and rows
    polygon_index = Parallel(n_jobs=-1, prefer="threads")(delayed(get_nearest_polygon)(row) 
                                                          for row in popgrid['geometry'])

nearest_restaurants = restaurants.loc[polygon_index].geometry.centroid

# List of bus stop nearest nodes and their distances
nearest_nodes_restaurants, dist_restaurants = ox.distance.nearest_nodes(Gproj, nearest_restaurants.geometry.x,
                                                nearest_restaurants.geometry.y, return_dist=True)

# Apply vectorization to get the nearest grid nodes with a threshold
mask = np.vectorize(isinstance)(nearest_nodes_restaurants, int)
nearest_nodes_b = np.where(mask, nearest_nodes_restaurants, np.nan)
dist_b = np.where(mask, dist_restaurants, np.nan)
valid_nodes_restaurants = np.array([nearest_nodes_b, dist_b], np.float64)
valid_nodes_restaurants = np.where(valid_nodes_restaurants[1]<1000, valid_nodes_restaurants[0], np.nan)

# Get the shortest distance by calling the function one cell above that uses nx.shortest_path_length()
popgrid['dist_to_restaurants'] = [calculate_shortest_path(x) for x in popgrid.index]

2.3.4 Fast Food

In [9]:
# Extracting fast food
fast_food = ox.geometries_from_place(area, tags={'amenity': 'fast_food'})
fast_food = fast_food.to_crs(epsg)
fast_food = fast_food[['geometry']]
fast_food = fast_food.reset_index()

# Initiate function
def calculate_shortest_path(row):
    try:
        return nx.shortest_path_length(Gproj, valid_nodes_grid[row], valid_nodes_fast_food[row], weight='length')
    except (nx.NetworkXNoPath, nx.NodeNotFound):
        return None

# Function that is called in the multiprocessing joblib Parallel function
def get_nearest_polygon(row):
    return fast_food.distance(row, align=True).sort_values().index[0]

# Apply multiprocessing using joblib to get the nearest bus stop to each grid polygon
if __name__ == '__main__':
    # initialization with less threads and one row due to kernel crashing otherwise
    polygon_index = Parallel(n_jobs=2, prefer="threads")(delayed(get_nearest_polygon)(row)
                                                         for row in popgrid['geometry'][0:1])
    # all cores and rows
    polygon_index = Parallel(n_jobs=-1, prefer="threads")(delayed(get_nearest_polygon)(row) 
                                                          for row in popgrid['geometry'])

nearest_fast_food = fast_food.loc[polygon_index].geometry.centroid

# List of bus stop nearest nodes and their distances
nearest_nodes_fast_food, dist_fast_food= ox.distance.nearest_nodes(Gproj, nearest_fast_food.geometry.x,
                                                nearest_fast_food.geometry.y, return_dist=True)

# Apply vectorization to get the nearest grid nodes with a threshold
mask = np.vectorize(isinstance)(nearest_nodes_fast_food, int)
nearest_nodes_b = np.where(mask, nearest_nodes_fast_food, np.nan)
dist_b = np.where(mask, dist_fast_food, np.nan)
valid_nodes_fast_food = np.array([nearest_nodes_b, dist_b], np.float64)
valid_nodes_fast_food = np.where(valid_nodes_fast_food[1]<1000, valid_nodes_fast_food[0], np.nan)

# Get the shortest distance by calling the function one cell above that uses nx.shortest_path_length()
popgrid['dist_to_fast_food'] = [calculate_shortest_path(x) for x in popgrid.index]

2.3.5 Daily Shops

In [42]:
#Extracting daily shops
daily_shops = ox.geometries_from_place(area, tags={'shop': ['department_store', 'supermarket', 'convenience']})
daily_shops = daily_shops.to_crs(epsg)
daily_shops = daily_shops[['geometry']]
daily_shops = daily_shops.reset_index()

# Initiate function
def calculate_shortest_path(row):
    try:
        return nx.shortest_path_length(Gproj, valid_nodes_grid[row], valid_nodes_daily_shops[row], weight='length')
    except (nx.NetworkXNoPath, nx.NodeNotFound):
        return None
    
# Function that is called in the multiprocessing joblib Parallel function
def get_nearest_polygon(row):
    return daily_shops.distance(row, align=True).sort_values().index[0]

# Apply multiprocessing using joblib to get the nearest bus stop to each grid polygon
if __name__ == '__main__':
    # initialization with less threads and one row due to kernel crashing otherwise
    polygon_index = Parallel(n_jobs=2, prefer="threads")(delayed(get_nearest_polygon)(row)
                                                         for row in popgrid['geometry'][0:1])
    # all cores and rows
    polygon_index = Parallel(n_jobs=-1, prefer="threads")(delayed(get_nearest_polygon)(row) 
                                                          for row in popgrid['geometry'])

nearest_daily_shops = daily_shops.loc[polygon_index].geometry.centroid

# List of bus stop nearest nodes and their distances
nearest_nodes_daily_shops, dist_daily_shops = ox.distance.nearest_nodes(Gproj, nearest_daily_shops.geometry.x,
                                                nearest_daily_shops.geometry.y, return_dist=True)

# Apply vectorization to get the nearest grid nodes with a threshold
mask = np.vectorize(isinstance)(nearest_nodes_daily_shops, int)
nearest_nodes_b = np.where(mask, nearest_nodes_daily_shops, np.nan)
dist_b = np.where(mask, dist_daily_shops, np.nan)
valid_nodes_daily_shops = np.array([nearest_nodes_b, dist_b], np.float64)
valid_nodes_daily_shops = np.where(valid_nodes_daily_shops[1]<1000, valid_nodes_daily_shops[0], np.nan)

# Get the shortest distance by calling the function one cell above that uses nx.shortest_path_length()
popgrid['dist_to_daily_shops'] = [calculate_shortest_path(x) for x in popgrid.index]

2.3.6 Business Shops

In [53]:
# Extracting business shops
business_shops = ox.geometries_from_place(area, tags={'shop': ['clothes', 'jewelry', 'shoes', 'tailor', 'beauty', 'cosmetics', 'hairdresser',
                                                    'doityourself', 'garden_center', 'hardware', 'mall', 'department_store']})
business_shops = business_shops.to_crs(epsg)
business_shops = business_shops[['geometry']]
business_shops = business_shops.reset_index()

# Initiate function
def calculate_shortest_path(row):
    try:
        return nx.shortest_path_length(Gproj, valid_nodes_grid[row], valid_nodes_business_shops[row], weight='length')
    except (nx.NetworkXNoPath, nx.NodeNotFound):
        return None
    
# Function that is called in the multiprocessing joblib Parallel function
def get_nearest_polygon(row):
    return business_shops.distance(row, align=True).sort_values().index[0]

# Apply multiprocessing using joblib to get the nearest bus stop to each grid polygon
if __name__ == '__main__':
    # initialization with less threads and one row due to kernel crashing otherwise
    polygon_index = Parallel(n_jobs=2, prefer="threads")(delayed(get_nearest_polygon)(row)
                                                         for row in popgrid['geometry'][0:1])
    # all cores and rows
    polygon_index = Parallel(n_jobs=-1, prefer="threads")(delayed(get_nearest_polygon)(row) 
                                                          for row in popgrid['geometry'])

nearest_business_shops= business_shops.loc[polygon_index].geometry.centroid

# List of bus stop nearest nodes and their distances
nearest_nodes_business_shops, dist_business_shops = ox.distance.nearest_nodes(Gproj, nearest_business_shops.geometry.x,
                                                nearest_business_shops.geometry.y, return_dist=True)

# Apply vectorization to get the nearest grid nodes with a threshold
mask = np.vectorize(isinstance)(nearest_nodes_business_shops, int)
nearest_nodes_b = np.where(mask, nearest_nodes_business_shops, np.nan)
dist_b = np.where(mask, dist_business_shops, np.nan)
valid_nodes_business_shops = np.array([nearest_nodes_b, dist_b], np.float64)
valid_nodes_business_shops = np.where(valid_nodes_business_shops[1]<1000, valid_nodes_business_shops[0], np.nan)

# Get the shortest distance by calling the function one cell above that uses nx.shortest_path_length()
popgrid['dist_to_business_shops'] = [calculate_shortest_path(x) for x in popgrid.index]

2.3.7 Greenspace

In [65]:
# Extracting greenspace
greenspace = ox.geometries_from_place(area, tags={'leisure': ['garden', 'nature_reserve', 'park', 'pitch']})
greenspace = greenspace.to_crs(epsg)
greenspace = greenspace[['geometry']]
greenspace = greenspace.reset_index()

# Initiate function
def calculate_shortest_path(row):
    try:
        return nx.shortest_path_length(Gproj, valid_nodes_grid[row], valid_nodes_greenspace[row], weight='length')
    except (nx.NetworkXNoPath, nx.NodeNotFound):
        return None
    
# Function that is called in the multiprocessing joblib Parallel function
def get_nearest_polygon(row):
    return greenspace.distance(row, align=True).sort_values().index[0]

# Apply multiprocessing using joblib to get the nearest bus stop to each grid polygon
if __name__ == '__main__':
    # initialization with less threads and one row due to kernel crashing otherwise
    polygon_index = Parallel(n_jobs=2, prefer="threads")(delayed(get_nearest_polygon)(row)
                                                         for row in popgrid['geometry'][0:1])
    # all cores and rows
    polygon_index = Parallel(n_jobs=-1, prefer="threads")(delayed(get_nearest_polygon)(row) 
                                                          for row in popgrid['geometry'])

nearest_greenspace = greenspace.loc[polygon_index].geometry.centroid

# List of bus stop nearest nodes and their distances
nearest_nodes_greenspace, dist_greenspace = ox.distance.nearest_nodes(Gproj, nearest_greenspace.geometry.x,
                                                nearest_greenspace.geometry.y, return_dist=True)

# Apply vectorization to get the nearest grid nodes with a threshold
mask = np.vectorize(isinstance)(nearest_nodes_greenspace, int)
nearest_nodes_b = np.where(mask, nearest_nodes_greenspace, np.nan)
dist_b = np.where(mask, dist_greenspace, np.nan)
valid_nodes_greenspace = np.array([nearest_nodes_b, dist_b], np.float64)
valid_nodes_greenspace = np.where(valid_nodes_greenspace[1]<1000, valid_nodes_greenspace[0], np.nan)

# Get the shortest distance by calling the function one cell above that uses nx.shortest_path_length()
popgrid['dist_to_greenspace'] = [calculate_shortest_path(x) for x in popgrid.index]

2.3.8 Water Bodies

In [11]:
# Extracting water bodies
water_bodies = ox.geometries_from_place(area, tags={'water': ['lake', 'river', 'canal', 'rapids', 'lagoon']})
water_bodies = water_bodies.to_crs(epsg)
water_bodies = water_bodies[['geometry']]
water_bodies = water_bodies.reset_index()

# Initiate function
def calculate_shortest_path(row):
    try:
        return nx.shortest_path_length(Gproj, valid_nodes_grid[row], valid_nodes_water_bodies[row], weight='length')
    except (nx.NetworkXNoPath, nx.NodeNotFound):
        return None

# Function that is called in the multiprocessing joblib Parallel function
def get_nearest_polygon(row):
    return water_bodies.distance(row, align=True).sort_values().index[0]

# Apply multiprocessing using joblib to get the nearest bus stop to each grid polygon
if __name__ == '__main__':
    # initialization with less threads and one row due to kernel crashing otherwise
    polygon_index = Parallel(n_jobs=2, prefer="threads")(delayed(get_nearest_polygon)(row)
                                                         for row in popgrid['geometry'][0:1])
    # all cores and rows
    polygon_index = Parallel(n_jobs=-1, prefer="threads")(delayed(get_nearest_polygon)(row) 
                                                          for row in popgrid['geometry'])

nearest_water_bodies = water_bodies.loc[polygon_index].geometry.centroid

# List of bus stop nearest nodes and their distances
nearest_nodes_water_bodies, dist_water_bodies= ox.distance.nearest_nodes(Gproj, nearest_water_bodies.geometry.x,
                                                nearest_water_bodies.geometry.y, return_dist=True)

# Apply vectorization to get the nearest grid nodes with a threshold
mask = np.vectorize(isinstance)(nearest_nodes_water_bodies, int)
nearest_nodes_b = np.where(mask, nearest_nodes_water_bodies, np.nan)
dist_b = np.where(mask, dist_water_bodies, np.nan)
valid_nodes_water_bodies = np.array([nearest_nodes_b, dist_b], np.float64)
valid_nodes_water_bodies = np.where(valid_nodes_water_bodies[1]<1000, valid_nodes_water_bodies[0], np.nan)

# Get the shortest distance by calling the function one cell above that uses nx.shortest_path_length()
popgrid['dist_to_water_bodies'] = [calculate_shortest_path(x) for x in popgrid.index]

2.3.9 Gyms

In [24]:
# Extracting gyms
gym = ox.geometries_from_place(area, tags={'leisure': 'fitness_centre'})
gym = gym.to_crs(epsg)
gym = gym[['geometry']]
gym = gym.reset_index()

# Initiate function
def calculate_shortest_path(row):
    try:
        return nx.shortest_path_length(Gproj, valid_nodes_grid[row], valid_nodes_gym[row], weight='length')
    except (nx.NetworkXNoPath, nx.NodeNotFound):
        return None
    
# Function that is called in the multiprocessing joblib Parallel function
def get_nearest_polygon(row):
    return gym.distance(row, align=True).sort_values().index[0]

# Apply multiprocessing using joblib to get the nearest bus stop to each grid polygon
if __name__ == '__main__':
    # initialization with less threads and one row due to kernel crashing otherwise
    polygon_index = Parallel(n_jobs=2, prefer="threads")(delayed(get_nearest_polygon)(row)
                                                         for row in popgrid['geometry'][0:1])
    # all cores and rows
    polygon_index = Parallel(n_jobs=-1, prefer="threads")(delayed(get_nearest_polygon)(row) 
                                                          for row in popgrid['geometry'])

nearest_gym = gym.loc[polygon_index].geometry.centroid

# List of bus stop nearest nodes and their distances
nearest_nodes_gym, dist_gym = ox.distance.nearest_nodes(Gproj, nearest_gym.geometry.x,
                                                nearest_gym.geometry.y, return_dist=True)

# Apply vectorization to get the nearest grid nodes with a threshold
mask = np.vectorize(isinstance)(nearest_nodes_gym, int)
nearest_nodes_b = np.where(mask, nearest_nodes_gym, np.nan)
dist_b = np.where(mask, dist_gym, np.nan)
valid_nodes_gym = np.array([nearest_nodes_b, dist_b], np.float64)
valid_nodes_gym = np.where(valid_nodes_gym[1]<1000, valid_nodes_gym[0], np.nan)

# Get the shortest distance by calling the function one cell above that uses nx.shortest_path_length()
popgrid['dist_to_gym'] = [calculate_shortest_path(x) for x in popgrid.index]

2.3.10 Sport Fields

In [None]:
# Extracting sport fields
sport_fields = ox.geometries_from_place("Greater Manchester", tags={'sport': ['soccer', 'tennis', 'athletics,', 'baseball', 'basketball', 'field_hockey', 'handball',
                                                                        'ice_hockey', 'cricket', 'rugby_league', 'rugby_union', 'softball', 'volleyball']})
sport_fields = sport_fields.to_crs(epsg)
sport_fields = sport_fields[['geometry']]
sport_fields = sport_fields.reset_index()

# Initiate function
def calculate_shortest_path(row):
    try:
        return nx.shortest_path_length(Gproj, valid_nodes_grid[row], valid_nodes_sport_fields[row], weight='length')
    except (nx.NetworkXNoPath, nx.NodeNotFound):
        return None
    
# Function that is called in the multiprocessing joblib Parallel function
def get_nearest_polygon(row):
    return sport_fields.distance(row, align=True).sort_values().index[0]

# Apply multiprocessing using joblib to get the nearest bus stop to each grid polygon
if __name__ == '__main__':
    # initialization with less threads and one row due to kernel crashing otherwise
    polygon_index = Parallel(n_jobs=2, prefer="threads")(delayed(get_nearest_polygon)(row)
                                                         for row in popgrid['geometry'][0:1])
    # all cores and rows
    polygon_index = Parallel(n_jobs=-1, prefer="threads")(delayed(get_nearest_polygon)(row) 
                                                          for row in popgrid['geometry'])

nearest_sport_fields = sport_fields.loc[polygon_index].geometry.centroid

# List of bus stop nearest nodes and their distances
nearest_nodes_sport_fields, dist_sport_fields = ox.distance.nearest_nodes(Gproj, nearest_sport_fields.geometry.x,
                                                nearest_sport_fields.geometry.y, return_dist=True)

# Apply vectorization to get the nearest grid nodes with a threshold
mask = np.vectorize(isinstance)(nearest_nodes_sport_fields, int)
nearest_nodes_b = np.where(mask, nearest_nodes_sport_fields, np.nan)
dist_b = np.where(mask, dist_sport_fields, np.nan)
valid_nodes_sport_fields = np.array([nearest_nodes_b, dist_b], np.float64)
valid_nodes_sport_fields = np.where(valid_nodes_sport_fields[1]<1000, valid_nodes_sport_fields[0], np.nan)

# Get the shortest distance by calling the function one cell above that uses nx.shortest_path_length()
popgrid['dist_to_sport_fields'] = [calculate_shortest_path(x) for x in popgrid.index]

2.3.11 Schools

In [27]:
# Extracting schools
schools = ox.geometries_from_place("Greater Manchester", tags={'amenity': 'school'})
schools = schools.to_crs(epsg)
schools = schools[['geometry']]
schools = schools.reset_index()

# Initiate function
def calculate_shortest_path(row):
    try:
        return nx.shortest_path_length(Gproj, valid_nodes_grid[row], valid_nodes_schools[row], weight='length')
    except (nx.NetworkXNoPath, nx.NodeNotFound):
        return None
    
# Function that is called in the multiprocessing joblib Parallel function
def get_nearest_polygon(row):
    return schools.distance(row, align=True).sort_values().index[0]

# Apply multiprocessing using joblib to get the nearest bus stop to each grid polygon
if __name__ == '__main__':
    # initialization with less threads and one row due to kernel crashing otherwise
    polygon_index = Parallel(n_jobs=2, prefer="threads")(delayed(get_nearest_polygon)(row)
                                                         for row in popgrid['geometry'][0:1])
    # all cores and rows
    polygon_index = Parallel(n_jobs=-1, prefer="threads")(delayed(get_nearest_polygon)(row) 
                                                          for row in popgrid['geometry'])

nearest_schools = schools.loc[polygon_index].geometry.centroid

# List of bus stop nearest nodes and their distances
nearest_nodes_schools, dist_schools = ox.distance.nearest_nodes(Gproj, nearest_schools.geometry.x,
                                                nearest_schools.geometry.y, return_dist=True)

# Apply vectorization to get the nearest grid nodes with a threshold
mask = np.vectorize(isinstance)(nearest_nodes_schools, int)
nearest_nodes_b = np.where(mask, nearest_nodes_schools, np.nan)
dist_b = np.where(mask, dist_schools, np.nan)
valid_nodes_schools = np.array([nearest_nodes_b, dist_b], np.float64)
valid_nodes_schools = np.where(valid_nodes_schools[1]<1000, valid_nodes_schools[0], np.nan)

# Get the shortest distance by calling the function one cell above that uses nx.shortest_path_length()
popgrid['dist_to_schools'] = [calculate_shortest_path(x) for x in popgrid.index]

2.3.12 College / Universities

In [40]:
# Extracting college / universities
coluni = ox.geometries_from_place("Greater Manchester", tags={'amenity': ['college', 'university']})
coluni = coluni.to_crs(epsg)
coluni = coluni[['geometry']]
coluni = coluni.reset_index()

# Initiate function
def calculate_shortest_path(row):
    try:
        return nx.shortest_path_length(Gproj, valid_nodes_grid[row], valid_nodes_coluni[row], weight='length')
    except (nx.NetworkXNoPath, nx.NodeNotFound):
        return None
    
# Function that is called in the multiprocessing joblib Parallel function
def get_nearest_polygon(row):
    return coluni.distance(row, align=True).sort_values().index[0]

# Apply multiprocessing using joblib to get the nearest bus stop to each grid polygon
if __name__ == '__main__':
    # initialization with less threads and one row due to kernel crashing otherwise
    polygon_index = Parallel(n_jobs=2, prefer="threads")(delayed(get_nearest_polygon)(row)
                                                         for row in popgrid['geometry'][0:1])
    # all cores and rows
    polygon_index = Parallel(n_jobs=-1, prefer="threads")(delayed(get_nearest_polygon)(row) 
                                                          for row in popgrid['geometry'])

nearest_coluni = coluni.loc[polygon_index].geometry.centroid

# List of bus stop nearest nodes and their distances
nearest_nodes_coluni, dist_coluni = ox.distance.nearest_nodes(Gproj, nearest_coluni.geometry.x,
                                                nearest_coluni.geometry.y, return_dist=True)

# Apply vectorization to get the nearest grid nodes with a threshold
mask = np.vectorize(isinstance)(nearest_nodes_coluni, int)
nearest_nodes_b = np.where(mask, nearest_nodes_coluni, np.nan)
dist_b = np.where(mask, dist_coluni, np.nan)
valid_nodes_coluni = np.array([nearest_nodes_b, dist_b], np.float64)
valid_nodes_coluni = np.where(valid_nodes_coluni[1]<1000, valid_nodes_coluni[0], np.nan)

# Get the shortest distance by calling the function one cell above that uses nx.shortest_path_length()
popgrid['dist_to_coluni'] = [calculate_shortest_path(x) for x in popgrid.index]

2.3.13 Places of Worship

In [8]:
# Extracting places of worship (e.g., churches, mosques, temples, etc)
place_of_worship = ox.geometries_from_place("Greater Manchester", tags={'amenity': 'place_of_worship'})
place_of_worship = place_of_worship.to_crs(epsg)
place_of_worship = place_of_worship[['geometry']]
place_of_worship = place_of_worship.reset_index()

# Initiate function
def calculate_shortest_path(row):
    try:
        return nx.shortest_path_length(Gproj, valid_nodes_grid[row], valid_nodes_place_of_worship[row], weight='length')
    except (nx.NetworkXNoPath, nx.NodeNotFound):
        return None
    
# Function that is called in the multiprocessing joblib Parallel function
def get_nearest_polygon(row):
    return place_of_worship.distance(row, align=True).sort_values().index[0]

# Apply multiprocessing using joblib to get the nearest bus stop to each grid polygon
if __name__ == '__main__':
    # initialization with less threads and one row due to kernel crashing otherwise
    polygon_index = Parallel(n_jobs=2, prefer="threads")(delayed(get_nearest_polygon)(row)
                                                         for row in popgrid['geometry'][0:1])
    # all cores and rows
    polygon_index = Parallel(n_jobs=-1, prefer="threads")(delayed(get_nearest_polygon)(row) 
                                                          for row in popgrid['geometry'])

nearest_place_of_worship = place_of_worship.loc[polygon_index].geometry.centroid

# List of bus stop nearest nodes and their distances
nearest_nodes_place_of_worship, dist_place_of_worship = ox.distance.nearest_nodes(Gproj, nearest_place_of_worship.geometry.x,
                                                nearest_place_of_worship.geometry.y, return_dist=True)

# Apply vectorization to get the nearest grid nodes with a threshold
mask = np.vectorize(isinstance)(nearest_nodes_place_of_worship, int)
nearest_nodes_b = np.where(mask, nearest_nodes_place_of_worship, np.nan)
dist_b = np.where(mask, dist_place_of_worship, np.nan)
valid_nodes_place_of_worship = np.array([nearest_nodes_b, dist_b], np.float64)
valid_nodes_place_of_worship = np.where(valid_nodes_place_of_worship[1]<1000, valid_nodes_place_of_worship[0], np.nan)

# Get the shortest distance by calling the function one cell above that uses nx.shortest_path_length()
popgrid['dist_to_place_of_worship'] = [calculate_shortest_path(x) for x in popgrid.index]

2.3.14 Hospitals

In [6]:
# Extracting hospitals
hospitals = ox.geometries_from_place("Greater Manchester", tags={'amenity': 'hospital'})
hospitals = hospitals.to_crs("EPSG:27700")
hospitals = hospitals[['geometry']]
hospitals = hospitals.reset_index()

# Initiate function
def calculate_shortest_path(row):
    try:
        return nx.shortest_path_length(Gproj, valid_nodes_grid[row], valid_nodes_hospitals[row], weight='length')
    except (nx.NetworkXNoPath, nx.NodeNotFound):
        return None
    
# Function that is called in the multiprocessing joblib Parallel function
def get_nearest_polygon(row):
    return hospitals.distance(row, align=True).sort_values().index[0]

# Apply multiprocessing using joblib to get the nearest bus stop to each grid polygon
if __name__ == '__main__':
    # initialization with less threads and one row due to kernel crashing otherwise
    polygon_index = Parallel(n_jobs=2, prefer="threads")(delayed(get_nearest_polygon)(row)
                                                         for row in popgrid['geometry'][0:1])
    # all cores and rows
    polygon_index = Parallel(n_jobs=-1, prefer="threads")(delayed(get_nearest_polygon)(row) 
                                                          for row in popgrid['geometry'])

nearest_hospitals = hospitals.loc[polygon_index].geometry.centroid

# List of bus stop nearest nodes and their distances
nearest_nodes_hospitals, dist_hospitals = ox.distance.nearest_nodes(Gproj, nearest_hospitals.geometry.x,
                                                nearest_hospitals.geometry.y, return_dist=True)

# Apply vectorization to get the nearest grid nodes with a threshold
mask = np.vectorize(isinstance)(nearest_nodes_hospitals, int)
nearest_nodes_b = np.where(mask, nearest_nodes_hospitals, np.nan)
dist_b = np.where(mask, dist_hospitals, np.nan)
valid_nodes_hospitals= np.array([nearest_nodes_b, dist_b], np.float64)
valid_nodes_hospitals = np.where(valid_nodes_hospitals[1]<1000, valid_nodes_hospitals[0], np.nan)

# Get the shortest distance by calling the function one cell above that uses nx.shortest_path_length()
popgrid['dist_to_hospitals'] = [calculate_shortest_path(x) for x in popgrid.index]

*Old Code*

In [38]:
# Initiate function
def calculate_shortest_path(row):
    try:
        return nx.shortest_path_length(Gproj, valid_nodes_grid[row], valid_nodes_hospitals[row], weight='length')
    except (nx.NetworkXNoPath, nx.NodeNotFound):
        return None

In [39]:
start_time = time.time()

# Get nearest bus stops based on euclid distance, now via map() and lambda functions
polygon_index = popgrid['geometry'].map(lambda row: hospitals.distance(row, align=True).sort_values().index[0])
nearest_hospitals = hospitals.loc[polygon_index].geometry.centroid

# List of grid centroid nearest nodes and their distances
nearest_nodes, dist = ox.distance.nearest_nodes(Gproj, popgrid['grid_lon'],
                                                popgrid['grid_lat'], return_dist=True)

# Apply vectorization to get the nearest grid nodes with a threshold
mask = np.vectorize(isinstance)(nearest_nodes, int)
nearest_nodes_g = np.where(mask, nearest_nodes, np.nan)
dist_g = np.where(mask, dist, np.nan)
valid_nodes_grid = np.array([nearest_nodes_g, dist_g], np.float64)
valid_nodes_grid = np.where(valid_nodes_grid[1]<1000, valid_nodes_grid[0], np.nan)

# List of bus stop nearest nodes and their distances
nearest_nodes_hospitals, dist_hospitals = ox.distance.nearest_nodes(Gproj, nearest_hospitals.geometry.x,
                                                nearest_hospitals.geometry.y, return_dist=True)

# Apply vectorization to get the nearest grid nodes with a threshold
mask = np.vectorize(isinstance)(nearest_nodes_hospitals, int)
nearest_nodes_b = np.where(mask, nearest_nodes_hospitals, np.nan)
dist_b = np.where(mask, dist_hospitals, np.nan)
valid_nodes_hospitals = np.array([nearest_nodes_b, dist_b], np.float64)
valid_nodes_hospitals = np.where(valid_nodes_hospitals[1]<1000, valid_nodes_hospitals[0], np.nan)

# Get the shortest distance by calling the function one cell above that uses nx.shortest_path_length()
popgrid['dist_to_hospitals'] = [calculate_shortest_path(x) for x in popgrid.index]

end_time = time.time()
execution_time = end_time - start_time

print(f"Execution time: {execution_time} seconds")

Execution time: 10229.70056772232 seconds


### 3. Analyzing and exporting the output

In [13]:
popgrid

Unnamed: 0,level_0,index,dissolve_key,row3,col3,population,grid_lon,grid_lat,grid_lon_4326,grid_lat_4326,...,dist_to_gym,dist_to_schools,dist_to_coluni,dist_to_place_of_worship,dist_to_sport_fields,dist_to_hospitals,geometry,center,dist_to_fast_food,dist_to_water_bodies
0,0,0,10-621,10,621,0,390427.904831,420645.529660,-2.146435,53.682200,...,11395.616,4949.030,15775.918,4621.159,5797.481,6023.933,"POLYGON ((390417.254 420647.897, 390433.850 42...","(53.68220010828345, -2.1464348064045695)",5154.379,7924.513
1,1,1,10-628,10,628,0,390871.732026,420671.933032,-2.139770,53.682474,...,11395.616,4949.030,15775.918,4621.159,5797.481,6023.933,"POLYGON ((390837.929 420707.144, 390893.725 42...","(53.682474316822464, -2.139769932112907)",5154.379,7924.513
2,2,2,10-629,10,629,0,390925.124111,420675.744940,-2.138872,53.682475,...,11119.316,4672.730,15499.618,4344.859,5521.181,5747.633,"POLYGON ((390893.725 420707.144, 390956.523 42...","(53.68247535958708, -2.1388721697636233)",4878.079,7648.213
3,3,3,10-630,10,630,0,390987.922453,420675.744940,-2.137921,53.682476,...,11119.316,4672.730,15499.618,4344.859,5521.181,5747.633,"POLYGON ((390956.523 420707.144, 391019.322 42...","(53.682476456797296, -2.1379213962979837)",4878.079,7648.213
4,4,4,10-631,10,631,0,391050.720795,420675.744940,-2.136971,53.682478,...,11119.316,4672.730,15499.618,4344.859,5521.181,5747.633,"POLYGON ((391019.322 420707.144, 391082.120 42...","(53.68247754645913, -2.136970622769079)",4878.079,7648.213
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
324747,324747,324747,99-740,99,740,0,397895.740085,415086.692492,-2.033295,53.632316,...,11393.764,7429.367,12725.301,7288.524,4525.565,9232.092,"POLYGON ((397864.341 415118.092, 397927.139 41...","(53.63231598685202, -2.0332945497947104)",7403.964,5628.122
324748,324748,324748,99-741,99,741,0,397958.538427,415086.692492,-2.032345,53.632316,...,11393.764,7429.367,12725.301,7288.524,4525.565,9232.092,"POLYGON ((397927.139 415118.092, 397989.938 41...","(53.63231623603037, -2.032344900788797)",7403.964,5628.122
324749,324749,324749,99-742,99,742,0,398021.336769,415086.692492,-2.031395,53.632316,...,11393.764,7429.367,12725.301,7288.524,4525.565,9232.092,"POLYGON ((397989.938 415118.092, 398052.736 41...","(53.63231647767457, -2.031395251768364)",7403.964,5628.122
324750,324750,324750,99-743,99,743,0,398084.135111,415086.692492,-2.030446,53.632317,...,11393.764,7429.367,12725.301,7288.524,4525.565,9232.092,"POLYGON ((398052.736 415118.092, 398115.534 41...","(53.632316711784654, -2.0304456027338516)",7403.964,5628.122


In [14]:
popgrid.dtypes

level_0                        int64
index                          int64
dissolve_key                  object
row3                           int64
col3                           int64
population                     int64
grid_lon                     float64
grid_lat                     float64
grid_lon_4326                float64
grid_lat_4326                float64
buffer_population            float64
buffer_area                  float64
buffer_pop_density           float64
dist_to_restaurant           float64
dist_to_bus_stop             float64
dist_to_daily_shops          float64
dist_to_business_shops       float64
dist_to_greenspace           float64
dist_to_train_stations       float64
dist_to_gym                  float64
dist_to_schools              float64
dist_to_coluni               float64
dist_to_place_of_worship     float64
dist_to_sport_fields         float64
dist_to_hospitals            float64
geometry                    geometry
center                        object
d

In [16]:
gdf = popgrid.drop('center', axis=1)

In [17]:
gdf.to_file('popgridmanchesternew.gpkg', driver='GPKG')