In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandana as pdna
import geopandas as gpd
import pandas as pd
import osmnx as ox
import networkx as nx
import sys
#import matplotlib.pyplot as plt


In [3]:
# adding functions 
sys.path.insert(0, '/home/vagvaf/projects/THERE')
from walkability_functions import *

## Config variables 

before proceeding set up various config variables

In [20]:
# working folder
folder = "/home/vagvaf/projects/THERE/#Example Notebooks/Goteborg"

#choose a projected CRS to be used for all distance calculations.
proj_crs = "EPSG:3006"

#study area boundaries
study_area = 'Goteborg_metropolitan_dissolve.gpkg'

#poi dictionary

poi_dictionary = {
    'employment':{
        'category':['employment']
    },
    'shopping':{
        'shop':['bakery', 'clothes', 'supermarket', 'mall', 'greengrocer',
                'seafood', 'wine', 'butcher','convenience',
                'beverages', 'alcohol', 'bicycle_shop', 'department_store', 
                'doityourself', 'beauty_shop', 'outdoor_shop', 
                'stationery', 'bookshop', 'gift_shop', 'newsagent', 
                'car_dealership', 'furniture_shop', 'sports_shop',
                'garden_centre', 'computer_shop', 'shoe_shop', 'florist', 
                'video_shop', 'toy_shop', 'mobile_phone_shop', 'jeweller'],
        # possibly we could pick up all shop=True excluding a few. but not sure how
        # and many options to exclude
        'amenity':['marketplace'],
        'building':['kiosk', 'supermarket',],
    },
    'errands':{
        'amenity':['atm','bank','courthouse','post_box', 'post_office',
                   'clinic', 'dentist', 'doctors', 'hospital',
                   'pharmacy', 'veterinary', 'travel_agent',
                   'place_of_worship'],
        'shop':['optician', 'hairdresser', 'laundry',],
        'healthcare':['physiotherapist'],
        'office':['government'], #### further refine ?
    },
    'recreation':{
        'leisure':['dog_park', 'ice_rink', 'park', 'pitch', 'playground',
                   "fitness_centre","sports_centre", 'stadium', 'swimming_pool',
                   'swimming_area', 'track', 'water_park','golf_course',],
        'club':['social'],
        'amenity':['bar', 'biergarten', 'cafe', 'fast_food', 'food_court',
                   'ice_cream', 'pub', 'restaurant', 'nightclub',
                   'library', 'arts_centre', 'cinema', 'community_centre',
                   'social_centre', 'theatre',],
        'building':['stadium', 'castle', 'ruins',],
        'tourism':['aquarium', 'artwork', 'attraction', 'gallery',
                   'museum', 'picnic_site', 'theme_park', 'viewpoint',
                   'zoo'],
        'natural':['beach'],
    },
    'education':{
        'amenity':['college', 'kindergarten', 'music_school',
                   'school', 'university', 'childcare'],
    },
    'grocery':{
        'category':['grocery']
    }
}

# change this to match the column containting job counts in your employment dataframe
#poi_variables.loc['employment'] = 'Jobs_count'

# change this to make employment score more accurate (higher number) or to reduce index calculation time (low number)
#poi_nums.loc['employment'] = 1200

#network related configs
maximum_dist=2400


#results
output_file = 'Goteborg_results_walk_230723.gpkg'

In [6]:
# based on https://commons.wikimedia.org/wiki/File:Metropolitan_Gothenburg_ver.3.PNG
place_gdf = gpd.read_file(study_area, crs="EPSG:4326").to_crs(proj_crs)

In [7]:
# check that we have the right area
#place_gdf.geometry.plot()

## Import Data

Data sources:
1. Grocery stores data

### Add grocery store data

In [8]:
# import grocery_stores.gpkg from Data/Sweden
grocery_stores = gpd.read_file('grocery_stores.gpkg').to_crs(proj_crs)

In [9]:
# add category column, all values 'grocery' to grocery_stores
grocery_stores['category'] = 'grocery'
                                                                                        

In [10]:
#pois = pd.concat([osm_pois, employment_centrs])
pois = grocery_stores

pois = gpd.clip(pois, place_gdf.to_crs(proj_crs))

### Categorise and weight POIs

Choose walk index weightings, and output the sums of each category and the total to check. The walk index will be out of 100 regardless of this sum, but it is important to note that eg. shopping is only '10% of the walk index' if shopping is 10 out of 100.

In [11]:
poi_parameters = pd.read_csv(("poi_parameters.csv"), index_col=0)

In [12]:
poi_weights = poi_parameters['weight'].copy()

poi_lambdas = poi_parameters['diminishing_returns_constant'].copy()

poi_variables = poi_parameters['variable'].copy()

poi_nums = poi_parameters['num_pois'].copy()

poi_gammas = poi_parameters['distance_constant'].copy()

In [13]:
total = sum(poi_weights)
print("total: ", total, "\n", poi_weights)

total:  100 
 category
employment      0
education       0
shopping        0
errands         0
recreation      0
grocery       100
Name: weight, dtype: int64


### Import network

Pandana expects edges to have a two item index based on the same IDs as the node index. (with thanks to https://github.com/shriv/accessibility-series/blob/master/Accounting%20for%20hills%20in%20accessibility%20analyses.ipynb)

In [14]:
#G = ox.graph_from_place(place, network_type='walk')
G = ox.graph.graph_from_polygon(place_gdf.to_crs('EPSG:4326').geometry[0], network_type='walk')

In [15]:
ox.io.save_graphml(G, filepath=None, gephi=False, encoding='utf-8')

In [16]:
# Get nodes and edges as geodataframes (gdfs) from OSMNX network
graph_df = ox.graph_to_gdfs(G)
nodes_gdfs = graph_df[0].to_crs(proj_crs)
edges_gdfs = graph_df[1].to_crs(proj_crs)

## Pandana network creation.

In [17]:
# with new OSMnx graph from polygon seems to be different
edges_gdfs = edges_gdfs.reset_index()
# Setting indices of Edges gdfs to match expected dataframe for Pandana
edges_gdfs['from_idx'] = edges_gdfs['u']
edges_gdfs['to_idx'] = edges_gdfs['v']
edges_gdfs= edges_gdfs.set_index(['from_idx', 'to_idx'])
edges_gdfs.index.names= ['','']

# Setting indices of Nodes gdfs to match expected dataframe for Pandana
nodes_gdfs.index.name = 'id'
# Create a pandana network with data extracted from an OSMNX graph
distance_network = pdna.Network(nodes_gdfs.geometry.x, nodes_gdfs.geometry.y,
                                   edges_gdfs['u'], edges_gdfs['v'], 
                                   edges_gdfs[['length']])

Generating contraction hierarchies with 8 threads.
Setting CH node vector of size 220082
Setting CH edge vector of size 586106
Range graph removed 607502 edges of 1172212
. 10% . 20% . 30% . 40% . 50% . 60% . 70% . 80% . 90% . 100%


### Pandana network querying. 

In [21]:
results_walk = there_index(distance_network, grocery_stores, poi_dictionary, poi_weights, poi_gammas,
                            poi_nums, poi_lambdas, poi_variables, distance=maximum_dist)

No pois in category: employment
Finished category: employment
Maximum score: 0 out of 0
No pois in category: education
Finished category: education
Maximum score: 0 out of 0
No pois in category: shopping
Finished category: shopping
Maximum score: 0 out of 0
No pois in category: errands
Finished category: errands
Maximum score: 0 out of 0
No pois in category: recreation
Finished category: recreation
Maximum score: 0 out of 0
Finished category: grocery
Maximum score: 72.97092241434756 out of 100


Note that the mean below is based on every network point, in the case of Greater Gothenburg this includes a lot of empty areas. To get a better indicator of average access for the population, need to produce population-weighted results as in section below.

In [22]:
max(results_walk['THERE_Index']), np.mean(results_walk['THERE_Index'])

(72.97092241434756, 12.276192236634376)

The current approach is to find up to x closest nodes for each category within the maximum distance. Then look up the attractiveness at each one (just count for most), apply a distance decay function to each distance, apply diminishing returns to the resulting total opportunity, and sum.

An alternative approach which would be more convenient would be to use the Pandana 'aggregate' function which aggregates from all nodes within the maximum distance including applying a decay function. However, there is limited ability to change the distance decay rate within the aggregation function. It can either be flat (no decay), linear (going to 0 at the max distance), or exponential where beta is set as 1/max distance. For walking I would like a beta of 0.001, but this requires the radius to be 1000m. If the radius is 2400m, beta is only 0.0004. This can be changed in the future if the Pandana function is updated to take a decay parameter.

## Export results

Reduce the number of decimal places before export, distances to destinations do not need to be below 1m.

In [23]:
def result_rounding(results):
    # reduces results size for export
    # score columns such as THERE_Index, employment_subtotal etc -> 3 decimal places
    # distance columns such as employment1 -> 0 decimal places (nearest metre)
    # avoid doing anything to connect_id, x or y
    rounding_dict = {**{k:3 for k in results.columns if "Index" in k or "." in k
                        and 'connect_id' not in k},
                     **{k:0 for k in results.columns if "Index" not in k and "." not in k
                        and k != 'x'
                        and k != 'y'}}
    return results.round(rounding_dict)

In [24]:
small_results = gpd.GeoDataFrame(result_rounding(results_walk), geometry = gpd.GeoSeries.from_xy(results_walk.x, results_walk.y, crs=proj_crs))

In [25]:
small_results.to_file(output_file)