In [2]:
import numpy as np
import pandana as pdna
import geopandas as gpd
import pandas as pd
import math
import networkx as nx
import sys
# adding functions 
sys.path.insert(0, 'C:\\Users\\z3258367\\OneDrive - UNSW\\#PhD\\Walkability\\Other Cities\\Open-Walk-Index')
from walkability_functions import *

Choose a projected CRS to be used for all distance calculations.

In [3]:
# Projected CRS that every layer is converted to before distances are measured.
# NOTE(review): EPSG:7856 is GDA2020 / MGA zone 56, whereas Adelaide lies in MGA zone 54
# (EPSG:7854) — confirm this matches the CRS the network CSVs were built in before changing it.
proj_crs = "EPSG:7856"

## Import Data

Data sources:
1. Shape of Greater Adelaide - used to clip points if not already clipped to the city
2. Points of interest from OSM
3. Adelaide Metro public transport stops
4. ? additional POIs - haven't found any additional sources for South Australia yet.
5. Employment data - processed from ABS originally

In [4]:
# Directory holding the Adelaide input layers (machine-specific absolute path).
folder = "C:\\Users\\z3258367\\OneDrive - UNSW\\#PhD\\Walkability\\Other Cities\\Adelaide Data\\"

# Study-area boundary, reprojected to the common CRS; used later to clip the POIs.
Greater_Adelaide = gpd.read_file(folder + "Adelaide GCCSA.gpkg")
Greater_Adelaide = Greater_Adelaide.to_crs(proj_crs)

In [5]:
# OSM layers for Adelaide. They are read as stored on disk; reprojection to
# proj_crs happens once, after the layers are concatenated.
osm_poi_points = gpd.read_file(folder + "Adelaide OSM POI points.gpkg")
osm_poi_areas = gpd.read_file(folder + "Adelaide OSM POI area centroids.gpkg")
osm_transport_points = gpd.read_file(folder + "Adelaide OSM transport points.gpkg")
osm_transport_areas = gpd.read_file(folder + "Adelaide OSM transport areas.gpkg")

# The parks layer lives in a shared directory outside `folder`.
osm_parks_vertices = gpd.read_file(
    "C:\\Users\\z3258367\\OneDrive - UNSW\\#PhD\\Walkability\\Other Cities"
    + "\\Shared Aus Data\\OSM parks vertices.gpkg"
)

In [6]:
# Adelaide Metro stops come as a plain CSV with `stop_lon` / `stop_lat` columns.
# Read it with pandas and build the GeoDataFrame explicitly: depending on the I/O
# engine, gpd.read_file returns a plain DataFrame for a non-spatial file, in which
# case assigning `.geometry` / `.crs` and calling `.to_crs` would not work.
# Note: pandas infers column dtypes, so numeric-looking columns are no longer strings.
metro_stops = pd.read_csv(folder + "unique_stops_types.csv")
metro = gpd.GeoDataFrame(
    metro_stops,
    geometry=gpd.points_from_xy(metro_stops['stop_lon'], metro_stops['stop_lat']),
    crs="EPSG:7843",  # CRS declared for the raw lon/lat coordinates
).to_crs(proj_crs)

In [7]:
# Employment points, reprojected to the common CRS. The 'MB Job Count' column of this
# layer is used later when the employment component of the index is calculated.
employment_centrs = gpd.read_file(folder + "SA Employment Points.gpkg").to_crs(proj_crs)

Convert polygonal datasets to points and any multipart datasets to single part.

In [8]:
# Run the area / multipart layers through the project helper so that every
# layer is made of single points before they are combined.
osm_pois_2 = single_points(osm_poi_areas)
osm_transport_2 = single_points(osm_transport_areas)
osm_parks_vertices = single_points(osm_parks_vertices)

# Stack all OSM layers into one table, then reproject the result once.
osm_df = pd.concat(
    [
        osm_poi_points,
        osm_pois_2,
        osm_transport_points,
        osm_transport_2,
        osm_parks_vertices,
    ]
)
osm_df = osm_df.to_crs(proj_crs)

### Categorise and weight POIs

Categorise POI data - change classes depending on your analysis and your data sources.

In [9]:
# Mapping of walk-index category -> source tags, one mapping per data source.
# Adelaide Metro stops are categorised on their `service_type` column.
metro_categories = {
    'transport': ['bus'],
    'trains': ['train', 'tram'],
}

metro_categorised = categorise_pois(
    metro,
    metro_categories,
    category_column='service_type',
)

# OSM features are categorised on their `fclass` column.
osm_categories = {
    'eating': [
        'restaurant', 'pub', 'cafe', 'fast_food', 'food_court',
        'bakery', 'bar', 'nightclub', 'biergarten',
    ],
    'groceries': [
        'supermarket', 'chemist', 'pharmacy', 'greengrocer',
        'convenience', 'butcher', 'beverages', 'alcohol',
    ],
    'shopping': [
        'mall', 'bicycle_shop', 'clothes', 'department_store',
        'doityourself', 'outdoor_shop', 'stationery', 'bookshop',
        'gift_shop', 'newsagent', 'car_dealership', 'kiosk',
        'furniture_shop', 'sports_shop', 'garden_centre', 'computer_shop',
        'shoe_shop', 'beauty_shop', 'florist', 'video_shop',
        'toy_shop', 'mobile_phone_shop', 'jeweller', 'travel_agent',
    ],
    'errands': [
        'post_box', 'post_office', 'bank', 'atm',
        'doctors', 'dentist', 'laundry', 'hospital',
        'car_wash', 'veterinary', 'hairdresser', 'optician',
    ],
    'parks': [
        'viewpoint', 'park', 'playground', 'picnic_site', 'pitch',
        'swimming_pool', 'sports_centre', 'golf_course', 'track', 'dog_park',
    ],
    'education': ['college', 'school', 'kindergarten', 'university'],
    'entertainment': [
        'library', 'attraction', 'stadium', 'arts_centre',
        'theatre', 'artwork', 'archaeological', 'cinema',
        'museum', 'ruins', 'observation_tower', 'community_centre',
        'zoo', 'castle', 'theme_park', 'ice_rink',
    ],
    'trains': [
        'ferry_terminal', 'railway_station', 'bus_station',
        'tram_stop', 'railway_halt', 'publictransport',
    ],
    'transport': ['car_sharing', 'bus_stop'],
}

osm_categorised = categorise_pois(
    osm_df,
    osm_categories,
    category_column='fclass',
)

Tags present in the dataset but not categorised:
[]
Tags present in the dataset but not categorised:
['monument' 'camera_surveillance' 'hotel' 'town_hall' 'toilet' 'police'
 'motel' 'courthouse' 'drinking_water' 'telephone' 'memorial' 'guesthouse'
 'tourist_info' 'comms_tower' 'camp_site' 'car_rental' 'water_tower'
 'recycling_glass' 'bench' 'caravan_site' 'bicycle_rental' 'fire_station'
 'shelter' 'hostel' 'fountain' 'recycling' 'waste_basket' 'tower' 'prison'
 'vending_machine' 'chalet' 'vending_any' 'recycling_paper' 'graveyard'
 'embassy' 'vending_parking' 'windmill' 'public_building' 'water_works'
 'nursing_home' 'fort' 'wastewater_plant' 'lighthouse' 'water_well' 'taxi']


We need to remove potential overlap between different data sources (and inside some data sources). For this dataset it's around 40% (see the output below), mainly because public transport stops appear in both OSM and the transport agency data; in other cities there can also be overlap of things like post offices between OSM and government datasets such as SSNSW. Then take this combined POI set and clip it to the study area, which should be the same area as is covered by the network. This is important because otherwise points outside the network may be erroneously linked to the network.

In [10]:
# De-duplicate across (and within) the categorised sources, then keep only the
# points that fall inside the study-area boundary.
# buffer=10 is presumably a distance in proj_crs units — confirm against remove_duplicate_pois.
pois = gpd.clip(
    remove_duplicate_pois([osm_categorised, metro_categorised], buffer=10),
    Greater_Adelaide,
)

Removed 39.54% duplicate points from dataframes


Choose walk index weightings, and output the sum of each category and the overall total as a check. The walk index will be out of 100 regardless of this total, but it is important to note that, e.g., shopping is only '10% of the walk index' if the shopping weights sum to 10 and all weights sum to 100.

In [11]:
# Walk-index weightings: one list of weights per POI category. The per-category sums
# and the grand total are printed in the next cell as a sanity check.
# NOTE(review): each list presumably holds the weights for the 1st, 2nd, ... nearest
# POI of that category — confirm against walk_index.
poi_weights = {
    "employment": [10],
    "eating": [3, 3, 3, 2, 2, 1, 1, 1, 1, 1],
    "groceries": [10, 4],
    "shopping": [2, 2, 2, 2, 2],
    "errands": [6, 2, 4],  # NOTE(review): not in descending order, unlike the other lists — confirm intended
    "parks": [6],
    "education": [10],
    "entertainment": [5],
    "trains": [10],
    "transport": [2.5, 2.5]
}

In [12]:
# Total weight per category, and across all categories, printed for checking.
category_sums = {name: sum(poi_weights[name]) for name in poi_weights}
total = sum(category_sums.values())

print(category_sums)
print("total: ", total)

{'employment': 10, 'eating': 18, 'groceries': 14, 'shopping': 10, 'errands': 12, 'parks': 6, 'education': 10, 'entertainment': 5, 'trains': 10, 'transport': 5.0}
total:  100.0


### Import network

In this case the network is already in the same projected CRS as everything else but I have left in the transformation to be clear.

In [28]:
# geopandas.read_file crashes on these files on the author's machine, so the CSVs are
# loaded with pandas and the WKT `geometry` column is converted to geometries afterwards.
edges_df = pd.read_csv("adelaide_edges_5_parks.csv")
nodes_df = pd.read_csv("adelaide_nodes_5.csv")

# The CSVs carry no CRS information, so it is declared with set_crs. The to_crs call
# that follows is a no-op here and is kept only to make the transformation explicit.
edges = (
    gpd.GeoDataFrame(edges_df, geometry=gpd.GeoSeries.from_wkt(edges_df['geometry']))
    .set_crs(proj_crs)
    .to_crs(proj_crs)
)
nodes = (
    gpd.GeoDataFrame(nodes_df, geometry=gpd.GeoSeries.from_wkt(nodes_df['geometry']))
    .set_crs(proj_crs)
    .to_crs(proj_crs)
)

  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


<class 'str'>
<class 'str'>


Pandana expects edges to have a two item index based on the same IDs as the node index.

In [29]:
# Index the nodes by their ID. Guarded and non-mutating so that re-running this cell
# does not raise a KeyError once 'connect_id' has already become the index.
if nodes.index.name != 'connect_id':
    nodes = nodes.set_index('connect_id')

# Give the edges a two-level (from, to) index built from copies of the 'from' / 'to'
# columns, so the original columns stay available for the network constructor.
# set_index already names the levels 'from_idx' / 'to_idx'.
edges = (
    edges
    .assign(from_idx=edges['from'], to_idx=edges['to'])
    .set_index(['from_idx', 'to_idx'])
)

Pandana network creation.

In [30]:
# Maximum network distance used by the accessibility queries that follow.
maximum_dist = 2400

# Build the Pandana network from node coordinates and edge endpoints,
# using edge length as the impedance.
distance_network = pdna.Network(
    node_x=nodes['x'],
    node_y=nodes['y'],
    edge_from=edges['from'],
    edge_to=edges['to'],
    edge_weights=edges[['length']],
)

Pandana network querying. The 'employment' category is empty because we didn't add the employment points to the POI dataset.

In [31]:
# Query the network for every weighted POI category, out to the maximum distance.
results = walk_index(
    distance_network,
    pois,
    poi_weights,
    distance=maximum_dist,
)

Category employment is empty
Finished category: eating
Finished category: groceries
Finished category: shopping
Finished category: errands
Finished category: parks
Finished category: education
Finished category: entertainment
Finished category: trains
Finished category: transport


In [32]:
# Spot-check one output column of the walk-index results (indexed by connect_id).
# NOTE(review): 'parks1' is presumably the distance to the nearest 'parks' POI — confirm
# against walk_index.
results['parks1']

connect_id
0          115.042999
1          125.544998
2           62.541000
3           21.211000
4          190.306000
              ...    
1813031     27.712999
1813032      9.666000
1813034     22.007000
1813035     18.525000
1813036      8.803000
Name: parks1, Length: 1722184, dtype: float64

In [33]:
# Checkpoint: save the results before the employment component is added.
# The path is relative, so the file is written to the current working directory.
results.to_csv("Adelaide_colournoemployment_180222.csv")

### Employment

The current approach is to find up to 100 closest employment nodes within the maximum distance. Then look up the number of jobs at each one, apply a distance decay function to each distance, multiply these together, and sum.

An alternative approach which would be more convenient would be to use the Pandana 'aggregate' function which aggregates from all nodes within the maximum distance. However, there is limited ability to change the distance decay rate within the aggregation function. It can either be flat (no decay), linear (going to 0 at the max distance), or exponential where beta is set as 1/max distance. For walking I would like a beta of 0.001, but this requires the radius to be 1000m. If the radius is 2400m, beta is only 0.0004. This can be changed in the future if the Pandana function is updated to take a decay parameter.

In [34]:
# Coordinates of the employment points, in the projected CRS.
x = employment_centrs['geometry'].x
y = employment_centrs['geometry'].y

# Register the employment points on the network as their own POI category.
distance_network.set_pois(
    category='employment',
    maxdist=maximum_dist,
    maxitems=100,
    x_col=x,
    y_col=y,
)

# For every node: distances to, and IDs of, up to 100 nearest employment points.
employment_access = distance_network.nearest_pois(
    distance=maximum_dist,
    category='employment',
    num_pois=100,
    include_poi_ids=True,
)

The nearest_pois function returns both distances and the IDs of the nearest pois (with include_poi_ids option). The IDs can then be used to retrieve the number of jobs at each point. I found a merge was the fastest way to join this data.

In [23]:
def itermerge(dataframe, jobs):
    """Left-merge `jobs` onto `dataframe` once for each of its original columns.

    Each column of `dataframe` is used in turn as the lookup key against the index
    of `jobs`, so the result is `dataframe` followed by one extra column of looked-up
    values per original column. When the added column name already exists, the new
    column is suffixed with the position of the key column.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table whose columns hold keys into `jobs`' index.
    jobs : pandas.Series or pandas.DataFrame
        Values to look up, indexed by those keys (a Series must be named).

    Returns
    -------
    pandas.DataFrame
        The merged table; `dataframe` itself is not modified.
    """
    merged = dataframe
    # Iterate over the columns of the *input* frame only, not the growing result.
    for position, key_column in enumerate(dataframe):
        merged = merged.merge(
            jobs,
            how='left',
            left_on=key_column,
            right_index=True,
            suffixes=[None, position],
        )
    return merged

def access_weight(x, beta=-0.001, max_dist=None):
    """Exponential distance-decay weight for a single distance.

    Parameters
    ----------
    x : float
        Distance to weight.
    beta : float, optional
        Decay rate applied as ``exp(beta * x)``; negative values give decay.
    max_dist : float, optional
        Cutoff distance. Defaults to the notebook-level ``maximum_dist``.

    Returns
    -------
    float
        0 when ``x`` is at or beyond the cutoff, otherwise ``exp(beta * x)``.
    """
    if max_dist is None:
        # Fall back to the search radius defined when the network was built.
        max_dist = maximum_dist
    # `>=` rather than `==`: a distance at or past the cutoff carries no weight,
    # and the check no longer hinges on exact float equality.
    if x >= max_dist:
        return 0
    return math.exp(beta * x)

In [35]:
# Number of nearest employment points requested from Pandana (must match `num_pois`
# in the nearest_pois call). The first n_pois columns of employment_access hold the
# distances and the next n_pois hold the matching POI ids.
n_pois = 100
employment_distances = employment_access.iloc[:, 0:n_pois]
employment_poi_ids = employment_access.iloc[:, n_pois:2 * n_pois]

# Look up the job count behind every POI id; itermerge appends one job-count column
# per id column, so the counts sit in the second half of the merged frame.
jobcounts = itermerge(employment_poi_ids, employment_centrs['MB Job Count'])

# Element-wise decay weights. Series.map per column is used instead of
# DataFrame.applymap, which is deprecated in recent pandas releases.
decay_weights = employment_distances.apply(lambda col: col.map(access_weight))

# Decay-weighted job count per node; .values aligns by position rather than column label.
results['jobs'] = (
    decay_weights * jobcounts.iloc[:, n_pois:2 * n_pois].values
).sum(axis=1)

# Share of the 100-point index that is allocated to employment.
total_weight = sum(w for ws in poi_weights.values() for w in ws)
weight = 100 * sum(poi_weights['employment']) / total_weight

# Scale so that the node with the most accessible jobs receives the full weight.
results['employment'] = weight * results['jobs'] / results['jobs'].max()

# NOTE: this accumulates into Walk_Index, so running this cell more than once adds the
# employment score repeatedly. Re-run the walk_index cell first if you need to repeat it.
results['Walk_Index'] = results['Walk_Index'] + results['employment']

## Export results

Filter the results to the original Colouring Sydney buildings only. Optionally export results as a csv.

In [36]:
# Keep only the rows of `results` whose node is flagged as connect_type 'poi'.
poi_nodes = nodes.loc[nodes['connect_type'] == 'poi']
building_results = results.filter(items=poi_nodes.index, axis=0)

In [37]:
# Optional export of the filtered results as a CSV (relative path: written to the
# current working directory).
building_results.to_csv("Colouring_bf_results_180221.csv")

Import building footprints and join the data to them, then export these polygons.

In [38]:
# Turn the filtered results into points using their x / y columns. The CRS comes from
# proj_crs rather than a repeated literal so it cannot drift from the other layers.
results_gdf = gpd.GeoDataFrame(
    building_results,
    geometry=gpd.GeoSeries.from_xy(building_results.x, building_results.y, crs=proj_crs),
)

# Building footprint polygons, reprojected to the same CRS as the result points.
buildings_foot = gpd.read_file(folder + "adelaide_bf.shp").to_crs(proj_crs)

# join to data: attach the results of each point that a footprint contains; footprints
# without a matching point are kept (left join).
buildings_foot = gpd.sjoin(buildings_foot, results_gdf, how='left', predicate='contains')

buildings_foot.to_file("Colouring_bf_results_180221.gpkg")