In [5]:
import numpy as np
import pandana as pdna
import geopandas as gpd
import pandas as pd
import math
import networkx as nx
import sys
# adding functions 
sys.path.insert(0, 'C:\\Users\\z3258367\\OneDrive - UNSW\\#PhD\\Walkability\\Other Cities\\Open-Walk-Index')
from walkability_functions import *

Choose a projected CRS to be used for all distance calculations.

In [6]:
# Projected CRS applied to every layer before any distance calculation.
# EPSG:7855 is GDA2020 / MGA zone 55 (metre units).
proj_crs = "EPSG:7855"

## Import Data

Data sources:
1. Shape of Greater Melbourne - used to clip points if not already clipped to the city
2. Points of interest from OSM
3. PTV public transport stops
4. Additional POIs from VicMaps Features of Interest collection
5. Employment data - processed from ABS originally

In [7]:
# Root folders: city-specific inputs/results, and the shared data directory.
folder = "C:\\Users\\z3258367\\OneDrive - UNSW\\#PhD\\Walkability\\Other Cities\\Colouring data & results\\Melbourne Data\\"
data = "C:\\Users\\z3258367\\OneDrive - UNSW\\#PhD\\Data\\"

# Study-area boundary, reprojected so it can be used to clip the other layers.
greater_melbourne_path = folder + "Greater_Melbourne_GCCSA_2016.shp"
Greater_Melbourne = gpd.read_file(greater_melbourne_path).to_crs(proj_crs)

In [8]:
# OSM extract layers: POI points/areas, transport points/areas and park vertices.
# All paths are built the same way (shared data folder + relative file name).
osm_poi_points = gpd.read_file(
    data + "OSM-australia-latest-free\\gis_osm_pois_free_1.shp")
osm_poi_areas = gpd.read_file(
    data + "OSM-australia-latest-free\\gis_osm_pois_a_free_1.shp")
osm_transport_points = gpd.read_file(
    data + "OSM-australia-latest-free\\gis_osm_transport_free_1.shp")
osm_transport_areas = gpd.read_file(
    data + "OSM-australia-latest-free\\gis_osm_transport_a_free_1.shp")
osm_parks_vertices = gpd.read_file(
    data + "OSM-australia-latest-free\\OSM parks vertices.gpkg")

Convert polygonal datasets to points and any multipart datasets to single part. Clip OSM data to Greater Melbourne.

In [9]:
# Run the area layers and the park vertices through single_points so every
# OSM layer can be stacked into one table.
osm_pois_2 = single_points(osm_poi_areas)
osm_transport_2 = single_points(osm_transport_areas)
osm_parks_vertices = single_points(osm_parks_vertices)

# Stack all OSM layers, move them to the projected CRS, then keep only the
# features inside the Greater Melbourne boundary.
osm_layers = [osm_poi_points, osm_pois_2, osm_transport_points,
              osm_transport_2, osm_parks_vertices]
osm_df = pd.concat(osm_layers).to_crs(proj_crs)
osm_df = gpd.clip(osm_df, Greater_Melbourne)

Import PTV data (already clipped to the Greater Melbourne area), VicMaps data, and employment data (prepared using Employment points.ipynb and ABS data).
I have added a specific service type to each PTV layer at this point, even though they will be rolled into the 'trains' and 'transport' categories for now, to make it easier to change the categories later if desired.

In [10]:
# One layer per PTV service type; fclass records the service so the layers can
# be re-categorised later without re-reading the files.
bus = gpd.read_file(folder + "PTV data\\PTV_METRO_BUS_STOP.SHP").assign(fclass='bus')
regional_bus = gpd.read_file(folder + "PTV data\\PTV_REGIONAL_BUS_STOP.SHP").assign(fclass='regional_bus')
tram = gpd.read_file(folder + "PTV data\\PTV_METRO_TRAM_STOP.SHP").assign(fclass='tram')
coach = gpd.read_file(folder + "PTV data\\PTV_REGIONAL_COACH_STOP.SHP").assign(fclass='coach')
# The variable names now match the file and fclass they hold (previously the
# regional stations were stored in `train` and the metro stations in
# `regional_train`).
regional_train = gpd.read_file(folder + "PTV data\\PTV_REGIONAL_TRAIN_STATION.SHP").assign(fclass='regional_train')
train = gpd.read_file(folder + "PTV data\\PTV_METRO_TRAIN_STATION.SHP").assign(fclass='train')

# Same concatenation order of the underlying files as before, so PTV is unchanged.
PTV = pd.concat([bus, regional_bus, tram, coach, regional_train, train]).to_crs(proj_crs)

In [11]:
# VicMap Features of Interest: point layer plus the geodatabase layer.
vicmaps_points = gpd.read_file(folder + "VicMap Features of Interest\\FOI_POINT.shp")
vicmaps_areas = gpd.read_file(folder + "VicMap Features of Interest\\VMFOI.gdb")

# NOTE(review): unlike the OSM area layers, vicmaps_areas is not passed through
# single_points before stacking — confirm the geometry type read from VMFOI.gdb.
vicmaps = gpd.clip(
    pd.concat([vicmaps_points, vicmaps_areas]).to_crs(proj_crs),
    Greater_Melbourne,
)

In [68]:
# Employment layer, reprojected to the common CRS.
employment_path = folder + "Vic_Employment_meshblocks.gpkg"
employment_centrs = gpd.read_file(employment_path).to_crs(proj_crs)

### Categorise and weight POIs

Categorise POI data - change classes depending on your analysis and your data sources.

In [13]:
# Map each PTV fclass value onto a walk-index category.
metro_categories = {
    'transport': ['bus', 'regional_bus', 'tram'],
    'trains': ['train', 'regional_train', 'coach'],
}

metro_categorised = categorise_pois(
    PTV,
    metro_categories,
    category_column='fclass',
)

Tags present in the dataset but not categorised:
[]


In [14]:
# Map OSM fclass tags onto walk-index categories. Tags not listed here are
# reported by categorise_pois as "present in the dataset but not categorised".
osm_categories = {"eating" : ['restaurant', 'pub', 'cafe', 'fast_food', 
                              'food_court', 'bakery', 'bar', 'nightclub', 'biergarten'], 
                  'groceries' : ['supermarket', 'chemist', 'pharmacy', 'greengrocer', 
                                 'convenience', 'butcher', 'beverages', 'alcohol'], 
                  'shopping' : ['mall', 'bicycle_shop', 'clothes', 
                                'department_store', 'doityourself', 
                                'outdoor_shop', 'stationery', 'bookshop', 
                                'gift_shop', 'newsagent', 'car_dealership', 
                                'kiosk', 'furniture_shop', 'sports_shop', 
                                'garden_centre', 'computer_shop', 'shoe_shop', 
                                'beauty_shop', 'florist', 'video_shop', 'toy_shop', 
                                'mobile_phone_shop', 'jeweller', 'travel_agent'], 
                  'errands' : ['post_box', 'post_office', 'bank', 'atm',
                               'doctors', 'dentist', 'laundry', 'hospital',
                               'car_wash', 'veterinary', 'hairdresser', 'optician'], 
                  'parks' : ['viewpoint', 'park', 'playground', 'picnic_site', 
                             'pitch', 'swimming_pool', 'sports_centre', 
                             'golf_course', 'track', 'dog_park'], 
                  'education' : ['college', 'school', 'kindergarten', 'university'], 
                  'entertainment' : ['library', 'attraction', 'stadium', 
                                     'arts_centre', 'theatre', 'artwork', 
                                     'archaeological', 'cinema', 'museum', 
                                     'ruins', 'observation_tower', 
                                     'community_centre', 'zoo', 'castle', 
                                     'theme_park', 'ice_rink'], 
                 # NOTE(review): OSM 'tram_stop' is placed in 'trains' here, while the PTV
                 # 'tram' class is placed in 'transport' in metro_categories — confirm intended.
                 'trains' : ['ferry_terminal', 'railway_station', 'bus_station', 
                             'tram_stop', 'railway_halt', 'publictransport'], 
                 'transport' : ['car_sharing', 'bus_stop']}

osm_categorised = categorise_pois(osm_df, osm_categories, 
                                  category_column='fclass')

Tags present in the dataset but not categorised:
['toilet' 'bench' 'drinking_water' 'shelter' 'camp_site' 'monument'
 'memorial' 'fire_station' 'telephone' 'tourist_info' 'hunting_stand'
 'camera_surveillance' 'waste_basket' 'motel' 'caravan_site' 'graveyard'
 'fountain' 'guesthouse' 'water_tower' 'tower' 'police' 'public_building'
 'vending_any' 'hotel' 'nursing_home' 'comms_tower' 'vending_parking'
 'recycling' 'lighthouse' 'wastewater_plant' 'bicycle_rental'
 'bed_and_breakfast' 'courthouse' 'town_hall' 'car_rental'
 'vending_machine' 'taxi' 'hostel' 'water_well' 'water_works'
 'recycling_clothes' 'recycling_glass' 'chalet' 'prison' 'embassy'
 'recycling_paper' 'alpine_hut']


In the VicMaps data, the 'community space' collection is caravan parks, camping areas (generally outside Greater Melbourne) and rest areas, not considered relevant. The 'community venue' collection is community centres, halls, senior clubs, scouts etc.

In [15]:
# Map VicMaps FTYPE values onto walk-index categories; categories with no
# matching VicMaps feature type are left empty.
vicmaps_categories = {
    'eating': [],
    'groceries': [],
    'shopping': [],
    'errands': ['hospital', 'health facility', 'place of worship'],
    'parks': ['recreational resource', 'reserve', 'sport facility'],
    'education': ['education centre'],
    'entertainment': ['cultural centre', 'commercial facility', 'community venue'],
    'trains': [],
    'transport': [],
}

vicmaps_categorised = categorise_pois(
    vicmaps,
    vicmaps_categories,
    category_column='FTYPE',
)

Tags present in the dataset but not categorised:
['sign' 'landmark' 'care facility' 'emergency facility'
 'communication service' nan 'community space' 'admin facility'
 'dumping ground' 'control point' 'excavation site' 'place'
 'storage facility' 'pipeline facility' 'defence site']


We need to remove potential overlap between different data sources (and within some data sources). For this dataset it's around 30%, because there is overlap of public transport stops between OSM and transport agencies, and overlap of places like parks and schools between OSM and VicMaps. Then take this combined POI set and clip it to the study area, which should be the same area as is covered by the network. This is important; otherwise points outside the network may be erroneously linked to the network.

In [16]:
# Merge the three categorised sources, dropping duplicates (buffer=10), then
# restrict the combined set to the study area.
poi_sources = [osm_categorised, vicmaps_categorised, metro_categorised]
pois = gpd.clip(remove_duplicate_pois(poi_sources, buffer=10), Greater_Melbourne)

Removed 35.67% duplicate points from dataframes


Choose the walk index weightings, and output the sum for each category and the overall total as a check. The walk index will be out of 100 regardless of this total, but it is important to note that, e.g., shopping is only '10% of the walk index' if shopping sums to 10 and the total is 100.

In [17]:
# Walk-index weights per category. The cell below sums each list and the total.
# Presumably each list holds one weight per n-th nearest POI of that category
# (the results table has columns eating1..eating10 for the ten eating weights)
# — TODO confirm against walk_index.
poi_weights = {
    "employment": [10],
    "eating": [3, 3, 3, 2, 2, 1, 1, 1, 1, 1],
    "groceries": [10, 4],
    "shopping": [2, 2, 2, 2, 2],
    "errands": [6, 2, 4],
    "parks": [6],
    "education": [10],
    "entertainment": [5],
    "trains": [10],
    "transport": [2.5, 2.5]
}

In [18]:
# Total weight per category, and the grand total, printed as a sanity check.
category_sums = {}
for category, weights in poi_weights.items():
    category_sums[category] = sum(weights)
total = sum(category_sums.values())
print(category_sums)
print("total: ", total)

{'employment': 10, 'eating': 18, 'groceries': 14, 'shopping': 10, 'errands': 12, 'parks': 6, 'education': 10, 'entertainment': 5, 'trains': 10, 'transport': 5.0}
total:  100.0


### Import network

In this case the network is already in the same projected CRS as everything else but I have left in the transformation to be clear.

In [19]:
# geopandas.read_file crashes on this machine for these files, so each table is
# read with pandas and its WKT 'geometry' column is parsed afterwards.
edges_df = pd.read_csv(folder + "melbourne_edges.csv")
nodes_df = pd.read_csv(folder + "melbourne_nodes.csv")


def _wkt_table_to_gdf(table, source_crs):
    """Build a GeoDataFrame from a table with a WKT 'geometry' column, labelled with source_crs."""
    parsed_geometry = gpd.GeoSeries.from_wkt(table['geometry'])
    return gpd.GeoDataFrame(table, geometry=parsed_geometry).set_crs(source_crs)


# NOTE(review): the source CRS is declared as EPSG:7856 while proj_crs is
# EPSG:7855 and the accompanying text says the network is already in the
# common CRS — confirm which CRS the CSV coordinates are really in.
edges = _wkt_table_to_gdf(edges_df, "EPSG:7856").to_crs(proj_crs)
nodes = _wkt_table_to_gdf(nodes_df, "EPSG:7856").to_crs(proj_crs)

  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


<class 'str'>
<class 'str'>


Pandana expects edges to have a two item index based on the same IDs as the node index.

In [20]:
# nodes.set_index('connect_id',inplace=True)   #this is already the case this time for some reason

# Copy the endpoint ids into dedicated columns and use them as the two-level
# edge index, leaving the original 'from'/'to' columns in place.
edges = (
    edges
    .assign(from_idx=edges['from'], to_idx=edges['to'])
    .set_index(['from_idx', 'to_idx'])
)

In [21]:
# Keep only edges whose two endpoints both appear among the node ids.
known_node_ids = nodes['Unnamed: 0']
has_known_ends = edges['to'].isin(known_node_ids) & edges['from'].isin(known_node_ids)
edges = edges[has_known_ends]

Pandana network creation.

In [50]:
# Search radius used for every network query below.
maximum_dist = 2400

# Build the Pandana network from node coordinates and edge endpoints, with
# edge 'length' as the impedance.
node_x = nodes['geometry'].x
node_y = nodes['geometry'].y
distance_network = pdna.Network(node_x, node_y,
                                edges['from'], edges['to'],
                                edges[['length']])

Pandana network querying. The 'employment' category is empty because we didn't add the employment points to the POI dataset.

In [54]:
# Run the walk-index query for every POI category, limited to maximum_dist.
results = walk_index(distance_network, pois, poi_weights, distance=maximum_dist)

Category employment is empty
Finished category: eating
Finished category: groceries
Finished category: shopping
Finished category: errands
Finished category: parks
Finished category: education
Finished category: entertainment
Finished category: trains
Finished category: transport


In [23]:
# Save the results as they stand at this point to CSV in the working directory.
results.to_csv("Melbourne_colournoemployment_220222.csv")

In [93]:
# Spot-check a single row of the results (selected by position).
results.iloc[4831719]

x                  3.204843e+05
y                  5.859578e+06
employment_10      0.000000e+00
eating1            2.400000e+03
eating2            2.400000e+03
eating3            2.400000e+03
eating4            2.400000e+03
eating5            2.400000e+03
eating6            2.400000e+03
eating7            2.400000e+03
eating8            2.400000e+03
eating9            2.400000e+03
eating10           2.400000e+03
eating_18          0.000000e+00
groceries1         2.400000e+03
groceries2         2.400000e+03
groceries_14       0.000000e+00
shopping1          2.400000e+03
shopping2          2.400000e+03
shopping3          2.400000e+03
shopping4          2.400000e+03
shopping5          2.400000e+03
shopping_10        0.000000e+00
errands1           2.400000e+03
errands2           2.400000e+03
errands3           2.400000e+03
errands_12         0.000000e+00
parks1             0.000000e+00
parks_6            6.000000e+00
education1         2.400000e+03
education_10       0.000000e+00
entertai

In [91]:
# Display the building-level results. NOTE(review): building_results is only
# assigned in the export section further down, so this cell fails if the
# notebook is run top to bottom.
building_results

Unnamed: 0,x,y,employment_10,eating1,eating2,eating3,eating4,eating5,eating6,eating7,...,entertainment1,entertainment_5,trains1,trains_10,transport1,transport2,transport_5,Walk_Index,jobs,employment
1460246,268074.594051,5.829563e+06,0,655.931030,688.497009,835.465027,1199.468018,1292.531006,1471.769043,1508.770996,...,1693.038940,0.919798,1508.770996,2.211816,281.856995,469.033997,3.449970,41.118455,2388.172076,0.341167
1460247,271115.911746,5.831284e+06,0,668.406006,700.971985,847.940002,1211.942993,1305.005981,1484.244019,1521.245972,...,1705.514038,0.908395,1521.245972,2.184395,294.332001,481.509003,3.407199,40.608689,2358.564716,0.336938
1460248,270185.934382,5.825931e+06,0,652.737976,685.304016,832.271973,1196.275024,1289.338013,1468.576050,1505.578003,...,1689.845947,0.922740,1505.578003,2.218890,278.664001,465.841003,3.461003,41.249956,2395.809685,0.342259
1460249,269813.466914,5.829363e+06,0,661.013977,693.580017,840.547974,1204.551025,1297.614014,1476.852051,1513.854004,...,1698.121948,0.915135,1513.854004,2.200602,286.940002,474.117004,3.432478,40.909981,2376.063791,0.339438
1460250,272326.247677,5.829353e+06,0,673.601013,706.166992,853.135010,1217.137939,1310.201050,1489.438965,1526.441040,...,1710.708984,0.903688,1526.441040,2.173077,299.527008,486.704010,3.389544,40.398274,2346.343702,0.335192
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4831717,320473.326838,5.859477e+06,0,1665.452026,2400.000000,2400.000000,2400.000000,2400.000000,2400.000000,2400.000000,...,2400.000000,0.000000,1624.034058,1.971020,158.395996,158.395996,4.267559,23.299406,406.329493,0.058047
4831718,320378.125054,5.859488e+06,0,104.323997,283.550995,680.067017,751.067993,866.624023,866.624023,897.437012,...,679.750000,2.533718,1353.634033,2.582999,284.338989,346.885010,3.648495,44.755285,791.281534,0.113040
4831719,320484.259324,5.859578e+06,0,2400.000000,2400.000000,2400.000000,2400.000000,2400.000000,2400.000000,2400.000000,...,2400.000000,0.000000,355.181000,7.010465,355.181000,1467.515015,2.328860,15.339326,0.000000,0.000000
4831720,320492.045272,5.859622e+06,0,570.848022,570.848022,745.591980,1332.422974,1434.894043,1529.667969,1679.235962,...,1653.838013,0.956571,1875.931030,1.532123,319.570007,462.863007,3.389851,40.956200,2128.813696,0.304116


### Employment

The current approach is to find up to 100 closest employment nodes within the maximum distance. Then look up the number of jobs at each one, apply a distance decay function to each distance, multiply these together, and sum.

An alternative approach which would be more convenient would be to use the Pandana 'aggregate' function which aggregates from all nodes within the maximum distance. However, there is limited ability to change the distance decay rate within the aggregation function. It can either be flat (no decay), linear (going to 0 at the max distance), or exponential where beta is set as 1/max distance. For walking I would like a beta of 0.001, but this requires the radius to be 1000m. If the radius is 2400m, beta is only 0.0004. This can be changed in the future if the Pandana function is updated to take a decay parameter. The aggregate function also seems to be slower than expected with the kind of network & distances I use.

In [69]:
# Run the employment layer through single_points, as was done for the OSM layers.
# NOTE(review): this overwrites employment_centrs in place, so re-running the
# cell applies single_points to its own output.
employment_centrs = single_points(employment_centrs)

In [72]:
# Drop the second level of the row index (presumably added by single_points —
# TODO confirm). Not re-runnable once the index has a single level.
employment_centrs = employment_centrs.droplevel(1, axis=0)

In [84]:
# Number of nearest employment points retrieved for every network node.
n_employment_pois = 100

# Index the employment points by their job count so that the POI ids returned
# by nearest_pois(include_poi_ids=True) can be used directly as job counts.
# A separate variable is used (instead of overwriting employment_centrs) so the
# cell can be re-run and the 'Jobs' column stays available on employment_centrs.
employment_by_jobs = employment_centrs.set_index('Jobs')

x, y = (employment_by_jobs['geometry'].x, employment_by_jobs['geometry'].y)

distance_network.set_pois(category='employment', maxdist=maximum_dist,
                          maxitems=n_employment_pois, x_col=x, y_col=y)

# Columns [0, n) hold the distances; columns [n, 2n) hold the matching POI ids.
employment_access = distance_network.nearest_pois(
    distance=maximum_dist, category='employment',
    num_pois=n_employment_pois, include_poi_ids=True)

jobcounts = employment_access.iloc[:, n_employment_pois:2 * n_employment_pois]

In [85]:
# Distance-decay rate per unit of network distance, and the decayed job total
# that earns the full employment weight.
EMPLOYMENT_DECAY_BETA = 0.001
JOBS_FOR_FULL_SCORE = 70000

# Decay weight for every (node, n-th nearest employment point) distance:
# exp(-beta * d), and 0 where d equals maximum_dist. This is computed with
# numpy instead of applymap(access_weight, ...), so the cell no longer depends
# on access_weight (which is only defined further down the notebook) and
# avoids a Python-level call per element. float64 keeps the precision that
# math.exp gave.
employment_distances = employment_access.iloc[:, 0:100].astype('float64')
decay_weights = np.exp(-EMPLOYMENT_DECAY_BETA * employment_distances).where(
    employment_distances != maximum_dist, 0)

results['jobs'] = (decay_weights * jobcounts.values).sum(axis=1)

# Share of the 0-100 index allotted to employment.
weight = 100 * sum(poi_weights['employment']) / sum(sum(list(poi_weights.values()), []))

# Remember any employment score added by a previous run of this cell so that
# re-running replaces it in Walk_Index rather than adding it a second time.
previous_employment = results['employment'] if 'employment' in results.columns else 0

# NOTE(review): the score is not capped, so nodes with more than
# JOBS_FOR_FULL_SCORE decayed jobs exceed `weight` — confirm intended.
results['employment'] = weight * results['jobs'] / JOBS_FOR_FULL_SCORE

results['Walk_Index'] = results['Walk_Index'] - previous_employment + results['employment']

#### below redundant now

In [57]:
# Superseded query: same employment lookup as above, kept for reference.
x = employment_centrs['geometry'].x
y = employment_centrs['geometry'].y

distance_network.set_pois(
    category='employment',
    maxdist=maximum_dist,
    maxitems=100,
    x_col=x,
    y_col=y,
)

employment_access = distance_network.nearest_pois(
    distance=maximum_dist,
    category='employment',
    num_pois=100,
    include_poi_ids=True,
)

The nearest_pois function returns both distances and the IDs of the nearest pois (with include_poi_ids option). The IDs can then be used to retrieve the number of jobs at each point. I found a merge was the fastest way to join this data.

In [82]:
def itermerge(dataframe, jobs):
    """Left-merge `jobs` onto `dataframe` once per original column.

    Each column of `dataframe` is used in turn as the key and matched against
    the index of `jobs`. The merged-in column from the pass at position i gets
    the suffix i when its name collides with a column already present.
    Returns the merged frame; with no columns, `dataframe` itself is returned.
    """
    merged = dataframe
    for position, key_column in enumerate(dataframe.columns):
        merged = merged.merge(
            jobs,
            how='left',
            left_on=key_column,
            right_index=True,
            suffixes=[None, position],
        )
    return merged

def access_weight(x, distance, beta=-0.001):
    """Exponential distance-decay weight for a single network distance.

    Parameters
    ----------
    x : float
        Distance to the destination.
    distance : float
        Cut-off distance; a value at (or beyond) it gets no weight.
    beta : float, optional
        Decay rate (negative), default -0.001 as before.

    Returns
    -------
    0 when ``x`` is at or beyond ``distance``, otherwise ``exp(beta * x)``.
    """
    # `>=` instead of `==`: a value past the cut-off must never earn a
    # positive weight.
    if x >= distance:
        return 0
    return math.exp(beta * x)

In [73]:
# Superseded merge-based approach (see the "redundant" heading above); kept for
# reference only. Known problems with this cell as written:
#  - the merge raised "ValueError: You are trying to merge on object and int64
#    columns" when it was last run (output below);
#  - access_weight is called without its required `distance` argument;
#  - the last line adds the employment score to Walk_Index again.
jobcounts = itermerge(employment_access.iloc[:,100:200], employment_centrs['Jobs'])

results['jobs'] = ((employment_access.iloc[:,0:100].applymap(access_weight))*
                                jobcounts.iloc[:,100:200].values
                                ).sum(axis=1)

weight = 100*sum(poi_weights['employment'])/sum(sum(list(poi_weights.values()),[]))

# Here the score is scaled by the largest observed job total rather than a fixed constant.
results['employment'] = weight*results['jobs']/max(results['jobs'])

results['Walk_Index'] = results['Walk_Index'] + results['employment']

ValueError: You are trying to merge on object and int64 columns. If you wish to proceed you should use pd.concat

## Export results

Filter the results to the original Colouring Sydney buildings only. Optionally export results as a csv.

In [86]:
# Keep only the result rows whose index matches a node of connect_type 'poi'.
is_building_node = nodes['connect_type'] == 'poi'
building_results = results.filter(items=nodes[is_building_node].index, axis=0)

In [87]:
# Optional CSV export of the building-level results (written to the working directory).
building_results.to_csv("Colouring_bf_results_010522.csv")

Import building footprints and join the data to them, then export these polygons.

In [38]:
# Turn the result rows into points. The x/y columns are taken to be the network
# node coordinates, which were reprojected to proj_crs before the network was
# built, so the points are labelled with proj_crs. The previous hard-coded
# "EPSG:7856" did not match the buildings layer, which is reprojected to
# proj_crs below, giving a CRS mismatch in the spatial join.
results_gdf = gpd.GeoDataFrame(
    building_results,
    geometry=gpd.GeoSeries.from_xy(building_results.x, building_results.y, crs=proj_crs))

# NOTE(review): the file is named "adelaide_bf.shp" but is read from the
# Melbourne data folder — confirm this is the intended footprint layer.
buildings_foot = gpd.read_file(folder + "adelaide_bf.shp").to_crs(proj_crs)

# join to data: attach the results of the point(s) each footprint contains
buildings_foot = gpd.sjoin(buildings_foot, results_gdf, how='left', predicate='contains')

buildings_foot.to_file("Colouring_bf_results_180221.gpkg")