# ABS preprocessing

This notebook aggregates the Openspace data such that the relevant fields are extracted and the data is neatly formatted into a csv wherein the rows are associated with an SA2

In [1]:
import pandas as pd
import geopandas as gpd

In [2]:
shape_file = gpd.read_file('../../data/landing/Open_Space/VPA_Draft_Open_Space_Data.shp')
sal_shape_file = gpd.read_file('../../data/landing/SAL_data/SAL_2021_AUST_GDA2020.shp')

shape_file

Unnamed: 0,FID,LGA,VM_PARCEL_,VM_PARCE_1,DATA_SOURC,OS_CATEGOR,OS_CATEG_2,OWNER_TYPE,PARK_NAME,OS_STATUS,...,WATER_BODY,OS_TYPE,COASTAL,MANAGER_NA,OWNER_NAME,Image_URL,VPA_ID,SHAPE_Leng,SHAPE_Area,geometry
0,1,BOROONDARA,3\LP28215,95086,VM Features of Interest,Tertiary institutions,Not applicable,State Government,Swinburne University Of Technology & Tafe Divi...,Existing,...,,Restricted public land,,NO DATA,Swinburne University of Technology,https://lh3.googleusercontent.com/-QP2NylEZuhE...,1500,0.000742,1.698581e-08,"POLYGON ((145.03932 -37.82299, 145.03937 -37.8..."
1,2,BOROONDARA,159\LP5481,203973157,VM Parcels,Parks and gardens,Not applicable,Local government,Kate Campbell Reserve,Existing,...,,Public open space,,NO DATA,Boroondara City Council,https://lh3.googleusercontent.com/-rdTUgqfNyXg...,1501,0.001105,6.526377e-08,"POLYGON ((145.03436 -37.79175, 145.03443 -37.7..."
2,3,BOROONDARA,3\TP84208,203973089,VM Parcels,Parks and gardens,Not applicable,Local government,Kate Campbell Reserve,Existing,...,,Public open space,,NO DATA,Boroondara City Council,https://lh3.googleusercontent.com/-rdTUgqfNyXg...,1502,0.001003,5.615414e-08,"POLYGON ((145.0348 -37.79127, 145.03487 -37.79..."
3,4,BOROONDARA,1\TP84208,203973073,VM Parcels,Parks and gardens,Not applicable,Local government,Kate Campbell Reserve,Existing,...,,Public open space,,NO DATA,Boroondara City Council,https://lh3.googleusercontent.com/-rdTUgqfNyXg...,1503,0.001102,6.530917e-08,"POLYGON ((145.03515 -37.79131, 145.03522 -37.7..."
4,5,BOROONDARA,1\TP802753,203972979,VM Parcels,Parks and gardens,Not applicable,Local government,Kate Campbell Reserve,Existing,...,,Public open space,,NO DATA,Boroondara City Council,https://lh3.googleusercontent.com/-rdTUgqfNyXg...,1504,0.001327,9.438898e-08,"POLYGON ((145.03549 -37.79136, 145.03557 -37.7..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38805,38806,FRANKSTON,,,Frankston Council,Transport reservations,Median park,Local government,Taverner Square Reserve,Existing,...,,Public open space,,NO DATA,Frankston City Council,https://lh3.googleusercontent.com/-7jagzn1L1NA...,39508,0.002004,2.802900e-07,"POLYGON ((145.15155 -38.17352, 145.15155 -38.1..."
38806,38807,YARRA,2\LP38905,2015149,VM Parcels,Non-government schools,Not applicable,Private,St Kevins College - Waterford,Existing,...,,Private open space,,NO DATA,NO DATA,https://lh3.googleusercontent.com/-uz6koGrYKZE...,39509,0.000825,2.976525e-08,"POLYGON ((144.99825 -37.82099, 144.9983 -37.82..."
38807,38808,FRANKSTON,NO DATA,NO DATA,Melbourne Water,Natural and semi-natural open space,Not applicable,Public authority,Seaford Swamp (MWC Drain),Existing,...,,Public open space,,NO DATA,Melbourne Water Corporation,https://lh3.googleusercontent.com/-mCSb2HRxCUs...,39303,0.008203,4.191129e-06,"POLYGON ((145.13578 -38.09732, 145.1361 -38.09..."
38808,38809,FRANKSTON,CP151911,1114997,VM Parcels,Parks and gardens,Not applicable,Local government,Broughton Reserve,Existing,...,,Public open space,,Frankston Council,Frankston City Council,https://lh3.googleusercontent.com/-rdTUgqfNyXg...,39304,0.003781,4.406187e-07,"POLYGON ((145.12702 -38.10245, 145.12721 -38.1..."


In [3]:
filtered_gdf_private = shape_file[['OS_CATEGOR', 'OWNER_NAME','PARK_NAME', 'geometry']]

filtered_gdf_private


types_in_SA2 = gpd.sjoin(sal_shape_file, filtered_gdf_private , how="inner", predicate="intersects")

Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:7844
Right CRS: EPSG:4326

  types_in_SA2 = gpd.sjoin(sal_shape_file, filtered_gdf_private , how="inner", predicate="intersects")


In [4]:
# Aggregate all the columns with the same PARK_NAME by keeping only one element
aggregated_by_park_name = filtered_gdf_private.groupby('PARK_NAME').first().reset_index()

aggregated_by_park_name

Unnamed: 0,PARK_NAME,OS_CATEGOR,OWNER_NAME,geometry
0,"1 Clydebank Rd, Essendon West",Transport reservations,VicRoads,"POLYGON ((144.88756 -37.75395, 144.8878 -37.75..."
1,1 Nepean Highway walkway between Hotel and Mor...,Recreation corridor,Kingston City Council,"POLYGON ((145.08772 -38.01003, 145.08778 -38.0..."
2,"1 Padey Drive, Mulgrave",Parks and gardens,Monash City Council,"POLYGON ((145.19468 -37.92615, 145.19456 -37.9..."
3,1-13 Somerfield Drive Res,Parks and gardens,Greater Dandenong City Council,"POLYGON ((145.17531 -38.0065, 145.17631 -38.00..."
4,"1-3 St Clair Crescent, Mount Waverley",Parks and gardens,Monash City Council,"POLYGON ((145.14731 -37.87291, 145.14712 -37.8..."
...,...,...,...,...
11824,strip of land with vegetation - Cardinia,Natural and semi-natural open space,Crown,"POLYGON ((145.62107 -38.05668, 145.62118 -38.0..."
11825,v,Transport reservations,VicRoads,"POLYGON ((145.09082 -37.70306, 145.09076 -37.7..."
11826,various Retarding Basins - Hume,Natural and semi-natural open space,Melbourne Water Corporation,"POLYGON ((144.89067 -37.67675, 144.89064 -37.6..."
11827,vested in Water Authority,Services and utilities reserves,Crown,"POLYGON ((145.31995 -38.19631, 145.32 -38.1960..."


In [5]:
# Pivot the data to aggregate by 'SA2_CODE21' and count occurrences of each 'OS_CATEGOR'
aggregated_df = types_in_SA2.pivot_table(index='SAL_CODE21', columns='OS_CATEGOR', aggfunc='size', fill_value=0)

aggregated_df

OS_CATEGOR,Cemeteries,Civic squares and promenades,Conservation reserves,Government schools,Natural and semi-natural open space,Non-government schools,Parks and gardens,Public housing reserves,Recreation corridor,Services and utilities reserves,Sportsfields and organised recreation,Tertiary institutions,Transport reservations
SAL_CODE21,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
20002,0,0,0,1,15,2,31,0,1,0,1,0,0
20003,0,0,0,1,2,5,3,0,2,0,2,0,1
20006,0,0,0,0,2,0,0,0,0,0,0,0,0
20011,0,0,0,0,6,0,0,0,0,0,0,0,0
20015,0,0,0,1,3,1,16,0,8,0,2,0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...
22911,1,0,2,3,24,3,3,0,6,31,6,0,0
22916,0,0,0,1,23,1,0,0,0,0,7,0,3
22917,1,0,0,6,21,1,33,0,0,0,15,0,2
22925,0,0,64,1,22,0,0,0,0,15,3,0,0


In [6]:
# Reset the index to turn the aggregated table into a normal DataFrame
normal_df = aggregated_df.reset_index()

normal_df

OS_CATEGOR,SAL_CODE21,Cemeteries,Civic squares and promenades,Conservation reserves,Government schools,Natural and semi-natural open space,Non-government schools,Parks and gardens,Public housing reserves,Recreation corridor,Services and utilities reserves,Sportsfields and organised recreation,Tertiary institutions,Transport reservations
0,20002,0,0,0,1,15,2,31,0,1,0,1,0,0
1,20003,0,0,0,1,2,5,3,0,2,0,2,0,1
2,20006,0,0,0,0,2,0,0,0,0,0,0,0,0
3,20011,0,0,0,0,6,0,0,0,0,0,0,0,0
4,20015,0,0,0,1,3,1,16,0,8,0,2,0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,22911,1,0,2,3,24,3,3,0,6,31,6,0,0
565,22916,0,0,0,1,23,1,0,0,0,0,7,0,3
566,22917,1,0,0,6,21,1,33,0,0,0,15,0,2
567,22925,0,0,64,1,22,0,0,0,0,15,3,0,0


In [7]:

normal_df.rename(columns={'SAL_CODE21': 'SAL_CODE'}, inplace=True)

normal_df.to_csv('../../data/curated/open_space.csv', index=False)