TO USE: run all

To request PTV.data manually:
- https://data.gov.au/dataset/ds-vic-f8155dba-4f9c-43ee-ad83-f149fc3f1e9e/details?q=tram%20station
- sign up (free)
- select the geographical GDA2020 coordinate system and the ESRI shapefile format, with the "select all area available" option checked
- the file will be sent to the email address you provided
- download the file, rename it to PTV.zip, and move it to ../data/raw/

transport count (by SA2)
- ../data/raw/PTV/public_trans.csv
- SA2_CODE21: int, id of the district in SA2 format
- metrobus_count: int, count of PTV_METRO_BUS_STOP
- metrotrain_count: int, count of PTV_METRO_TRAIN_STATION
- metrotram_count: int, count of PTV_METRO_TRAM_STOP
- regbus_count: int, count of PTV_REGIONAL_BUS_STOP
- regcoach_count: int, count of PTV_REGIONAL_COACH_STOP
- regtrain_count: int, count of PTV_REGIONAL_TRAIN_STATION
- skybus_count: int, count of PTV_SKYBUS_STOP


In [1]:
import zipfile
import os
import pandas as pd
import geopandas as gpd

import folium
# Render floats with thousands separators and four decimals in displayed frames.
pd.set_option("display.float_format", "{:,.4f}".format)

# Despite its name, this is the directory the extracted PTV station shapefiles
# are read from (it is used as the prefix of every .shp path further down).
OUTPUT_DIR = "../data/raw/PTV/ll_gda2020/esrishape/whole_of_dataset/victoria/PUBLIC_TRANSPORT/"

# Header mapping asking for CSV; not referenced by the cells visible here.
headers = dict(accept="text/csv")

In [2]:
# Extract the manually downloaded PTV archive next to it, into ../data/raw/PTV/
with zipfile.ZipFile("../data/raw/PTV.zip", mode="r") as zip_ref:
    zip_ref.extractall(path="../data/raw/PTV/")



In [3]:
def gpd_station_merge(poly_gdf, file_path, by_id_name="SA2_CODE21",
                      station_id_name="STOP_ID", method=None):
    """
    Aggregate the features of a station shapefile per polygon.

    The shapefile at ``file_path`` is read, spatially left-joined onto
    ``poly_gdf`` and the joined rows are grouped by ``by_id_name`` and
    aggregated with ``method``.

    Parameters
    ----------
    poly_gdf : geopandas.GeoDataFrame
        Frame holding the POLYGON geometries; ``by_id_name`` must be one of
        its columns or the name of its index.
    file_path : str
        Path of the station shapefile to read.
    by_id_name : str
        Name of the polygon id used as the group-by key.
    station_id_name : str
        Name of the id column in the station shapefile; only this column and
        ``geometry`` are kept from the file.
    method : dict, optional
        Aggregations passed to ``DataFrameGroupBy.agg``. Defaults to
        ``{station_id_name: "count"}``, i.e. the number of stations per
        polygon.

    Returns
    -------
    pandas.DataFrame
        One row per ``by_id_name`` value with the aggregated columns.
    """
    # Build the default per call so it always follows ``station_id_name`` and
    # no mutable default object is shared between calls.
    if method is None:
        method = {station_id_name: "count"}

    # Read the station file and keep only the id and the geometry.
    station_gdf = gpd.read_file(file_path)[[station_id_name, "geometry"]]

    # A spatial join is only meaningful when both layers share a CRS, so
    # reproject the stations when both CRSs are known and differ.
    if (poly_gdf.crs is not None and station_gdf.crs is not None
            and station_gdf.crs != poly_gdf.crs):
        station_gdf = station_gdf.to_crs(poly_gdf.crs)

    # Left join keeps polygons without any station; their station id is
    # missing, so the default "count" aggregation reports 0 for them.
    join_gdf = gpd.sjoin(poly_gdf, station_gdf, how="left")
    return join_gdf.groupby(by_id_name).agg(method)

In [4]:
### Load the ABS SA2 digital boundaries and keep the Victorian polygons only
sa2_shapefile = "../data/raw/ABS/digitalBoundary/SA2_2021_AUST_GDA2020.shp"
boundary_gdf = gpd.read_file(sa2_shapefile)
is_victoria = boundary_gdf["STE_NAME21"] == "Victoria"
boundary_gdf = boundary_gdf.loc[is_victoria]
# No reprojection is applied: geometries stay in the CRS stored in the shapefile.

# Feature selection: keep just the geometry, indexed by the SA2 code
boundary_gdf = boundary_gdf[["SA2_CODE21", "geometry"]].set_index("SA2_CODE21")
print(boundary_gdf.shape)
print(boundary_gdf.head(1))

(524, 1)
                                                     geometry
SA2_CODE21                                                   
201011001   POLYGON ((143.78282 -37.56666, 143.75558 -37.5...


In [5]:
# Start from the SA2 polygons ordered by SA2 code; one stop-count column per
# PTV shapefile is appended to this frame below.
mix_gdf = boundary_gdf.sort_values(by=["SA2_CODE21"])
file_paths = [
    f"{OUTPUT_DIR}PTV_METRO_BUS_STOP.shp",
    f"{OUTPUT_DIR}PTV_METRO_TRAIN_STATION.shp",
    f"{OUTPUT_DIR}PTV_METRO_TRAM_STOP.shp",
    f"{OUTPUT_DIR}PTV_REGIONAL_BUS_STOP.shp",
    f"{OUTPUT_DIR}PTV_REGIONAL_COACH_STOP.shp",
    f"{OUTPUT_DIR}PTV_REGIONAL_TRAIN_STATION.shp",
    f"{OUTPUT_DIR}PTV_SKYBUS_STOP.shp"
]

# Output column names, in the same order as file_paths.
col_names = ["metrobus_count", "metrotrain_count", "metrotram_count",
    "regbus_count", "regcoach_count", "regtrain_count", "skybus_count"]

# Collect the per-SA2 counts first and concatenate once afterwards instead of
# growing the frame inside the loop. The counts are not sorted by value:
# the column-wise concat aligns rows on the SA2_CODE21 index, so row order of
# the individual count frames does not affect the result.
count_frames = []
for col_name, file_path in zip(col_names, file_paths):
    print(col_name, file_path)
    cur_gdf = gpd_station_merge(boundary_gdf, file_path).rename({"STOP_ID": col_name}, axis=1)
    count_frames.append(cur_gdf)

mix_gdf = pd.concat([mix_gdf, *count_frames], axis=1)

print(mix_gdf.shape)
print(mix_gdf.head())
# The geometry column is written as well, alongside the seven count columns.
mix_gdf.to_csv("../data/raw/PTV/public_trans.csv")

metrobus_count ../data/raw/PTV/ll_gda2020/esrishape/whole_of_dataset/victoria/PUBLIC_TRANSPORT/PTV_METRO_BUS_STOP.shp
metrotrain_count ../data/raw/PTV/ll_gda2020/esrishape/whole_of_dataset/victoria/PUBLIC_TRANSPORT/PTV_METRO_TRAIN_STATION.shp
metrotram_count ../data/raw/PTV/ll_gda2020/esrishape/whole_of_dataset/victoria/PUBLIC_TRANSPORT/PTV_METRO_TRAM_STOP.shp
regbus_count ../data/raw/PTV/ll_gda2020/esrishape/whole_of_dataset/victoria/PUBLIC_TRANSPORT/PTV_REGIONAL_BUS_STOP.shp
regcoach_count ../data/raw/PTV/ll_gda2020/esrishape/whole_of_dataset/victoria/PUBLIC_TRANSPORT/PTV_REGIONAL_COACH_STOP.shp
regtrain_count ../data/raw/PTV/ll_gda2020/esrishape/whole_of_dataset/victoria/PUBLIC_TRANSPORT/PTV_REGIONAL_TRAIN_STATION.shp
skybus_count ../data/raw/PTV/ll_gda2020/esrishape/whole_of_dataset/victoria/PUBLIC_TRANSPORT/PTV_SKYBUS_STOP.shp
(524, 8)
                                                     geometry  metrobus_count  \
SA2_CODE21                                                        