# Preprocessing PTV
- This notebook preprocesses the PTV data.
- The preprocessed datasets are exported into the `./data/raw` directory.

In [1]:
import sys, os
sys.path.append(os.path.abspath('../'))
from scripts.utils import create_dir, get_runtime
import time 
start_time = time.time()

import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

## Stops

### 2 - Metropolitan Train

In [3]:
stop_2 = pd.read_csv("../data/landing/ptv/2/2/stops.txt")
# stop_2

In [4]:
stop_2_filtered = stop_2[['stop_name', 'stop_lat', 'stop_lon']]
stop_2_filtered

Unnamed: 0,stop_name,stop_lat,stop_lon
0,Sunbury Railway Station (Sunbury),-37.579091,144.727319
1,Diggers Rest Railway Station (Diggers Rest),-37.627017,144.719922
2,Stony Point Railway Station (Crib Point),-38.374235,145.221837
3,Crib Point Railway Station (Crib Point),-38.366123,145.204043
4,Morradoo Railway Station (Crib Point),-38.354033,145.189602
...,...,...,...
217,East Pakenham Railway Station (Pakenham),-38.084285,145.506314
218,Southland Railway Station (Cheltenham),-37.958756,145.049121
219,Middle Gorge Railway Station (South Morang),-37.644061,145.092144
220,Hawkstowe Railway Station (South Morang),-37.622995,145.097396


In [5]:
# Create a geometry column based on the latitude and longitude
geometry = [Point(xy) for xy in zip(stop_2_filtered['stop_lon'], stop_2_filtered['stop_lat'])]

# Create a GeoDataFrame
stop_2_gdf = gpd.GeoDataFrame(stop_2_filtered, geometry=geometry)

# Set a coordinate reference system (CRS), for example, WGS84 (EPSG:4326)
stop_2_gdf.set_crs(epsg=4326, inplace=True)

# Display the GeoDataFrame
stop_2_gdf

Unnamed: 0,stop_name,stop_lat,stop_lon,geometry
0,Sunbury Railway Station (Sunbury),-37.579091,144.727319,POINT (144.72732 -37.57909)
1,Diggers Rest Railway Station (Diggers Rest),-37.627017,144.719922,POINT (144.71992 -37.62702)
2,Stony Point Railway Station (Crib Point),-38.374235,145.221837,POINT (145.22184 -38.37423)
3,Crib Point Railway Station (Crib Point),-38.366123,145.204043,POINT (145.20404 -38.36612)
4,Morradoo Railway Station (Crib Point),-38.354033,145.189602,POINT (145.1896 -38.35403)
...,...,...,...,...
217,East Pakenham Railway Station (Pakenham),-38.084285,145.506314,POINT (145.50631 -38.08428)
218,Southland Railway Station (Cheltenham),-37.958756,145.049121,POINT (145.04912 -37.95876)
219,Middle Gorge Railway Station (South Morang),-37.644061,145.092144,POINT (145.09214 -37.64406)
220,Hawkstowe Railway Station (South Morang),-37.622995,145.097396,POINT (145.0974 -37.62299)


SA2

In [6]:
SA2 = gpd.read_file("../data/landing/sa2/sa2-21-shp/SA2_2021_AUST_GDA2020.shp")
SA2 = SA2[["SA2_CODE21", "SA2_NAME21", "geometry"]]
SA2.crs = 'EPSG: 4326'

In [7]:
stops_2_sa2 = gpd.sjoin(stop_2_gdf, SA2, how="left", predicate="within")

# Display the result
stops_2_sa2

Unnamed: 0,stop_name,stop_lat,stop_lon,geometry,index_right,SA2_CODE21,SA2_NAME21
0,Sunbury Railway Station (Sunbury),-37.579091,144.727319,POINT (144.72732 -37.57909),925,210041540,Sunbury - South
1,Diggers Rest Railway Station (Diggers Rest),-37.627017,144.719922,POINT (144.71992 -37.62702),924,210041539,Diggers Rest
2,Stony Point Railway Station (Crib Point),-38.374235,145.221837,POINT (145.22184 -38.37423),1108,214021379,Hastings - Somers
3,Crib Point Railway Station (Crib Point),-38.366123,145.204043,POINT (145.20404 -38.36612),1108,214021379,Hastings - Somers
4,Morradoo Railway Station (Crib Point),-38.354033,145.189602,POINT (145.1896 -38.35403),1108,214021379,Hastings - Somers
...,...,...,...,...,...,...,...
217,East Pakenham Railway Station (Pakenham),-38.084285,145.506314,POINT (145.50631 -38.08428),988,212011551,Pakenham - South East
218,Southland Railway Station (Cheltenham),-37.958756,145.049121,POINT (145.04912 -37.95876),854,208031188,Highett (East) - Cheltenham
219,Middle Gorge Railway Station (South Morang),-37.644061,145.092144,POINT (145.09214 -37.64406),896,209041436,South Morang - South
220,Hawkstowe Railway Station (South Morang),-37.622995,145.097396,POINT (145.0974 -37.62299),895,209041435,South Morang - North


#### Push Stops_2_sa2 to `raw` layer

In [9]:
create_dir('../data/raw/PTV/2 - Metropolitan Train')
stops_2_sa2.to_csv('../data/raw/PTV/2 - Metropolitan Train/stops_2_sa2.csv', index=False)

Created directory: ../data/raw/PTV/2 - Metropolitan Train



### 3 - Metropolitan Tram

In [11]:
stop_3 = pd.read_csv("../data/landing/ptv/3/3/stops.txt")
# stop_3

In [12]:
stop_3_filtered = stop_3[['stop_name', 'stop_lat', 'stop_lon']]
stop_3_filtered

Unnamed: 0,stop_name,stop_lat,stop_lon
0,45-Glenferrie Rd/Wattletree Rd (Malvern),-37.862455,145.028508
1,44-Duncraig Ave/Wattletree Rd (Armadale),-37.862069,145.025382
2,42-Clyde St/Raleigh Rd (Maribyrnong),-37.769699,144.898841
3,43-Egerton Rd/Wattletree Rd (Armadale),-37.861710,145.022754
4,50-Vincent St/Wattletree Rd (Malvern East),-37.864226,145.043375
...,...,...,...
1620,46-Nicholls St/Wattletree Rd (Malvern),-37.862658,145.030958
1621,47-Cabrini Hospital/Wattletree Rd (Malvern),-37.862960,145.033394
1622,48-Dixon St/Wattletree Rd (Malvern),-37.863393,145.036634
1623,49-Tooronga Rd/Wattletree Rd (Malvern East),-37.863827,145.039987


In [13]:
# Create a geometry column based on the latitude and longitude
geometry = [Point(xy) for xy in zip(stop_3_filtered['stop_lon'], stop_3_filtered['stop_lat'])]

# Create a GeoDataFrame
stop_3_gdf = gpd.GeoDataFrame(stop_3_filtered, geometry=geometry)

# Set a coordinate reference system (CRS), for example, WGS84 (EPSG:4326)
stop_3_gdf.set_crs(epsg=4326, inplace=True)

# Display the GeoDataFrame
stop_3_gdf

Unnamed: 0,stop_name,stop_lat,stop_lon,geometry
0,45-Glenferrie Rd/Wattletree Rd (Malvern),-37.862455,145.028508,POINT (145.02851 -37.86246)
1,44-Duncraig Ave/Wattletree Rd (Armadale),-37.862069,145.025382,POINT (145.02538 -37.86207)
2,42-Clyde St/Raleigh Rd (Maribyrnong),-37.769699,144.898841,POINT (144.89884 -37.7697)
3,43-Egerton Rd/Wattletree Rd (Armadale),-37.861710,145.022754,POINT (145.02275 -37.86171)
4,50-Vincent St/Wattletree Rd (Malvern East),-37.864226,145.043375,POINT (145.04338 -37.86423)
...,...,...,...,...
1620,46-Nicholls St/Wattletree Rd (Malvern),-37.862658,145.030958,POINT (145.03096 -37.86266)
1621,47-Cabrini Hospital/Wattletree Rd (Malvern),-37.862960,145.033394,POINT (145.03339 -37.86296)
1622,48-Dixon St/Wattletree Rd (Malvern),-37.863393,145.036634,POINT (145.03663 -37.86339)
1623,49-Tooronga Rd/Wattletree Rd (Malvern East),-37.863827,145.039987,POINT (145.03999 -37.86383)


In [14]:
stops_3_sa2 = gpd.sjoin(stop_3_gdf, SA2, how="left", predicate="within")

# Display the result
stops_3_sa2

Unnamed: 0,stop_name,stop_lat,stop_lon,geometry,index_right,SA2_CODE21,SA2_NAME21
0,45-Glenferrie Rd/Wattletree Rd (Malvern),-37.862455,145.028508,POINT (145.02851 -37.86246),860,208041194,Malvern - Glen Iris
1,44-Duncraig Ave/Wattletree Rd (Armadale),-37.862069,145.025382,POINT (145.02538 -37.86207),795,206061135,Armadale
2,42-Clyde St/Raleigh Rd (Maribyrnong),-37.769699,144.898841,POINT (144.89884 -37.7697),1060,213031349,Maribyrnong
3,43-Egerton Rd/Wattletree Rd (Armadale),-37.861710,145.022754,POINT (145.02275 -37.86171),795,206061135,Armadale
4,50-Vincent St/Wattletree Rd (Malvern East),-37.864226,145.043375,POINT (145.04338 -37.86423),860,208041194,Malvern - Glen Iris
...,...,...,...,...,...,...,...
1620,46-Nicholls St/Wattletree Rd (Malvern),-37.862658,145.030958,POINT (145.03096 -37.86266),860,208041194,Malvern - Glen Iris
1621,47-Cabrini Hospital/Wattletree Rd (Malvern),-37.862960,145.033394,POINT (145.03339 -37.86296),860,208041194,Malvern - Glen Iris
1622,48-Dixon St/Wattletree Rd (Malvern),-37.863393,145.036634,POINT (145.03663 -37.86339),860,208041194,Malvern - Glen Iris
1623,49-Tooronga Rd/Wattletree Rd (Malvern East),-37.863827,145.039987,POINT (145.03999 -37.86383),860,208041194,Malvern - Glen Iris


#### Push Stops_3_sa2 to `raw` layer

In [15]:
create_dir('../data/raw/PTV/3 - Metropolitan Tram')
stops_3_sa2.to_csv('../data/raw/PTV/3 - Metropolitan Tram/stops_3_sa2.csv', index=False)

Directory already exists: ../data/raw/PTV/3 - Metropolitan Tram



### 4 - Metropolitan Bus

In [16]:
stop_4 = pd.read_csv("../data/landing/ptv/3/3/stops.txt")
# stop_4

In [17]:
stop_4_filtered = stop_4[['stop_name', 'stop_lat', 'stop_lon']]
stop_4_filtered

Unnamed: 0,stop_name,stop_lat,stop_lon
0,45-Glenferrie Rd/Wattletree Rd (Malvern),-37.862455,145.028508
1,44-Duncraig Ave/Wattletree Rd (Armadale),-37.862069,145.025382
2,42-Clyde St/Raleigh Rd (Maribyrnong),-37.769699,144.898841
3,43-Egerton Rd/Wattletree Rd (Armadale),-37.861710,145.022754
4,50-Vincent St/Wattletree Rd (Malvern East),-37.864226,145.043375
...,...,...,...
1620,46-Nicholls St/Wattletree Rd (Malvern),-37.862658,145.030958
1621,47-Cabrini Hospital/Wattletree Rd (Malvern),-37.862960,145.033394
1622,48-Dixon St/Wattletree Rd (Malvern),-37.863393,145.036634
1623,49-Tooronga Rd/Wattletree Rd (Malvern East),-37.863827,145.039987


In [18]:
# Create a geometry column based on the latitude and longitude
geometry = [Point(xy) for xy in zip(stop_4_filtered['stop_lon'], stop_4_filtered['stop_lat'])]

# Create a GeoDataFrame
stop_4_gdf = gpd.GeoDataFrame(stop_4_filtered, geometry=geometry)

# Set a coordinate reference system (CRS), for example, WGS84 (EPSG:4326)
stop_4_gdf.set_crs(epsg=4326, inplace=True)

# Display the GeoDataFrame
stop_4_gdf

Unnamed: 0,stop_name,stop_lat,stop_lon,geometry
0,45-Glenferrie Rd/Wattletree Rd (Malvern),-37.862455,145.028508,POINT (145.02851 -37.86246)
1,44-Duncraig Ave/Wattletree Rd (Armadale),-37.862069,145.025382,POINT (145.02538 -37.86207)
2,42-Clyde St/Raleigh Rd (Maribyrnong),-37.769699,144.898841,POINT (144.89884 -37.7697)
3,43-Egerton Rd/Wattletree Rd (Armadale),-37.861710,145.022754,POINT (145.02275 -37.86171)
4,50-Vincent St/Wattletree Rd (Malvern East),-37.864226,145.043375,POINT (145.04338 -37.86423)
...,...,...,...,...
1620,46-Nicholls St/Wattletree Rd (Malvern),-37.862658,145.030958,POINT (145.03096 -37.86266)
1621,47-Cabrini Hospital/Wattletree Rd (Malvern),-37.862960,145.033394,POINT (145.03339 -37.86296)
1622,48-Dixon St/Wattletree Rd (Malvern),-37.863393,145.036634,POINT (145.03663 -37.86339)
1623,49-Tooronga Rd/Wattletree Rd (Malvern East),-37.863827,145.039987,POINT (145.03999 -37.86383)


In [19]:
stops_4_sa2 = gpd.sjoin(stop_4_gdf, SA2, how="left", predicate="within")

# Display the result
stops_4_sa2

Unnamed: 0,stop_name,stop_lat,stop_lon,geometry,index_right,SA2_CODE21,SA2_NAME21
0,45-Glenferrie Rd/Wattletree Rd (Malvern),-37.862455,145.028508,POINT (145.02851 -37.86246),860,208041194,Malvern - Glen Iris
1,44-Duncraig Ave/Wattletree Rd (Armadale),-37.862069,145.025382,POINT (145.02538 -37.86207),795,206061135,Armadale
2,42-Clyde St/Raleigh Rd (Maribyrnong),-37.769699,144.898841,POINT (144.89884 -37.7697),1060,213031349,Maribyrnong
3,43-Egerton Rd/Wattletree Rd (Armadale),-37.861710,145.022754,POINT (145.02275 -37.86171),795,206061135,Armadale
4,50-Vincent St/Wattletree Rd (Malvern East),-37.864226,145.043375,POINT (145.04338 -37.86423),860,208041194,Malvern - Glen Iris
...,...,...,...,...,...,...,...
1620,46-Nicholls St/Wattletree Rd (Malvern),-37.862658,145.030958,POINT (145.03096 -37.86266),860,208041194,Malvern - Glen Iris
1621,47-Cabrini Hospital/Wattletree Rd (Malvern),-37.862960,145.033394,POINT (145.03339 -37.86296),860,208041194,Malvern - Glen Iris
1622,48-Dixon St/Wattletree Rd (Malvern),-37.863393,145.036634,POINT (145.03663 -37.86339),860,208041194,Malvern - Glen Iris
1623,49-Tooronga Rd/Wattletree Rd (Malvern East),-37.863827,145.039987,POINT (145.03999 -37.86383),860,208041194,Malvern - Glen Iris


#### Push Stops_4_sa2 to `raw` layer

In [20]:
create_dir('../data/raw/PTV/4 - Metropolitan Bus')
stops_4_sa2.to_csv('../data/raw/PTV/4 - Metropolitan Bus/stops_4_sa2.csv', index=False)

Created directory: ../data/raw/PTV/4 - Metropolitan Bus

