# Computing Proximity to the Closest Train Station for All Rental Properties

In [3]:
import requests
import zipfile
import io
import geopandas as gpd
from openrouteservice import Client
import json
import pandas as pd
from shapely.geometry import Point
import ast
import time
import os
import sys
import numpy as np
sys.path.append('../')
from scripts.utils import download_file, extract_zip

## Loading Train Station Data

In [4]:
# Remote archive holding the train station dataset, plus the local paths
# where the ZIP is saved and where its contents are unpacked.
url = 'https://s3.ap-southeast-2.amazonaws.com/cl-isd-prd-datashare-s3-delivery/Order_FYXPQM.zip'
zip_path = '../data/landing/train_stations/train_stations.zip'
extract_to = '../data/landing/train_stations/extracted'

# Fetch the archive, then unpack it. Both helpers come from scripts.utils;
# NOTE(review): whether they create missing directories or overwrite existing
# files is not visible from this notebook -- confirm in scripts/utils.py.
download_file(url, zip_path)
extract_zip(zip_path, extract_to)

# Report what was unpacked so the shapefile location can be confirmed by eye.
extracted_files = os.listdir(extract_to)
print("Files in extracted directory:", extracted_files)

Files in extracted directory: ['Creative Commons Licence.html', 'll_gda94', 'PTV_METRO_TRAIN_STATION_b6722101-8db5-51f0-8a6f-d1e4fe805b73.html']


In [5]:
# Where the metro train station shapefile sits inside the extracted archive
shapefile_path = '../data/landing/train_stations/extracted/ll_gda94/esrishape/whole_of_dataset/victoria/PTV/PTV_METRO_TRAIN_STATION.shp'

# Load the shapefile; geopandas returns it as a GeoDataFrame
train_gdf = gpd.read_file(shapefile_path)

# Plain-text preview of the first five rows (printed because the cell's
# displayed value below is the column index, not the frame itself)
print(train_gdf.head(5))

# Cell result: the column labels available on the GeoDataFrame
train_gdf.columns

  STOP_ID   LATITUDE                                          STOP_NAME  \
0   19970 -37.781193             Royal Park Railway Station (Parkville)   
1   19971 -37.788140  Flemington Bridge Railway Station (North Melbo...   
2   19972 -37.794267         Macaulay Railway Station (North Melbourne)   
3   19973 -37.807419   North Melbourne Railway Station (West Melbourne)   
4   19974 -37.788657        Clifton Hill Railway Station (Clifton Hill)   

    LONGITUDE TICKETZONE                                          ROUTEUSSP  \
0  144.952301          1                                            Upfield   
1  144.939323          1                                            Upfield   
2  144.936166          1                                            Upfield   
3  144.942570          1  Flemington,Sunbury,Upfield,Werribee,Williamsto...   
4  144.995417          1                                 Mernda,Hurstbridge   

                      geometry  
0   POINT (144.9523 -37.78119)  
1  POINT

Index(['STOP_ID', 'LATITUDE', 'STOP_NAME', 'LONGITUDE', 'TICKETZONE',
       'ROUTEUSSP', 'geometry'],
      dtype='object')

In [6]:
# Normalise every column label to lowercase (e.g. STOP_ID -> stop_id)
train_gdf.columns = list(map(str.lower, train_gdf.columns))

# Preview the frame with its renamed columns
train_gdf.head()

Unnamed: 0,stop_id,latitude,stop_name,longitude,ticketzone,routeussp,geometry
0,19970,-37.781193,Royal Park Railway Station (Parkville),144.952301,1,Upfield,POINT (144.9523 -37.78119)
1,19971,-37.78814,Flemington Bridge Railway Station (North Melbo...,144.939323,1,Upfield,POINT (144.93932 -37.78814)
2,19972,-37.794267,Macaulay Railway Station (North Melbourne),144.936166,1,Upfield,POINT (144.93617 -37.79427)
3,19973,-37.807419,North Melbourne Railway Station (West Melbourne),144.94257,1,"Flemington,Sunbury,Upfield,Werribee,Williamsto...",POINT (144.94257 -37.80742)
4,19974,-37.788657,Clifton Hill Railway Station (Clifton Hill),144.995417,1,"Mernda,Hurstbridge",POINT (144.99542 -37.78866)


In [7]:
# Check that every station's longitude and latitude fall within reasonable
# limits for Victoria. The bounds are rough approximations, not the exact
# state boundary.
VIC_LATITUDE_RANGE = (-39, -34)    # Latitude: Approx [-39, -34]
VIC_LONGITUDE_RANGE = (140, 150)   # Longitude: Approx [140, 150]

# Enforce the check instead of relying on eyeballing the summary table.
# `between` is inclusive at both ends, and missing values evaluate to False,
# so a station with a missing coordinate also fails the check.
assert train_gdf['latitude'].between(*VIC_LATITUDE_RANGE).all(), \
    "Train station latitude outside the expected range for Victoria"
assert train_gdf['longitude'].between(*VIC_LONGITUDE_RANGE).all(), \
    "Train station longitude outside the expected range for Victoria"

# Summary statistics, kept as the cell's displayed value for a visual check
train_gdf.describe()

Unnamed: 0,latitude,longitude
count,220.0,220.0
mean,-37.852378,145.045691
std,0.140332,0.139902
min,-38.374235,144.661118
25%,-37.899626,144.961156
50%,-37.826147,145.036588
75%,-37.769623,145.121451
max,-37.579091,145.486379


## Loading Rental Data

In [1]:
# Load the rental property data from the raw data layer.
# This cell uses `pd`, so the imports cell at the top of the notebook must be
# run first; running it on a fresh kernel raises NameError.
domain_parquet_path = '../data/raw/all_domain_properties.parquet'
domain_df = pd.read_parquet(domain_parquet_path)

# Preview the first rows
domain_df.head()

NameError: name 'pd' is not defined