## Calculate average distance to nearest public transport

In [2]:
import geopandas as gpd
import pandas as pd
import numpy as np
import openrouteservice
from shapely.geometry import Point, Polygon, MultiPolygon

In [3]:
# Load the ORS client with your API key
client = openrouteservice.Client(key='5b3ce3597851110001cf62488c71fa847a704dee913c2695723d7af2')

In [4]:
# Load the rental properties data (make sure your CSV contains lat/lon in EPSG:4326)
properties_df = pd.read_csv('../data/raw/sample.csv')

# Convert property coordinates into GeoDataFrame
properties_df['coordinates'] = properties_df['coordinates'].apply(lambda x: tuple(map(float, x.split(','))))
properties_gdf = gpd.GeoDataFrame(properties_df, geometry=properties_df['coordinates'].apply(lambda x: Point(x[1], x[0])))

# Set CRS to WGS84 (EPSG:4326) for latitude/longitude coordinates
properties_gdf.set_crs(epsg=4326, inplace=True)

Unnamed: 0.1,Unnamed: 0,suburb,postcode,coordinates,geometry
0,3642,Moonee Ponds,3039,"(-37.7617975, 144.9195265)",POINT (144.91953 -37.7618)
1,2130,Braybrook,3019,"(-37.7885063, 144.8590231)",POINT (144.85902 -37.78851)
2,14000,Clyde North,3978,"(-38.1030616, 145.3788059)",POINT (145.37881 -38.10306)
3,2057,Williamstown,3016,"(-37.8611996, 144.8978709)",POINT (144.89787 -37.8612)
4,10800,Aireys Inlet,3231,"(-38.4591278, 144.1071273)",POINT (144.10713 -38.45913)
5,2930,Tarneit,3029,"(-37.8277664, 144.6690104)",POINT (144.66901 -37.82777)
6,651,Melbourne,3000,"(-37.8088893, 144.9625546)",POINT (144.96255 -37.80889)
7,12678,Wollert,3750,"(-37.6141717, 144.9962749)",POINT (144.99627 -37.61417)
8,2103,Altona,3018,"(-37.8639295, 144.8353463)",POINT (144.83535 -37.86393)
9,188,Melbourne,3000,"(-37.8122683, 144.9582693)",POINT (144.95827 -37.81227)


In [5]:
# Load public transport shapefiles for train and tram stops
pt_area_gdf = gpd.read_file('../data/map/Public Transport Victoria/gis_osm_transport_a_free_1.shp')
pt_line_gdf = gpd.read_file('../data/map/Public Transport Victoria/gis_osm_transport_free_1.shp')
pt_gdf = pd.concat([pt_area_gdf, pt_line_gdf], ignore_index=True)

In [6]:
# Load Victoria suburbs shapefile for filtering
vic_suburbs_gdf = gpd.read_file('../data/map/Vic_Localities/gda2020_vicgrid/esrishape/whole_of_dataset/victoria/VMADMIN/LOCALITY_POLYGON.shp')
vic_suburbs_gdf = vic_suburbs_gdf.to_crs(epsg=4326)

# Filter public transport stops that fall within Victoria
filtered_pt_gdf = gpd.sjoin(pt_gdf.to_crs(epsg=4326), vic_suburbs_gdf, how='inner', predicate='intersects')


In [7]:
# Filter train stations and tram stops
train_stops_gdf = filtered_pt_gdf[filtered_pt_gdf['fclass'].isin(['railway_station', 'railway_halt'])]
tram_stops_gdf = filtered_pt_gdf[filtered_pt_gdf['fclass'] == 'tram_stop']

# Convert any Polygon or MultiPolygon geometries to centroids
train_stops_gdf['geometry'] = train_stops_gdf['geometry'].apply(lambda geom: geom.centroid if isinstance(geom, (Polygon, MultiPolygon)) else geom)
tram_stops_gdf['geometry'] = tram_stops_gdf['geometry'].apply(lambda geom: geom.centroid if isinstance(geom, (Polygon, MultiPolygon)) else geom)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [8]:
# Re-project the property data and stops to EPSG:4326
train_stops_gdf.set_crs(epsg=4326, inplace=True, allow_override=True)
tram_stops_gdf.set_crs(epsg=4326, inplace=True, allow_override=True)
# properties_gdf.set_crs(epsg=4326, inplace=True, allow_override=True)

Unnamed: 0,osm_id,code,fclass,name,geometry,index_right,UFI,PFI,LOCALITY,GAZLOC,VICNAMESID,TASK_ID,PFI_CR,UFI_OLD,UFI_CR,LABEL_USE_
689,217625262,5603,tram_stop,Stop 124A: Casino/MCEC,POINT (144.95619 -37.82335),1921,743233293,205410296,SOUTHBANK,SOUTHBANK,102786,,2005-09-27,729544619,2023-04-17,5
690,217625264,5603,tram_stop,Stop 124A: Casino/MCEC,POINT (144.95609 -37.82337),1925,743233294,210768089,SOUTH WHARF,SOUTH WHARF,103517,,2008-03-17,729544621,2023-04-17,5
3998,122817630,5603,tram_stop,Stop D1: Docklands Stadium,POINT (144.9464 -37.81461),2897,812633154,205410229,DOCKLANDS,DOCKLANDS,100986,,2005-09-27,743233291,2023-11-24,5
4189,252587671,5603,tram_stop,Stop 7D: AAMI Park,POINT (144.98679 -37.82411),2057,777758969,205410007,MELBOURNE,MELBOURNE,102000,,2005-09-27,743233290,2023-07-03,5
4190,252588293,5603,tram_stop,Stop 7A: William Barak Bridge,POINT (144.97598 -37.81823),2057,777758969,205410007,MELBOURNE,MELBOURNE,102000,,2005-09-27,743233290,2023-07-03,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70903,11234800056,5603,tram_stop,Stop 58: Box Hill Central,POINT (145.12208 -37.81783),2260,746966702,205410187,BOX HILL,BOX HILL,100468,,2005-09-27,468966733,2023-05-01,5
70904,11234800057,5603,tram_stop,Stop 58: Box Hill Central,POINT (145.12207 -37.8179),2260,746966702,205410187,BOX HILL,BOX HILL,100468,,2005-09-27,468966733,2023-05-01,5
77050,11852055220,5603,tram_stop,Stop 6: Melbourne Central and State Library St...,POINT (144.96427 -37.80945),2057,777758969,205410007,MELBOURNE,MELBOURNE,102000,,2005-09-27,743233290,2023-07-03,5
77586,11903142479,5603,tram_stop,Stop 30: St Kilda Junction,POINT (144.98252 -37.85535),2057,777758969,205410007,MELBOURNE,MELBOURNE,102000,,2005-09-27,743233290,2023-07-03,5


In [9]:
# Group the properties by suburb
suburbs_gdf = properties_gdf.dissolve(by='suburb', as_index=False)

In [10]:
# Function to calculate nearest transport stops for each suburb
def calculate_nearest_transports(suburb_geometry, transport_stops, num_stops=3, profile='driving-car'):
    # Suburb centroid to use as the reference point
    suburb_centroid = suburb_geometry.centroid
    
    # Find nearest transport stops (train or tram)
    nearby_stops = transport_stops.copy()
    nearby_stops['distance'] = nearby_stops.geometry.distance(suburb_centroid)
    
    # Sort by distance and take the nearest `num_stops` stops
    nearest_stops = nearby_stops.nsmallest(num_stops, 'distance')

    if nearest_stops.empty:
        return None

    total_distance = 0
    for _, stop in nearest_stops.iterrows():
        try:
            # Use ORS API to get the driving distance
            distances = client.distance_matrix(
                locations=[(suburb_centroid.x, suburb_centroid.y), (stop.geometry.x, stop.geometry.y)],
                profile=profile,
                metrics=['distance'],
                units='m'
            )
            total_distance += distances['distances'][0][1]
        except openrouteservice.exceptions.ApiError as e:
            print(f"ORS API Error: {e}")
            return None

    # Return average distance for the nearest `num_stops`
    return total_distance / num_stops

In [11]:
# Function to calculate the distance from suburb to Melbourne CBD (by car)
def calculate_distance_to_cbd(suburb_geometry, cbd_point, profile='driving-car'):
    suburb_centroid = suburb_geometry.centroid
    try:
        distances = client.distance_matrix(
            locations=[(suburb_centroid.x, suburb_centroid.y), (cbd_point.x, cbd_point.y)],
            profile=profile,
            metrics=['distance'],
            units='m'
        )
        return distances['distances'][0][1]  # Return distance in meters
    except openrouteservice.exceptions.ApiError as e:
        print(f"ORS API Error: {e}")
        return None

In [12]:
# Define the Melbourne CBD as a fixed point (Flinders Street Station: Lat: -37.8183, Lon: 144.9671)
melbourne_cbd_point = Point(144.9671, -37.8183)

# Calculate the average distance to the nearest public transport stops for each suburb
suburbs_gdf['nearest_transport_avg_distance'] = suburbs_gdf['geometry'].apply(
    lambda geom: calculate_nearest_transports(geom, train_stops_gdf.append(tram_stops_gdf), num_stops=3)
)

# Calculate the distance to Melbourne CBD for each suburb outside postcode 3000
suburbs_gdf['distance_to_cbd'] = suburbs_gdf.apply(
    lambda row: calculate_distance_to_cbd(row['geometry'], melbourne_cbd_point) if row['suburb'] != 'Melbourne' else None,
    axis=1
)

  lambda geom: calculate_nearest_transports(geom, train_stops_gdf.append(tram_stops_gdf), num_stops=3)

  nearby_stops['distance'] = nearby_stops.geometry.distance(suburb_centroid)
  lambda geom: calculate_nearest_transports(geom, train_stops_gdf.append(tram_stops_gdf), num_stops=3)

  nearby_stops['distance'] = nearby_stops.geometry.distance(suburb_centroid)
  lambda geom: calculate_nearest_transports(geom, train_stops_gdf.append(tram_stops_gdf), num_stops=3)

  nearby_stops['distance'] = nearby_stops.geometry.distance(suburb_centroid)
  lambda geom: calculate_nearest_transports(geom, train_stops_gdf.append(tram_stops_gdf), num_stops=3)

  nearby_stops['distance'] = nearby_stops.geometry.distance(suburb_centroid)
  lambda geom: calculate_nearest_transports(geom, train_stops_gdf.append(tram_stops_gdf), num_stops=3)

  nearby_stops['distance'] = nearby_stops.geometry.distance(suburb_centroid)
  lambda geom: calculate_nearest_transports(geom, train_stops_gdf.append(tram_stops_gdf), num_s

In [14]:

# Save the final dataset to a CSV file
suburbs_gdf[['suburb', 'nearest_transport_avg_distance', 'distance_to_cbd']].to_csv('../data/raw/suburb_transport_distances.csv', index=False)


In [8]:

# # Reproject all geodataframes to EPSG:3577 (Australian projection) for accurate distance calculations
# train_stops_gdf = train_stops_gdf.to_crs(epsg=3577)
# tram_stops_gdf = tram_stops_gdf.to_crs(epsg=3577)
# properties_gdf = properties_gdf.to_crs(epsg=3577)

In [9]:
# # Function to get the nearest stops within a buffer or nearest N stops
# def get_nearest_stops(property_point, transport_stops, num_stops=1, max_distance=10000):
#     # Create a buffer around the property point
#     property_buffer = property_point.buffer(max_distance)
    
#     # Filter the transport stops to those within the buffer
#     nearby_stops = transport_stops[transport_stops.geometry.within(property_buffer)]
    
#     if len(nearby_stops) > num_stops:
#         # Sort by distance and return the nearest stop(s)
#         nearby_stops['distance'] = nearby_stops.geometry.distance(property_point)
#         nearby_stops = nearby_stops.nsmallest(num_stops, 'distance')
    
#     return nearby_stops

## FOR PROPERTIES - DEPRACATED

In [9]:

# Function to calculate driving distance using ORS API (ensure lat/lon EPSG:4326 for ORS)
def calculate_nearest_transport(property_point, transport_points):
    property_buffer = property_point.buffer(30000)  # 10km buffer
    nearby_stops = transport_points[transport_points.geometry.within(property_buffer)]

    if nearby_stops.empty:
        return None

    # Calculate distances internally using GeoPandas (EPSG:3577)
    nearby_stops['distance'] = nearby_stops.geometry.distance(property_point)
    nearest_stop = nearby_stops.sort_values(by='distance').iloc[0]

    # Convert the property and nearest stop into GeoDataFrames for CRS transformation
    property_gdf = gpd.GeoDataFrame(geometry=[property_point], crs="EPSG:3577")
    nearest_stop_gdf = gpd.GeoDataFrame(geometry=[nearest_stop.geometry], crs="EPSG:3577")

    # Reproject to EPSG:4326 for the ORS API
    property_latlon = property_gdf.to_crs(epsg=4326).geometry.iloc[0]
    nearest_stop_latlon = nearest_stop_gdf.to_crs(epsg=4326).geometry.iloc[0]

    # Get coordinates for ORS API
    coords_property = (property_latlon.x, property_latlon.y)
    coords_nearest_stop = (nearest_stop_latlon.x, nearest_stop_latlon.y)

    try:
        # Calculate the driving distance using the ORS API
        distances = client.distance_matrix(
            locations=[coords_property, coords_nearest_stop],
            profile='driving-car',
            metrics=['distance'],
            units='m'
        )
        return distances['distances'][0][1]  # Return driving distance to nearest stop
    except openrouteservice.exceptions.ApiError as e:
        print(f"ORS API Error: {e}")
        return None


In [10]:
# For each property, calculate the distance to the nearest train stop and tram stop
properties_gdf['nearest_train_distance'] = properties_gdf['geometry'].apply(lambda x: calculate_nearest_transport(x, train_stops_gdf))
properties_gdf['nearest_tram_distance'] = properties_gdf['geometry'].apply(lambda x: calculate_nearest_transport(x, tram_stops_gdf))

# Save the updated property data with distances to a CSV
properties_gdf.to_csv('../data/raw/properties_with_distances.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = 

In [11]:
# Melbourne CBD coordinates (approximate center)
melbourne_cbd_coords = (144.9631, -37.8136)

# Function to calculate nearest distance based on condition
def calculate_distance_by_postcode(property_row, transport_points):
    if property_row['postcode'] == 3000:
        # Calculate walking distance to nearest public transport (train/tram)
        return calculate_nearest_transport(property_row['geometry'], transport_points, profile='foot-walking')
    else:
        # Calculate driving distance to Melbourne CBD
        return calculate_distance_to_cbd(property_row['geometry'], melbourne_cbd_coords)

# Function to calculate driving or walking distance using ORS API
def calculate_nearest_transport(property_point, transport_points, profile='driving-car'):
    property_buffer = property_point.buffer(10000)  # 10km buffer
    nearby_stops = transport_points[transport_points.geometry.within(property_buffer)]

    if nearby_stops.empty:
        return None

    # Calculate distances internally using GeoPandas (EPSG:3577)
    nearby_stops['distance'] = nearby_stops.geometry.distance(property_point)
    nearest_stop = nearby_stops.sort_values(by='distance').iloc[0]

    # Convert the property and nearest stop into GeoDataFrames for CRS transformation
    property_gdf = gpd.GeoDataFrame(geometry=[property_point], crs="EPSG:3577")
    nearest_stop_gdf = gpd.GeoDataFrame(geometry=[nearest_stop.geometry], crs="EPSG:3577")

    # Reproject to EPSG:4326 for the ORS API
    property_latlon = property_gdf.to_crs(epsg=4326).geometry.iloc[0]
    nearest_stop_latlon = nearest_stop_gdf.to_crs(epsg=4326).geometry.iloc[0]

    # Get coordinates for ORS API
    coords_property = (property_latlon.x, property_latlon.y)
    coords_nearest_stop = (nearest_stop_latlon.x, nearest_stop_latlon.y)

    try:
        # Calculate the distance using the ORS API (can be walking or driving)
        distances = client.distance_matrix(
            locations=[coords_property, coords_nearest_stop],
            profile=profile,  # Either walking or driving-car
            metrics=['distance'],
            units='m'
        )
        return distances['distances'][0][1]  # Return distance to nearest stop
    except openrouteservice.exceptions.ApiError as e:
        print(f"ORS API Error: {e}")
        return None

# Function to calculate driving distance from property to Melbourne CBD
def calculate_distance_to_cbd(property_point, cbd_coords):
    # Convert property_point to EPSG:4326 for ORS API
    property_gdf = gpd.GeoDataFrame(geometry=[property_point], crs="EPSG:3577")
    property_latlon = property_gdf.to_crs(epsg=4326).geometry.iloc[0]

    coords_property = (property_latlon.x, property_latlon.y)
    
    try:
        # Calculate driving distance to Melbourne CBD
        distances = client.distance_matrix(
            locations=[coords_property, cbd_coords],
            profile='driving-car',
            metrics=['distance'],
            units='m'
        )
        return distances['distances'][0][1]  # Return distance to CBD
    except openrouteservice.exceptions.ApiError as e:
        print(f"ORS API Error: {e}")
        return None

# Apply the calculation to each property
properties_gdf['nearest_distance'] = properties_gdf.apply(lambda row: calculate_distance_by_postcode(row, transport_points=train_stops_gdf.append(tram_stops_gdf)), axis=1)

# Save the updated property data with distances to a CSV
properties_gdf.to_csv('../data/raw/properties_distances_test.csv', index=False)


  properties_gdf['nearest_distance'] = properties_gdf.apply(lambda row: calculate_distance_by_postcode(row, transport_points=train_stops_gdf.append(tram_stops_gdf)), axis=1)
  properties_gdf['nearest_distance'] = properties_gdf.apply(lambda row: calculate_distance_by_postcode(row, transport_points=train_stops_gdf.append(tram_stops_gdf)), axis=1)
  properties_gdf['nearest_distance'] = properties_gdf.apply(lambda row: calculate_distance_by_postcode(row, transport_points=train_stops_gdf.append(tram_stops_gdf)), axis=1)
  properties_gdf['nearest_distance'] = properties_gdf.apply(lambda row: calculate_distance_by_postcode(row, transport_points=train_stops_gdf.append(tram_stops_gdf)), axis=1)
  properties_gdf['nearest_distance'] = properties_gdf.apply(lambda row: calculate_distance_by_postcode(row, transport_points=train_stops_gdf.append(tram_stops_gdf)), axis=1)
  properties_gdf['nearest_distance'] = properties_gdf.apply(lambda row: calculate_distance_by_postcode(row, transport_points=train