In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from geopy.geocoders import Nominatim
import re

In [2]:
property_df = pd.read_parquet('../../data/raw/property_details.parquet')

In [3]:
# Drop rows where price = None (Sometimes not given on website)
property_df = property_df[property_df['price'].notnull()]

# Drop nbn_details, property_features columns (often left empty on website)
property_df = property_df.drop(columns=['nbn_details', 'property_features'])

In [4]:
# View rows with None values
property_df[property_df.isnull().any(axis=1)]

Unnamed: 0,title,description,street_address,suburb,postcode,price,bedrooms,bathrooms,parking,primary_property_type,structured_features,video_count,photo_count,date_listed,days_listed,floor_plans_count,virtual_tour,nearby_schools


In [5]:
# Function to clean up the street address by removing the unit number
def remove_unit_number(address):
    # Regular expression to match unit numbers (e.g., 2657/181) and remove the first number (unit number)
    return re.sub(r'^\d+/', '', address)

# Apply the function to the 'street_address' column
property_df['street_address'] = property_df['street_address'].apply(remove_unit_number)

In [6]:
# Initialize the geolocator
geolocator = Nominatim(user_agent="property_geocoder")

# Function to get longitude and latitude
def get_coordinates(address):
    try:
        location = geolocator.geocode(address)
        if location:
            return (location.latitude, location.longitude)
        else:
            return (None, None)
    except Exception as e:
        print(f"Error getting coordinates for {address}: {e}")
        return (None, None)

# Apply the function to the 'street_address' column and store the results in new columns
property_df[['latitude', 'longitude']] = property_df['street_address'].apply(lambda x: pd.Series(get_coordinates(x)))

# Show the updated dataframe with coordinates
property_df[['street_address', 'latitude', 'longitude']]

property_df.dropna(inplace=True)
len(property_df)

property_df.to_csv("property_details_with_longlat.csv", index=False)

Error getting coordinates for 2 Sunrise Place, Wyndham Vale VIC 3024: HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Max retries exceeded with url: /search?q=2+Sunrise+Place%2C+Wyndham+Vale+VIC+3024&format=json&limit=1 (Caused by ReadTimeoutError("HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Read timed out. (read timeout=1)"))
Error getting coordinates for 3-11 High, North Melbourne VIC 3051: HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Max retries exceeded with url: /search?q=3-11+High%2C+North+Melbourne+VIC+3051&format=json&limit=1 (Caused by ReadTimeoutError("HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Read timed out. (read timeout=1)"))
Error getting coordinates for 9 High Street, North Melbourne VIC 3051: HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Max retries exceeded with url: /search?q=9+High+Street%2C+North+Melbourne+VIC+3051&format=json&limit=1 (Caused by ReadTimeoutE

In [7]:
# Load the train station shapefiles
regional_trains_gdf = gpd.read_file("../../data/landing/PTV/PTV_METRO_TRAIN_STATION.shp")
metro_trains_gdf = gpd.read_file("../../data/landing/PTV/PTV_REGIONAL_TRAIN_STATION.shp")

regional_bus_gdf = gpd.read_file("../../data/landing/PTV/PTV_REGIONAL_BUS_STOP.shp")
metro_bus_gdf = gpd.read_file("../../data/landing/PTV/PTV_METRO_BUS_STOP.shp")

trams_gdf = gpd.read_file("../../data/landing/PTV/PTV_METRO_TRAM_STOP.shp")


# Convert the property dataframe into a GeoDataFrame
property_gdf = gpd.GeoDataFrame(
    property_df, 
    geometry=gpd.points_from_xy(property_df.longitude, property_df.latitude), 
    crs="EPSG:4326"
)

property_gdf.to_crs(epsg=3857, inplace=True)


trains_gdf = pd.concat([regional_trains_gdf, metro_trains_gdf])
buses_gdf = pd.concat([regional_bus_gdf, metro_bus_gdf])

trains_gdf.to_crs(epsg=3857, inplace=True)
buses_gdf.to_crs(epsg=3857, inplace=True)
trams_gdf.to_crs(epsg=3857, inplace=True)

# Step 4: Create a function to calculate the distance to the closest train station
def closest_station_distance(property_point, stations_gdf):
    # Calculate the distance between a property and all train stations
    distances = stations_gdf.geometry.distance(property_point)
    # Return the minimum distance
    return distances.min()

# Apply the distance (meters) calculation to each property 
property_gdf['distance_to_closest_train'] = property_gdf.geometry.apply(
    lambda x: closest_station_distance(x, trains_gdf)
)

property_gdf['distance_to_closest_bus'] = property_gdf.geometry.apply(
    lambda x: closest_station_distance(x, buses_gdf)
)

property_gdf['distance_to_closest_tram'] = property_gdf.geometry.apply(
    lambda x: closest_station_distance(x, trams_gdf)
)

In [9]:
property_gdf

Unnamed: 0,title,description,street_address,suburb,postcode,price,bedrooms,bathrooms,parking,primary_property_type,...,days_listed,floor_plans_count,virtual_tour,nearby_schools,latitude,longitude,geometry,distance_to_closest_train,distance_to_closest_bus,distance_to_closest_tram
0,"60 Little Windrock Lane, Craigieburn VIC 3064 ...","View this 2 bedroom, 1 bathroom rental house a...","60 Little Windrock Lane, Craigieburn VIC 3064",Craigieburn,3064,$450 Per Week,2.0,1.0,1.0,House,...,14.0,0.0,False,"[{'address': 'Craigieburn, VIC 3064', 'distanc...",-37.588897,144.915516,POINT (16131921.46 -4521512.425),3587.757681,241.893482,17783.424475
1,"53 Were Street, Brighton VIC 3186 - House For ...","View this $1,500/week 4 bedroom, 2 bathroom re...","53 Were Street, Brighton VIC 3186",Brighton,3186,"$1,490.00",4.0,2.0,2.0,House,...,95.0,2.0,True,"[{'address': 'Brighton, VIC 3186', 'distance':...",-37.925640,144.999904,POINT (16141315.445 -4568926.558),1201.725436,263.466321,2485.958722
2,"43 Tackle Drive, Point Cook VIC 3030 - Townhou...","View this 3 bedroom, 2 bathroom rental townhou...","43 Tackle Drive, Point Cook VIC 3030",Point Cook,3030,$550 per Week,3.0,2.0,2.0,Townhouse/Villa,...,2.0,0.0,True,"[{'address': 'Point Cook, VIC 3030', 'distance...",-37.906257,144.720254,POINT (16110184.994 -4566191.508),3890.225343,713.624953,24933.663604
3,"3 Rostrevor Parade, Mont Albert VIC 3127 - Hou...","View this 5 bedroom, 2 bathroom rental house a...","3 Rostrevor Parade, Mont Albert VIC 3127",Mont Albert,3127,$800 weekly,5.0,2.0,2.0,House,...,66.0,0.0,False,"[{'address': 'Mont Albert, VIC 3127', 'distanc...",-37.812918,145.106110,POINT (16153138.221 -4553030.95),917.642280,424.603215,481.283363
4,"48 Roberts Street, Frankston VIC 3199 - Studio...","View this 9 bedroom, 3 bathroom rental studio ...","48 Roberts Street, Frankston VIC 3199",Frankston,3199,$299 per week,9.0,3.0,4.0,Apartment,...,65.0,1.0,False,"[{'address': 'Frankston, VIC 3199', 'distance'...",-38.154913,145.140409,POINT (16156956.429 -4601332.969),420.672031,555.155741,36692.954646
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14439,"16 Harris Avenue, Hoppers Crossing VIC 3029 - ...","View this 3 bedroom, 1 bathroom rental house a...","16 Harris Avenue, Hoppers Crossing VIC 3029",Hoppers Crossing,3029,$410 weekly,3.0,1.0,1.0,House,...,2.0,0.0,False,"[{'address': 'Hoppers Crossing, VIC 3029', 'di...",-37.874458,144.698213,POINT (16107731.39 -4561706.059),1278.928687,457.634144,24369.473967
14440,"11 Russell Street, Darley VIC 3340 - House For...","View this 4 bedroom, 2 bathroom rental house a...","11 Russell Street, Darley VIC 3340",Darley,3340,$530.00 per week,4.0,2.0,4.0,House,...,71.0,1.0,False,"[{'address': 'Darley, VIC 3340', 'distance': 2...",-37.657460,144.437942,POINT (16078758.199 -4531148.801),4227.068355,336.968127,51006.069471
14441,"27 Grazing Rd, Weir Views VIC 3338 - House For...","View this 3 bedroom, 2 bathroom rental house a...","27 Grazing Rd, Weir Views VIC 3338",Weir Views,3338,$415 Per Week,3.0,2.0,2.0,House,...,41.0,0.0,False,"[{'address': 'Melton, VIC 3338', 'distance': 9...",-37.722881,144.575340,POINT (16094053.263 -4540351.834),2770.792280,1160.221350,34305.762908
14443,"17 Laming Court, Mount Martha VIC 3934 - House...","View this 4 bedroom, 2 bathroom rental house a...","17 Laming Court, Mount Martha VIC 3934",Mount Martha,3934,$700 per week,4.0,2.0,2.0,House,...,3.0,0.0,False,"[{'address': 'Mount Martha, VIC 3934', 'distan...",-38.251174,145.044765,POINT (16146309.404 -4614969.25),15088.284608,108.701456,47756.651530


In [8]:
property_gdf.to_csv("property_details_with_distance_to_closest_ptv.csv", index=False)