In [70]:
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
import geopandas as gpd

In [5]:
# Load the raw property listings from the parquet file in the raw data folder
property_df = pd.read_parquet(
    path='../../data/raw/property_details.parquet',
)

In [6]:
# Drop rows where price is missing (sometimes not given on the website)
property_df = property_df.dropna(subset=['price'])

# Drop nbn_details and property_features columns (often left empty on the website).
# errors='ignore' keeps this cell re-runnable: property_df is overwritten in place,
# so on a second run the columns are already gone and a plain drop raises KeyError.
property_df = property_df.drop(
    columns=['nbn_details', 'property_features'],
    errors='ignore',
)

In [7]:
# Display the cleaned listings; the rendered output elides the middle rows of the frame.
property_df

Unnamed: 0,title,description,street_address,suburb,postcode,price,bedrooms,bathrooms,parking,primary_property_type,structured_features,video_count,photo_count,date_listed,days_listed,floor_plans_count,virtual_tour,nearby_schools
0,"60 Little Windrock Lane, Craigieburn VIC 3064 ...","View this 2 bedroom, 1 bathroom rental house a...","60 Little Windrock Lane, Craigieburn VIC 3064",Craigieburn,3064,$450 Per Week,2.0,1.0,1.0,House,"[{'category': 'Indoor', 'name': 'Built in ward...",0.0,21.0,2024-08-22T16:07:26.000,14.0,0.0,False,"[{'address': 'Craigieburn, VIC 3064', 'distanc..."
1,"53 Were Street, Brighton VIC 3186 - House For ...","View this $1,500/week 4 bedroom, 2 bathroom re...","53 Were Street, Brighton VIC 3186",Brighton,3186,"$1,490.00",4.0,2.0,2.0,House,[],0.0,6.0,2024-06-02T18:11:41.000,95.0,2.0,True,"[{'address': 'Brighton, VIC 3186', 'distance':..."
2,"43 Tackle Drive, Point Cook VIC 3030 - Townhou...","View this 3 bedroom, 2 bathroom rental townhou...","43 Tackle Drive, Point Cook VIC 3030",Point Cook,3030,$550 per Week,3.0,2.0,2.0,Townhouse/Villa,"[{'category': 'Outdoor', 'name': 'Secure Parki...",0.0,17.0,2024-09-03T12:01:18.000,2.0,0.0,True,"[{'address': 'Point Cook, VIC 3030', 'distance..."
3,"3 Rostrevor Parade, Mont Albert VIC 3127 - Hou...","View this 5 bedroom, 2 bathroom rental house a...","3 Rostrevor Parade, Mont Albert VIC 3127",Mont Albert,3127,$800 weekly,5.0,2.0,2.0,House,[],0.0,8.0,2024-07-01T12:53:48.000,66.0,0.0,False,"[{'address': 'Mont Albert, VIC 3127', 'distanc..."
4,"48 Roberts Street, Frankston VIC 3199 - Studio...","View this 9 bedroom, 3 bathroom rental studio ...","48 Roberts Street, Frankston VIC 3199",Frankston,3199,$299 per week,9.0,3.0,4.0,Apartment,"[{'category': 'Indoor', 'name': 'Furnished', '...",0.0,20.0,2024-07-02T11:24:10.000,65.0,1.0,False,"[{'address': 'Frankston, VIC 3199', 'distance'..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14440,"11 Russell Street, Darley VIC 3340 - House For...","View this 4 bedroom, 2 bathroom rental house a...","11 Russell Street, Darley VIC 3340",Darley,3340,$530.00 per week,4.0,2.0,4.0,House,"[{'category': 'Indoor', 'name': 'Gas', 'source...",0.0,15.0,2024-06-26T11:09:37.000,71.0,1.0,False,"[{'address': 'Darley, VIC 3340', 'distance': 2..."
14441,"27 Grazing Rd, Weir Views VIC 3338 - House For...","View this 3 bedroom, 2 bathroom rental house a...","27 Grazing Rd, Weir Views VIC 3338",Weir Views,3338,$415 Per Week,3.0,2.0,2.0,House,"[{'category': 'Indoor', 'name': 'Built in ward...",0.0,8.0,2024-07-26T14:51:48.000,41.0,0.0,False,"[{'address': 'Melton, VIC 3338', 'distance': 9..."
14443,"17 Laming Court, Mount Martha VIC 3934 - House...","View this 4 bedroom, 2 bathroom rental house a...","17 Laming Court, Mount Martha VIC 3934",Mount Martha,3934,$700 per week,4.0,2.0,2.0,House,"[{'category': 'Outdoor', 'name': 'Secure Parki...",0.0,6.0,2024-09-02T10:21:37.000,3.0,0.0,False,"[{'address': 'Mount Martha, VIC 3934', 'distan..."
14444,"2/110 Chute Street, Mordialloc VIC 3195 - Hous...","View this 3 bedroom, 1 bathroom rental house a...","2/110 Chute Street, Mordialloc VIC 3195",Mordialloc,3195,$580 per week,3.0,1.0,1.0,House,"[{'category': 'Indoor', 'name': 'Heating', 'so...",0.0,14.0,2024-08-12T11:27:32.000,24.0,0.0,False,"[{'address': 'Mordialloc, VIC 3195', 'distance..."


In [58]:
def convert_to_coord(address, geolocator=None):
    """Geocode an address into a ``(latitude, longitude)`` tuple.

    Parameters
    ----------
    address : str
        Address to look up. ``None`` and blank strings are not sent to the
        geocoding service.
    geolocator : object, optional
        Any object exposing ``geocode(address)`` whose result has
        ``latitude`` and ``longitude`` attributes. When omitted, a single
        ``Nominatim`` client is created on first use and reused afterwards.

    Returns
    -------
    tuple of (float, float) or None
        ``(latitude, longitude)`` of the match, or ``None`` when the address
        is missing/blank or the geocoder returns no result.
    """
    # Nothing to look up: avoid a pointless network round trip.
    if address is None or (isinstance(address, str) and not address.strip()):
        return None

    if geolocator is None:
        # Reuse one client across calls instead of building a new one per address.
        geolocator = getattr(convert_to_coord, "_geolocator", None)
        if geolocator is None:
            geolocator = Nominatim(user_agent="property geocoder")
            convert_to_coord._geolocator = geolocator

    location = geolocator.geocode(address)
    if location is None:
        return None
    return location.latitude, location.longitude

In [56]:
# Spot check: this address resolves to a (latitude, longitude) pair
convert_to_coord('1/1-3 Sturdee Street, Reservoir, Vic 3073')

(-37.7007586, 145.0090779)

In [59]:
# Spot check: the cell displays no output for this address, which is what a None return looks like
convert_to_coord('252 Ohallorans Road, Lara, Vic 3212')

In [64]:
# Subsample of (at most) 100 rows.
# random_state is fixed so a re-run selects the same rows, and min() stops
# sample() from raising when fewer than 100 listings are available.
subsample = property_df.sample(n=min(100, len(property_df)), random_state=42)

# Convert each street address to a (latitude, longitude) tuple (None when not found).
# Only one column is needed, so apply over the Series rather than over whole rows.
subsample['coords'] = subsample['street_address'].apply(convert_to_coord)

In [68]:
# Keep only the listings whose address was geocoded (coords is not None)
subsample = subsample.loc[subsample['coords'].notna()]

title                    85
description              85
street_address           85
suburb                   85
postcode                 85
price                    85
bedrooms                 85
bathrooms                85
parking                  85
primary_property_type    85
structured_features      85
video_count              85
photo_count              85
date_listed              85
days_listed              85
floor_plans_count        85
virtual_tour             85
nearby_schools           85
coords                   85
dtype: int64

In [85]:
from geopy.distance import distance

# Load the metro train station locations and reproject them to EPSG:4326
train_stations = gpd.read_file('../../data/landing/MetroTrains/PTV_METRO_TRAIN_STATION.shp')
train_stations = train_stations.to_crs('EPSG:4326')

# Store each station as a (latitude, longitude) tuple. This matches the order
# returned by convert_to_coord and the order geopy's distance() expects; the
# previous (longitude, latitude) order was inconsistent with both.
train_stations['coords'] = list(zip(train_stations['LATITUDE'], train_stations['LONGITUDE']))

def find_closest_station(coords, stations=None):
    """Return the name of the station closest to ``coords``.

    Parameters
    ----------
    coords : tuple of (float, float) or None
        ``(latitude, longitude)`` of the point of interest. ``None`` (for
        example an address that could not be geocoded) yields ``None``.
    stations : DataFrame, optional
        Table with ``LATITUDE``, ``LONGITUDE`` and ``STOP_NAME`` columns.
        Defaults to the notebook-level ``train_stations`` frame.

    Returns
    -------
    str or None
        ``STOP_NAME`` of the station with the smallest distance (measured in
        kilometres with geopy's ``distance``), or ``None`` when ``coords`` is
        ``None`` or there are no stations. Ties keep the first station seen.
    """
    # No location to measure from.
    if coords is None:
        return None

    if stations is None:
        stations = train_stations

    closest_station = None
    min_distance = float('inf')

    # Walk the three needed columns in lockstep; this avoids building a full
    # row Series per station as iterrows() does.
    for lat, lon, name in zip(stations['LATITUDE'], stations['LONGITUDE'], stations['STOP_NAME']):
        dist = distance(coords, (lat, lon)).km

        if dist < min_distance:
            min_distance = dist
            closest_station = name

    return closest_station
    



In [89]:
# Find the closest train station for every geocoded listing.
# Series.apply replaces the row-by-row iterrows()/.at loop and always creates
# the column, even when the frame has no rows.
subsample['closest_train_station'] = subsample['coords'].apply(find_closest_station)
    