In [1]:



import os
import json
import pandas as pd
from typing import Dict, Any
from urllib.request import urlretrieve
import seaborn as sns
import matplotlib.pyplot as plt
import math
import numpy as np
import pandas as pd
import geopandas as gpd
import folium
from shapely.geometry import Point
from shapely import wkt
import openrouteservice

relative_path = "../data/landing/domain/"
absolute_path = os.path.abspath(relative_path)

def extract_features(property_id: str, listing_data: Dict[str, Any]) -> Dict[str, Any]:
    features = {'id': property_id}
    
    listing_model = listing_data.get('listingModel', {})
    
    # Extract price
    features['price'] = listing_model.get('price')
    
    # Extract address
    address = listing_model.get('address', {})
    features['street'] = address.get('street')
    features['suburb'] = address.get('suburb')
    features['state'] = address.get('state')
    features['postcode'] = address.get('postcode')
    features['latitude'] = address.get('lat')
    features['longitude'] = address.get('lng')
    
    # Extract other features
    property_features = listing_model.get('features', {})
    features['bedrooms'] = property_features.get('beds')
    features['bathrooms'] = property_features.get('baths')
    features['parking'] = property_features.get('parking')
    features['propertyType'] = property_features.get('propertyType')
    
    return features



# def process_json_files(input_dir: str, output_dir: str, output_filename: str):
#     all_listings = []
    
#     for filename in os.listdir(input_dir):
#         if filename.endswith('.json'):
#             property_id = filename.split('.')[0]  # Get property ID from filename
#             file_path = os.path.join(input_dir, filename)
#             with open(file_path, 'r') as file:
#                 try:
#                     data = json.load(file)
#                     listings_map = data.get('props', {}).get('listingsMap', {})
#                     if property_id in listings_map:
#                         listing_data = listings_map[property_id]
#                         features = extract_features(property_id, listing_data)
#                         all_listings.append(features)
#                         print(f"Processed file: {filename}")
#                     else:
#                         print(f"Property ID {property_id} not found in file: {filename}")
#                 except json.JSONDecodeError:
#                     print(f"Error decoding JSON in file: {filename}")
    

#     df = pd.DataFrame(all_listings)
    
#     # Create output directory if it doesn't exist
#     os.makedirs(output_dir, exist_ok=True)
    
#     # Save DataFrame to CSV
#     output_path = os.path.join(output_dir, output_filename)
#     df.to_csv(output_path, index=False)
#     print(f"CSV file saved to: {output_path}")
#     print(f"Total files processed: {len(all_listings)}")

# # Usage
# input_directory = '../data/landing/domain'
# output_directory = '../data/raw/domain_output'
# output_filename = 'all_listings.csv'

# process_json_files(input_directory, output_directory, output_filename)

In [2]:
all_listings_df = pd.read_csv('../data/raw/domain_output/all_listings.csv')

In [3]:
all_listings_df.head()

Unnamed: 0,id,price,street,suburb,state,postcode,latitude,longitude,bedrooms,bathrooms,parking,propertyType
0,16889460,$420.00,27 Westmoreland Road,Sunshine North,VIC,3020,-37.774926,144.834817,3,1,2,house
1,17188457,$850.00,73 Reynard Street,Coburg,VIC,3058,-37.748181,144.958473,3,2,0,house
2,17160233,"$1,300.00",35 Fairmont Avenue,Camberwell,VIC,3124,-37.842214,145.071069,4,2,2,house
3,17062496,$580,13 Vesper Avenue,Tarneit,VIC,3029,-37.835021,144.669242,4,2,2,house
4,17179442,$440.00,9 Red Robin Drive,Winter Valley,VIC,3358,-37.567505,143.784207,4,2,2,house


In [4]:
shapefile_path = '../data/raw/PTV/PTV_METRO_TRAIN_STATION.shp'

# Load the shapefile
station_df = gpd.read_file(shapefile_path)

# Display the first few records to understand its structure and ensure data is loaded correctly




In [5]:
len(station_df)

220

In [6]:
rentals_df = all_listings_df.drop_duplicates().dropna(subset=['latitude', 'longitude'])

In [7]:
rentals_df['propertyType'].value_counts()

propertyType
house                7541
apartmentUnitFlat    6034
townhouse            1559
studio                214
villa                  35
carspace               19
acreageSemiRural       16
newHouseLand           16
newApartments          11
terrace                 9
semiDetached            8
duplex                  5
vacantLand              4
rural                   4
retirement              3
blockOfUnits            3
newLand                 1
Name: count, dtype: int64

In [8]:
station_df

Unnamed: 0,STOP_ID,LATITUDE,STOP_NAME,LONGITUDE,TICKETZONE,ROUTEUSSP,geometry
0,19970,-37.781193,Royal Park Railway Station (Parkville),144.952301,1,Upfield,POINT (144.95231 -37.78118)
1,19971,-37.788140,Flemington Bridge Railway Station (North Melbo...,144.939323,1,Upfield,POINT (144.93933 -37.78813)
2,19972,-37.794267,Macaulay Railway Station (North Melbourne),144.936166,1,Upfield,POINT (144.93617 -37.79425)
3,19973,-37.807419,North Melbourne Railway Station (West Melbourne),144.942570,1,"Flemington,Sunbury,Upfield,Werribee,Williamsto...",POINT (144.94258 -37.80741)
4,19974,-37.788657,Clifton Hill Railway Station (Clifton Hill),144.995417,1,"Mernda,Hurstbridge",POINT (144.99542 -37.78864)
...,...,...,...,...,...,...,...
215,19965,-37.742345,Coburg Railway Station (Coburg),144.963336,1,Upfield,POINT (144.96334 -37.74233)
216,19966,-37.754485,Moreland Railway Station (Coburg),144.961823,1,Upfield,POINT (144.96183 -37.75447)
217,19967,-37.761242,Anstey Railway Station (Brunswick),144.960684,1,Upfield,POINT (144.96069 -37.76123)
218,19968,-37.767721,Brunswick Railway Station (Brunswick),144.959587,1,Upfield,POINT (144.95959 -37.76771)


In [9]:
Points_for_rentals= rentals_df.apply(lambda x: (x['longitude'], x['latitude']), axis=1)
Points_for_station = station_df.apply(lambda x: (x['LONGITUDE'], x['LATITUDE']), axis=1)

In [10]:
rentals_df

Unnamed: 0,id,price,street,suburb,state,postcode,latitude,longitude,bedrooms,bathrooms,parking,propertyType
0,16889460,$420.00,27 Westmoreland Road,Sunshine North,VIC,3020,-37.774926,144.834817,3,1,2,house
1,17188457,$850.00,73 Reynard Street,Coburg,VIC,3058,-37.748181,144.958473,3,2,0,house
2,17160233,"$1,300.00",35 Fairmont Avenue,Camberwell,VIC,3124,-37.842214,145.071069,4,2,2,house
3,17062496,$580,13 Vesper Avenue,Tarneit,VIC,3029,-37.835021,144.669242,4,2,2,house
4,17179442,$440.00,9 Red Robin Drive,Winter Valley,VIC,3358,-37.567505,143.784207,4,2,2,house
...,...,...,...,...,...,...,...,...,...,...,...,...
15480,16644757,"$1,050 pw",2/8 Skene Street,Burwood East,VIC,3151,-37.856688,145.132631,4,3,2,townhouse
15481,16086828,$560,25 Masthead Way,Werribee South,VIC,3030,-37.963369,144.707632,4,2,2,house
15482,16076685,$700,2/26 Clyde Street,Ferntree Gully,VIC,3156,-37.882581,145.267643,3,2,2,apartmentUnitFlat
15483,17174571,$835,1/40 Elizabeth Street,Coburg,VIC,3058,-37.737269,144.982527,3,3,2,townhouse


In [11]:
rentals_df['geometry'] = Points_for_rentals
station_df['geometry'] = Points_for_station

  station_df['geometry'] = Points_for_station


In [12]:
rentals_df

Unnamed: 0,id,price,street,suburb,state,postcode,latitude,longitude,bedrooms,bathrooms,parking,propertyType,geometry
0,16889460,$420.00,27 Westmoreland Road,Sunshine North,VIC,3020,-37.774926,144.834817,3,1,2,house,"(144.8348174, -37.7749263)"
1,17188457,$850.00,73 Reynard Street,Coburg,VIC,3058,-37.748181,144.958473,3,2,0,house,"(144.9584733, -37.7481813)"
2,17160233,"$1,300.00",35 Fairmont Avenue,Camberwell,VIC,3124,-37.842214,145.071069,4,2,2,house,"(145.0710691, -37.8422139)"
3,17062496,$580,13 Vesper Avenue,Tarneit,VIC,3029,-37.835021,144.669242,4,2,2,house,"(144.6692424, -37.835021)"
4,17179442,$440.00,9 Red Robin Drive,Winter Valley,VIC,3358,-37.567505,143.784207,4,2,2,house,"(143.7842071, -37.5675049)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
15480,16644757,"$1,050 pw",2/8 Skene Street,Burwood East,VIC,3151,-37.856688,145.132631,4,3,2,townhouse,"(145.1326313, -37.85668769999999)"
15481,16086828,$560,25 Masthead Way,Werribee South,VIC,3030,-37.963369,144.707632,4,2,2,house,"(144.707632, -37.963369)"
15482,16076685,$700,2/26 Clyde Street,Ferntree Gully,VIC,3156,-37.882581,145.267643,3,2,2,apartmentUnitFlat,"(145.267643, -37.8825813)"
15483,17174571,$835,1/40 Elizabeth Street,Coburg,VIC,3058,-37.737269,144.982527,3,3,2,townhouse,"(144.9825267, -37.737269)"


In [13]:
rentals_df.iloc[0]['geometry']


(144.8348174, -37.7749263)

In [14]:
# 5b3ce3597851110001cf6248b46e26c44d9a4d4f9d0a80a2325cc4f2
client = openrouteservice.Client(key='5b3ce3597851110001cf6248545577b169fa4a4eacc8a470180afc50')

locations = [
       rentals_df.iloc[0]['geometry'],
         station_df.iloc[0]['geometry'],
         station_df.iloc[1]['geometry'],
            station_df.iloc[2]['geometry'],
]

response = client.distance_matrix(
    locations=locations,
    profile='driving-car',   # or 'foot-walking', 'cycling-regular', etc.
    metrics=['distance'],    # Only distance or also 'duration' if needed
    sources=[0],             # Index of the origin point (first in the list)
    destinations=[1,2,3]   # Indices of the destination points
)


In [15]:
station_df

Unnamed: 0,STOP_ID,LATITUDE,STOP_NAME,LONGITUDE,TICKETZONE,ROUTEUSSP,geometry
0,19970,-37.781193,Royal Park Railway Station (Parkville),144.952301,1,Upfield,"(144.952301, -37.781193)"
1,19971,-37.788140,Flemington Bridge Railway Station (North Melbo...,144.939323,1,Upfield,"(144.939323, -37.78814)"
2,19972,-37.794267,Macaulay Railway Station (North Melbourne),144.936166,1,Upfield,"(144.936166, -37.794267)"
3,19973,-37.807419,North Melbourne Railway Station (West Melbourne),144.942570,1,"Flemington,Sunbury,Upfield,Werribee,Williamsto...","(144.94257, -37.807419)"
4,19974,-37.788657,Clifton Hill Railway Station (Clifton Hill),144.995417,1,"Mernda,Hurstbridge","(144.995417, -37.788657)"
...,...,...,...,...,...,...,...
215,19965,-37.742345,Coburg Railway Station (Coburg),144.963336,1,Upfield,"(144.963336, -37.742345)"
216,19966,-37.754485,Moreland Railway Station (Coburg),144.961823,1,Upfield,"(144.961823, -37.754485)"
217,19967,-37.761242,Anstey Railway Station (Brunswick),144.960684,1,Upfield,"(144.960684, -37.761242)"
218,19968,-37.767721,Brunswick Railway Station (Brunswick),144.959587,1,Upfield,"(144.959587, -37.767721)"


In [16]:
list(station_df['STOP_NAME'])

['Royal Park Railway Station (Parkville)',
 'Flemington Bridge Railway Station (North Melbourne)',
 'Macaulay Railway Station (North Melbourne)',
 'North Melbourne Railway Station (West Melbourne)',
 'Clifton Hill Railway Station (Clifton Hill)',
 'Victoria Park Railway Station (Abbotsford)',
 'Collingwood Railway Station (Abbotsford)',
 'North Richmond Railway Station (Richmond)',
 'West Richmond Railway Station (Richmond)',
 'Jolimont-MCG Railway Station (East Melbourne)',
 'Macleod Railway Station (Macleod)',
 'Watsonia Railway Station (Watsonia)',
 'Greensborough Railway Station (Greensborough)',
 'Montmorency Railway Station (Montmorency)',
 'Eltham Railway Station (Eltham)',
 'Diamond Creek Railway Station (Diamond Creek)',
 'Wattle Glen Railway Station (Wattle Glen)',
 'Hurstbridge Railway Station (Hurstbridge)',
 'Williamstown Railway Station (Williamstown)',
 'Williamstown Beach Railway Station (Williamstown)',
 'North Williamstown Railway Station (Williamstown)',
 'Newport Ra

In [17]:
def closest_station(row, station_df, client):
    station_list = list(station_df['geometry'])
    station_names = list(station_df['STOP_NAME'])
    locations = [row['geometry'], *station_list]  # Use row's geometry instead of rentals_df.iloc[0]

    response = client.distance_matrix(
        locations=locations,
        profile='driving-car',   
        metrics=['distance'],    
        sources=[0],             
        destinations=[i for i in (range(1, len(locations)))]  # Fix the range to match locations
    )
    
    min_index, min_distance = get_min_distance(response)
    
    return station_names[min_index],locations[min_index+1],min_distance

def get_min_distance(response):
    min_distance = math.inf
    min_index = None
    for i, distance in enumerate(response['distances'][0]):  # 'distances', not 'distance'
        if distance < min_distance:
            min_distance = distance
            min_index = i
    return min_index, min_distance  

# Apply the function to each row in rentals_df




In [18]:


closest_station(rentals_df.iloc[0], station_df, client)


('Albion Railway Station (Sunshine North)', (144.824704, -37.777653), 2084.77)

In [19]:
#  testing the function
print(closest_station(rentals_df.iloc[0], station_df, client))
print(station_df[station_df['STOP_NAME'] == 'Albion Railway Station (Sunshine North)']) 

('Albion Railway Station (Sunshine North)', (144.824704, -37.777653), 2084.77)
   STOP_ID   LATITUDE                                STOP_NAME   LONGITUDE  \
29   20004 -37.777653  Albion Railway Station (Sunshine North)  144.824704   

   TICKETZONE ROUTEUSSP                  geometry  
29        1,2   Sunbury  (144.824704, -37.777653)  


In [20]:
sample_rentals_df = rentals_df.iloc[:10].copy()
sample_rentals_df[['closest_station', 'distance(m)', 'extra_info']] = sample_rentals_df.apply(lambda x: closest_station(x, station_df, client), axis=1).apply(pd.Series)



In [24]:
sample_rentals_df

Unnamed: 0,id,price,street,suburb,state,postcode,latitude,longitude,bedrooms,bathrooms,parking,propertyType,geometry,closest_station,distance(m),extra_info
0,16889460,$420.00,27 Westmoreland Road,Sunshine North,VIC,3020,-37.774926,144.834817,3,1,2,house,"(144.8348174, -37.7749263)",Albion Railway Station (Sunshine North),"(144.824704, -37.777653)",2084.77
1,17188457,$850.00,73 Reynard Street,Coburg,VIC,3058,-37.748181,144.958473,3,2,0,house,"(144.9584733, -37.7481813)",Coburg Railway Station (Coburg),"(144.963336, -37.742345)",1283.48
2,17160233,"$1,300.00",35 Fairmont Avenue,Camberwell,VIC,3124,-37.842214,145.071069,4,2,2,house,"(145.0710691, -37.8422139)",Hartwell Railway Station (Camberwell),"(145.07556, -37.843985)",477.84
3,17062496,$580,13 Vesper Avenue,Tarneit,VIC,3029,-37.835021,144.669242,4,2,2,house,"(144.6692424, -37.835021)",Werribee Railway Station (Werribee),"(144.661118, -37.899378)",8071.6
4,17179442,$440.00,9 Red Robin Drive,Winter Valley,VIC,3358,-37.567505,143.784207,4,2,2,house,"(143.7842071, -37.5675049)",Watergardens Railway Station (Sydenham),"(144.77418, -37.701129)",97951.26
5,17198904,$480,50 Manna Gum Drive,Mount Duneed,VIC,3217,-38.226829,144.334372,3,2,2,house,"(144.3343716, -38.2268287)",Werribee Railway Station (Werribee),"(144.661118, -37.899378)",58159.23
6,17204127,$895.00,51 Ferndale Road,Glen Iris,VIC,3146,-37.855363,145.074779,3,1,2,house,"(145.0747789, -37.855363)",Hartwell Railway Station (Camberwell),"(145.07556, -37.843985)",1504.62
7,17196019,$285,1 Cooper Street,Ouyen,VIC,3490,-35.06814,142.3163,2,1,3,house,"(142.3163004, -35.0681404)",Sunbury Railway Station (Sunbury),"(144.727319, -37.579091)",405415.63
8,17109740,$500 weekly,24 Cardamon Street,Armstrong Creek,VIC,3217,-38.226757,144.367373,4,2,2,house,"(144.3673729, -38.2267566)",Werribee Railway Station (Werribee),"(144.661118, -37.899378)",60263.9
9,15225888,$899/week,135 Kinnear Road,Mickleham,VIC,3064,-37.534918,144.858856,4,2,2,house,"(144.8588556, -37.5349182)",Craigieburn Railway Station (Craigieburn),"(144.94332, -37.601817)",14328.76


In [38]:
rentals_df 

Unnamed: 0,id,price,street,suburb,state,postcode,latitude,longitude,bedrooms,bathrooms,parking,propertyType,geometry
0,16889460,$420.00,27 Westmoreland Road,Sunshine North,VIC,3020,-37.774926,144.834817,3,1,2,house,"(144.8348174, -37.7749263)"
1,17188457,$850.00,73 Reynard Street,Coburg,VIC,3058,-37.748181,144.958473,3,2,0,house,"(144.9584733, -37.7481813)"
2,17160233,"$1,300.00",35 Fairmont Avenue,Camberwell,VIC,3124,-37.842214,145.071069,4,2,2,house,"(145.0710691, -37.8422139)"
3,17062496,$580,13 Vesper Avenue,Tarneit,VIC,3029,-37.835021,144.669242,4,2,2,house,"(144.6692424, -37.835021)"
4,17179442,$440.00,9 Red Robin Drive,Winter Valley,VIC,3358,-37.567505,143.784207,4,2,2,house,"(143.7842071, -37.5675049)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
15480,16644757,"$1,050 pw",2/8 Skene Street,Burwood East,VIC,3151,-37.856688,145.132631,4,3,2,townhouse,"(145.1326313, -37.85668769999999)"
15481,16086828,$560,25 Masthead Way,Werribee South,VIC,3030,-37.963369,144.707632,4,2,2,house,"(144.707632, -37.963369)"
15482,16076685,$700,2/26 Clyde Street,Ferntree Gully,VIC,3156,-37.882581,145.267643,3,2,2,apartmentUnitFlat,"(145.267643, -37.8825813)"
15483,17174571,$835,1/40 Elizabeth Street,Coburg,VIC,3058,-37.737269,144.982527,3,3,2,townhouse,"(144.9825267, -37.737269)"
