In [26]:
import pandas as pd
import geopandas as gpd
from shapely import wkt
from shapely.geometry import Point
from scipy.spatial import cKDTree
import numpy as np
import requests
import json
from tqdm.auto import tqdm
tqdm.pandas()  # "tqdm>=4.9.0"
# Locations of the zone CSVs, relative to the notebook's working directory.
# NOTE(review): `ptv_path` is not referenced by any cell visible in this notebook.
school_path, ptv_path = (
    "../data/raw/school_zones/school_zones.csv",
    "../data/raw/ptv_zones/ptv_zones.csv",
)

# Rental listings and school zones

In [27]:
# Load one shard of the rental listings.
# The path is handed straight to pandas so that pandas opens *and closes* the
# file itself; the previous `open(path, "r")` handle was never closed.
path = "../data/raw/oldlistings_rent_2.json"
property_df = pd.read_json(path)

In [28]:
# Preview the loaded listings (a bare last expression renders the frame).
property_df

Unnamed: 0,lat,lng,rented_prices,address,meta_data
0,-37.813730,144.955580,"[{'date': 'August 2023', 'price': '$800 per we...","201/560 LONSDALE STREET, MELBOURNE","[{'label': 'bed', 'description': 'Bed', 'quant..."
1,-37.813730,144.955580,"[{'date': 'August 2023', 'price': '$720 per we...","1702/560 LONSDALE STREET, MELBOURNE","[{'label': 'bed', 'description': 'Bed', 'quant..."
2,-37.810280,144.956670,"[{'date': 'August 2023', 'price': '$830 per we...","2007/228 ABECKETT STREET, MELBOURNE","[{'label': 'bed', 'description': 'Bed', 'quant..."
3,-37.813590,144.955720,"[{'date': 'August 2023', 'price': '$660 per we...","901/21 HEALEYS LANE, MELBOURNE","[{'label': 'bed', 'description': 'Bed', 'quant..."
4,-37.813700,144.953930,"[{'date': 'August 2023', 'price': '$830 per we...","3801/601 LITTLE LONSDALE STREET, MELBOURNE","[{'label': 'bed', 'description': 'Bed', 'quant..."
...,...,...,...,...,...
539050,-37.840959,145.032227,"[{'date': 'November 2007', 'price': '$575 pw'}...","3/422-426 GLENFERRIE ROAD, KOOYONG","[{'label': 'bed', 'description': 'Bed', 'quant..."
539051,-37.842220,145.033818,"[{'date': 'October 2007', 'price': '$1,700 pw'...","9 MERNDA ROAD, KOOYONG","[{'label': 'bed', 'description': 'Bed', 'quant..."
539052,-37.841322,145.030886,"[{'date': 'February 2007', 'price': '$575 pw'}...","27 POWER AVENUE, KOOYONG","[{'label': 'bed', 'description': 'Bed', 'quant..."
539053,-37.843799,145.032879,"[{'date': 'February 2007', 'price': '$350 pw'}...","699A TOORAK ROAD, KOOYONG","[{'label': 'bed', 'description': 'Bed', 'quant..."


In [10]:
# One shapely Point per listing, built as (x, y) = (lng, lat).
property_df['geometry'] = [
    Point(lng, lat)
    for lng, lat in zip(property_df['lng'], property_df['lat'])
]

In [7]:
# Read the school-zone table and turn its WKT text column into shapely
# geometries before wrapping it in a GeoDataFrame.
# NOTE(review): no CRS is assigned here — confirm the polygons use the same
# lon/lat coordinates as the listings they are joined against.
school = pd.read_csv(school_path)
school['geometry'] = school['geometry'].map(wkt.loads)
gdf_school = gpd.GeoDataFrame(school, geometry='geometry')

In [8]:
# Preview the school-zone polygons.
gdf_school

Unnamed: 0,School_Name,geometry
0,Lockwood Primary School,"POLYGON ((144.19743 -36.82513, 144.20366 -36.8..."
1,Lockwood South Primary School,"POLYGON ((143.99198 -36.80865, 143.99179 -36.8..."
2,Heathcote Primary School,"POLYGON ((144.6649 -37.02629, 144.88917 -36.91..."
3,Redesdale Mia Mia Primary School,"POLYGON ((144.55801 -36.89947, 144.51209 -36.9..."
4,Axedale Primary School,"POLYGON ((144.71365 -36.74947, 144.66726 -36.7..."
...,...,...
1582,Matthew Flinders Girls Secondary College,"POLYGON ((144.4229 -38.08573, 144.41875 -38.08..."
1583,Canterbury Girls Secondary College,"POLYGON ((145.03764 -37.80765, 145.09942 -37.8..."
1584,Mentone Girls Secondary College,"POLYGON ((145.07547 -38.00074, 145.0713 -37.99..."
1585,Melbourne Girls College,"POLYGON ((145.02534 -37.79516, 145.04552 -37.8..."


In [40]:
# Inner spatial join: keeps one row per (listing, school zone) pair where the
# listing geometry lies within the zone polygon.
# NOTE(review): `df` is not defined in any cell visible above — presumably it is
# `property_df` (with its `geometry` column); confirm before a Restart & Run All.
gdf_houses_with_zones = gpd.sjoin(gpd.GeoDataFrame(df), gdf_school, how="inner", predicate="within")

In [43]:
# Distinct index labels of the rows that survived the inner join.
# NOTE(review): this cell originally carried the bare note "1127421" —
# presumably the row count of the join result; confirm.
in_zone_set = set(gdf_houses_with_zones.index.values)

In [47]:
# Sanity check: the index label of the row at position 1 (the recorded output is 1).
df.iloc[1].name

1

In [48]:
# Flag every listing whose index label appears in the school-zone join result.
# `Index.isin` is vectorised and compares the real index labels, whereas the
# previous row-wise apply compared positional labels (after `reset_index`)
# against index labels, which only agrees when `df` has a default RangeIndex.
df["inschool"] = df.index.isin(in_zone_set)

In [52]:
# Share of listings flagged as inside a school zone (mean of a boolean column).
df["inschool"].mean()

np.float64(0.9896411312389274)

In [None]:
# Preview the listings frame including the new `inschool` column
# (this cell has no recorded execution).
df

# PTV (Public Transport Victoria) stop proximity

In [29]:
# Stop exports, one CSV per numbered PTV transport mode.
paths = [
    "../data/raw/PTV/1 - Regional Train/stops_1_sa2.csv",
    "../data/raw/PTV/2 - Metropolitan Train/stops_2_sa2.csv",
    "../data/raw/PTV/3 - Metropolitan Tram/stops_3_sa2.csv",
    "../data/raw/PTV/4 - Metropolitan Bus/stops_4_sa2.csv",
    "../data/raw/PTV/5 - Regional Coach/stops_5_sa2.csv",
    "../data/raw/PTV/6 - Regional Bus/stops_6_sa2.csv",
]

# Collect the (stop_lon, stop_lat) pairs of every file, then stack them into a
# single two-column array covering all stops.
total_ptv_coords = []
for path in paths:
    ptv = pd.read_csv(path)
    total_ptv_coords.append(ptv[['stop_lon', 'stop_lat']].to_numpy())
total_ptv_coords = np.concatenate(total_ptv_coords)

In [30]:
# GeoDataFrame of the stops from the first file in `paths` only.
# NOTE(review): `ptv_gdf` is not referenced by any later cell visible here.
ptv_df = pd.read_csv(paths[0])
ptv_df["geometry"] = [
    Point(lon, lat)
    for lon, lat in zip(ptv_df['stop_lon'], ptv_df['stop_lat'])
]
ptv_gdf = gpd.GeoDataFrame(ptv_df, geometry='geometry')

In [31]:
# Number of candidate stops to keep per listing.
num_to_find = 3

# KD-tree over every stop's (lon, lat) pair. Distances are plain Euclidean
# distances in coordinate units, used only to shortlist candidate stops.
tree = cKDTree(total_ptv_coords)
property_coords = property_df[['lng', 'lat']].values

# Honour `num_to_find` instead of a second hard-coded 3.
distances, indices = tree.query(property_coords, k=num_to_find)

# With k == 1 scipy returns a 1-D array; reshape so every listing always holds
# a sequence of stop indices that downstream code can iterate over.
indices = indices.reshape(len(property_coords), -1)
property_df["ptv_index"] = list(indices)

In [32]:
def get_time_proximity(coordinates, timeout=30):
    """Return the walking duration of a route from the local ORS server.

    Parameters
    ----------
    coordinates : list
        Waypoints as ``[[lng, lat], [lng, lat], ...]``; sent unchanged as the
        ``"coordinates"`` field of the request body.
    timeout : float, optional
        Seconds to wait for the server before ``requests`` raises. Prevents a
        stalled server from hanging a long batch run indefinitely.

    Returns
    -------
    float or int
        ``summary["duration"]`` of the first route on success, ``0`` when the
        summary has no ``"duration"`` key, and ``-1`` when the server reports
        error code 2004 (distance too long) or 2010 (no routable point near a
        coordinate).

    Raises
    ------
    Exception
        ``"Error in request"`` for any other failed response, including bodies
        that are not valid JSON. The request body and raw response text are
        printed first to aid debugging.
    """
    url = "http://localhost:8080/ors/v2/directions/foot-walking"
    # Error codes treated as "no walkable route" rather than as failures.
    unroutable_codes = (2004, 2010)

    body = {"coordinates": coordinates}
    response = requests.post(url, json=body, timeout=timeout)

    # Parse the body once. A non-JSON body (e.g. a proxy error page) must not
    # raise here, otherwise it would hide the real failure reported below.
    try:
        payload = response.json()
    except ValueError:
        payload = None

    if response.status_code == 200 and payload is not None:
        summary = payload["routes"][0]["summary"]
        return summary.get("duration", 0)

    error = payload.get("error") if isinstance(payload, dict) else None
    error_code = error.get("code") if isinstance(error, dict) else None
    if error_code in unroutable_codes:
        return -1

    print(body)
    print(response.text)
    raise Exception("Error in request")

# # Example usage
# directions = get_time_proximity([[144.96332, -37.8140], [144.96332, -37.8120]])
# print(directions)

In [49]:
def get_time_proximity_from_property(row, ptv_stop_coords, tree):
    """Return the shortest walking time from a listing to its candidate stops.

    Parameters
    ----------
    row : mapping
        Must provide ``'lng'``, ``'lat'`` and ``'ptv_index'`` (an iterable of
        row indices into ``ptv_stop_coords``).
    ptv_stop_coords : numpy.ndarray
        Stop coordinates; ``ptv_stop_coords[index]`` is passed on as the
        destination waypoint.
    tree : object
        Unused; kept so existing callers do not break.

    Returns
    -------
    float or int
        The smallest duration returned by ``get_time_proximity`` over all
        candidate stops, ignoring the ``-1`` "unroutable" sentinel. ``-1`` is
        returned only when no candidate stop is routable (or there are none).
    """
    origin = [row['lng'], row['lat']]
    times = [
        get_time_proximity([origin, ptv_stop_coords[index].tolist()])
        for index in row['ptv_index']
    ]

    # -1 marks a failed lookup, not a duration. Feeding it into min() would let
    # one unroutable stop wipe out a valid walking time found for another stop.
    routable_times = [time for time in times if time != -1]
    if not routable_times:
        return -1
    return min(routable_times)

In [58]:
# Shard of the rental listings processed by this cell.
i = 5

# Pass the path straight to pandas so the file is closed after reading; the
# previous `open(path, "r")` handle was never closed.
path = f"../data/raw/oldlistings_rent_{i}.json"
property_df = pd.read_json(path)

# Shortlist the nearest stops for every listing, reusing `num_to_find` rather
# than a second hard-coded 3.
property_coords = property_df[['lng', 'lat']].values
distances, indices = tree.query(property_coords, k=num_to_find)
# Keep one sequence of stop indices per listing even when num_to_find == 1.
indices = indices.reshape(len(property_coords), -1)
property_df["ptv_index"] = list(indices)

# Walking time to the best of the shortlisted stops (one routing request per
# candidate stop), then persist the enriched shard.
property_df["time_to_ptv_1"] = property_df.progress_apply(
    lambda row: get_time_proximity_from_property(row, total_ptv_coords, tree),
    axis=1,
)
property_df.to_csv(f"../data/raw/property/rent_{i}_ptv.csv", index=False)

100%|██████████| 240134/240134 [33:56<00:00, 117.93it/s]


In [51]:
# Persist the enriched shard under the name of the shard actually loaded.
# The file name used to be hard-coded to "rent_2_ptv.csv"; run in notebook
# order, this cell would have overwritten that file with shard `i` data.
property_df.to_csv(f"../data/raw/property/rent_{i}_ptv.csv", index=False)

In [59]:
# Preview the enriched shard; in the recorded output a `time_to_ptv_1` of -1.0
# appears for a listing the routing service could not reach.
property_df

Unnamed: 0,lat,lng,rented_prices,address,meta_data,ptv_index,time_to_ptv_1
0,-38.179179,144.714373,"[{'date': 'May 2023', 'price': '$450 per week'...","15 WARD STREET, ST LEONARDS","[{'label': 'bed', 'description': 'Bed', 'quant...","[21887, 21886, 27953]",38.4
1,-38.182560,144.711730,"[{'date': 'April 2023', 'price': '$420 per wee...","4B LEONARD STREET, ST LEONARDS","[{'label': 'bed', 'description': 'Bed', 'quant...","[26835, 21819, 21816]",218.4
2,-38.166400,144.713570,"[{'date': 'April 2023', 'price': '$400 per wee...","15 SYLVESTER AVENUE, ST LEONARDS","[{'label': 'bed', 'description': 'Bed', 'quant...","[22913, 22906, 22912]",271.3
3,-33.822830,151.195465,"[{'date': 'April 2023', 'price': '$620 per wee...","1403/2-4 ATCHISON STREET, ST LEONARDS","[{'label': 'bed', 'description': 'Bed', 'quant...","[21134, 21133, 21132]",-1.0
4,-38.175384,144.701813,"[{'date': 'April 2023', 'price': '$440 per wee...","12 FLINDERS ROAD, ST LEONARDS","[{'label': 'bed', 'description': 'Bed', 'quant...","[22907, 22908, 21821]",383.3
...,...,...,...,...,...,...,...
240129,-37.905793,144.771489,"[{'date': 'September 2019', 'price': '$380 Wee...","120 BONDI PARADE, POINT COOK","[{'label': 'bed', 'description': 'Bed', 'quant...","[15568, 15567, 15569]",1257.2
240130,-37.897700,144.742945,"[{'date': 'September 2019', 'price': '$350'}, ...","2/4 CADIZ WATERS, POINT COOK","[{'label': 'bed', 'description': 'Bed', 'quant...","[8953, 8949, 8948]",121.8
240131,-37.907750,144.735372,"[{'date': 'September 2019', 'price': '$520'}, ...","2 WHITECAPS AVENUE, POINT COOK","[{'label': 'bed', 'description': 'Bed', 'quant...","[14567, 14568, 14569]",310.0
240132,-37.883631,144.732710,"[{'date': 'September 2019', 'price': '$370 per...","96 BOARDWALK BOULEVARD, POINT COOK","[{'label': 'bed', 'description': 'Bed', 'quant...","[13153, 13152, 13672]",77.1


In [None]:
# Same preview as the previous cell (this cell has no recorded execution).
property_df

In [44]:
# Scratch check: how a nested coordinate list renders as a string.
str([[144.96332, -37.8140], [144.96332, -37.8120]])

'[[144.96332, -37.814], [144.96332, -37.812]]'