Proximity finding with a locally installed ORS server and the Australia map.



In [None]:
import pandas as pd
import geopandas as gpd
from shapely import wkt
from shapely.geometry import Point
from scipy.spatial import cKDTree
import numpy as np
import requests
import json
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
tqdm.pandas()  # "tqdm>=4.9.0"


# Directions endpoint (driving-car profile) that get_time_proximity posts to.
# NOTE(review): the notebook title mentions a locally installed ORS server, but
# this URL points at api.openrouteservice.org — confirm which one is intended.
ors_url = "https://api.openrouteservice.org/v2/directions/driving-car"

# Loading dataset

## property data

In [2]:
# Input: curated property data. Output: CSV that receives the per-coordinate
# results written further down.
property_data_path = "../data/curated/rental-17-24.csv"
output_dir = "../data/raw/time_park.csv"  # a file path, despite the name

property_df = pd.read_csv(property_data_path)

# Only distinct (lat, lng) pairs are kept, so each location is processed once.
coords = property_df.loc[:, ['lat', 'lng']].drop_duplicates()

## Parks

In [9]:
parks_path = "../data/curated/parks-and-reserves1.csv"

# Keep only the coordinate columns and give them the same names ('lat', 'lng')
# that the property coordinates use.
parks_df = (
    pd.read_csv(parks_path)[['latitude', 'longitude']]
    .rename(columns={'latitude': 'lat', 'longitude': 'lng'})
)

# Proximity

In [11]:
# Number of nearest parks to look up for every property coordinate.
num_to_find = 1

# KD-tree over the parks' (lng, lat) pairs. The query measures plain Euclidean
# distance on these raw coordinate values; it is only used to pick a candidate
# park, the travel time itself comes from the routing service later on.
tree = cKDTree(parks_df[['lng', 'lat']].values)
property_coords = coords[['lng', 'lat']].values

# Honour num_to_find instead of a hard-coded k. With the integer 1 the result
# is a 1-D array holding one positional park index per coordinate.
distances, indices = tree.query(property_coords, k=num_to_find)

# Positional index into parks_df of the nearest park for each coordinate.
coords["park_index"] = list(indices)

In [None]:
# Show how many unique property coordinates were matched to each park index.
coords["park_index"].value_counts()

In [13]:
from collections import defaultdict

# Tally of handled ORS error codes (see _UNROUTABLE_CODES), keyed by code.
err_count = defaultdict(int)

# ORS error codes that mean "no usable route" rather than a failed request:
#   2004 - distance is too long (> 100000.0 m)
#   2010 - could not find a routable point within 400.0 m of a coordinate
_UNROUTABLE_CODES = (2004, 2010)


def get_time_proximity(coordinates, timeout=30):
    """Request a route from the ORS directions endpoint and return its duration.

    Args:
        coordinates: List of ``[lng, lat]`` pairs sent as the request's
            ``coordinates`` field.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error; pass ``None`` to wait indefinitely.

    Returns:
        The ``duration`` of the first route's summary as reported by ORS
        (presumably seconds — confirm against the ORS documentation), ``0``
        when the summary carries no duration, or ``-1`` when ORS answers with
        one of the handled "unroutable" error codes (also counted in
        ``err_count``).

    Raises:
        ValueError: The response body is not valid JSON.
        KeyError, IndexError, TypeError: A 200 response lacks the expected
            ``routes[0].summary`` structure.
        RuntimeError: Any other non-200 response.
    """
    body = {"coordinates": coordinates}
    # A timeout keeps one stuck request from stalling a whole batch run.
    response = requests.post(ors_url, json=body, timeout=timeout)

    # Parse the body once. A non-JSON body (e.g. a proxy error page) is
    # reported via the raw text so the diagnostic itself cannot fail.
    try:
        payload = response.json()
    except ValueError:
        print(response.text)
        print(body)
        raise

    if response.status_code == 200:
        try:
            summary = payload["routes"][0]["summary"]
        except (KeyError, IndexError, TypeError):
            print(payload)
            raise
        # A summary without "duration" is reported as zero travel time.
        return summary.get("duration", 0)

    # The error payload is not guaranteed to be a dict carrying a code.
    error = payload.get("error") if isinstance(payload, dict) else None
    code = error.get("code") if isinstance(error, dict) else None
    if code in _UNROUTABLE_CODES:
        err_count[code] += 1
        return -1

    print(response.text)
    print(body)
    raise RuntimeError("Error in request")

# # Example usage
# directions = get_time_proximity([[144.96332, -37.8140], [144.96332, -37.8120]])
# print(directions)

In [14]:
def get_time_proximity_from_property(row, cities):
    """Return the routed travel time from a property to its matched destination.

    Args:
        row: Mapping-like row holding the property's ``lng`` and ``lat`` and
            the positional index of its matched destination, stored under
            ``park_index`` (the column this notebook creates) or, failing
            that, ``city_index``.
        cities: DataFrame of candidate destinations with ``lng`` and ``lat``
            columns; the destination is looked up by position.

    Returns:
        Whatever ``get_time_proximity`` returns for the
        ``[property, destination]`` coordinate pair.
    """
    # The nearest-neighbour step stores its result as "park_index"; reading
    # only "city_index" raised KeyError. Keep "city_index" as a fallback.
    index_col = "park_index" if "park_index" in row else "city_index"
    # The index may arrive as a float once the row is a mixed-dtype Series.
    destination = cities.iloc[int(row[index_col])]
    coordinates = [
        [row['lng'], row['lat']],
        [destination['lng'], destination['lat']],
    ]
    return get_time_proximity(coordinates)

In [None]:
# (rows, columns) of the de-duplicated coordinate table.
coords.shape

In [None]:
# Travel-time matching: one routing request per unique coordinate, with a
# tqdm progress bar. The result column is named "time_city".
def _time_to_matched_park(row):
    """Return the routed travel time from this row's coordinate to its park."""
    return get_time_proximity_from_property(row, parks_df)


coords["time_city"] = coords.progress_apply(_time_to_matched_park, axis=1)

In [None]:
# Show how many requests ended in each handled ORS error code (2004 / 2010).
err_count

## output intermediate result
This can be used to join on latlng directly

In [21]:
# Write the coordinate table (including the added columns) to output_dir,
# without the DataFrame index.
coords.to_csv(output_dir, index=False)

# join to original dataset

## read all

In [28]:
import sys, os
import pandas as pd
import geopandas as gpd
sys.path.append("../")
from scripts.proximity import proximity_hard_join, proximity_sjoin

In [33]:
# Reload the full property dataset.
cleaned_df = pd.read_csv(property_data_path)
# Reload the intermediate result written above. The previous name
# `city_output` is not defined anywhere in this notebook, so this cell raised
# NameError on a clean top-to-bottom run.
# NOTE(review): assumes the table saved to output_dir is the one meant to be
# joined here — confirm.
city_df = pd.read_csv(output_dir)

In [None]:
# Display the reloaded property dataset.
cleaned_df

In [None]:
# Shape of the frame returned by proximity_sjoin (imported from
# scripts.proximity) for the property data and the travel-time table.
proximity_sjoin(cleaned_df, city_df).shape

In [None]:
# Frequency of each value in the time_city column.
city_df['time_city'].value_counts()

In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
# Travel-time table keyed by coordinate.
city_coords = pd.read_csv("../data/raw/time_city.csv")

# Turn both tables into point GeoDataFrames and tag them with the same CRS
# (EPSG:4326) so they can be spatially joined.
gdf_city_coords = gpd.GeoDataFrame(
    city_coords,
    geometry=gpd.points_from_xy(city_coords.lng, city_coords.lat),
).set_crs("EPSG:4326")
gpd_cleaned_df = gpd.GeoDataFrame(
    cleaned_df,
    geometry=gpd.points_from_xy(cleaned_df.lng, cleaned_df.lat),
).set_crs("EPSG:4326")

# Attach to every property the time_city of its nearest travel-time point,
# then discard the helper index column that the join adds.
joined_gdf = gpd.sjoin_nearest(
    gpd_cleaned_df,
    gdf_city_coords[['geometry', 'time_city']],
    how="left",
    rsuffix='city_coords',
).drop(columns=['index_city_coords'])


In [None]:
# Display the property GeoDataFrame built above.
gpd_cleaned_df


In [None]:
# Column dtypes of the property dataset.
cleaned_df.dtypes