In [2]:
import os
import time
from functools import reduce

import folium
import numpy as np
import openrouteservice as ors
import pandas as pd

In [3]:
# Please use a collaborative key for openrouteservice.
# The key is read from the ORS_API_KEY environment variable so that it does not
# have to be pasted into (and saved with) the notebook. If the variable is not
# set, the original placeholder is used and must be replaced by hand.
client = ors.Client(key=os.environ.get('ORS_API_KEY', '{YOUR_API_KEY}'))

In [None]:
# Load the curated property table. reset_index() moves the default row index
# into a regular column named 'index' and installs a fresh 0..n-1 index.
properties = pd.read_csv("../../data/curated/distance_to_property.csv").reset_index()

### IMPORTANT NOTE

The sections below (separated by markdown headings) use the openrouteservice API and OpenStreetMap data to calculate route distances.\
Because of API restrictions, one key can only make 10,000 requests per day.\
Our dataframe has around 9,100 records and 6 attributes are derived from the API (see cells below), so each section needs about 9,100 requests.\
Therefore only one section can be executed per day.

#### Find Distance to Nearest School (API)

In [21]:
# Latitude/longitude columns of each property and of the school on the same row.
property_position = properties.loc[:, ['prop_lat', 'prop_long']]
school_position = properties.loc[:, ['school_lat', 'school_long']]


In [26]:
# We need to format the list correctly to use the API.
# The outer list holds one [property, school] pair per row; every point is
# written as [longitude, latitude] (longitude first, i.e. the reverse of the
# lat/long column order used in the dataframe).
propterty_school_coord = pd.merge(property_position.reset_index(), school_position.reset_index())

# Convert each side to a list of [long, lat] points in one vectorised step
# instead of looking rows up one at a time with .iloc inside a Python loop.
property_points = propterty_school_coord[['prop_long', 'prop_lat']].to_numpy().tolist()
school_points = propterty_school_coord[['school_long', 'school_lat']].to_numpy().tolist()
propterty_school_coord_inv = [
    [property_point, school_point]
    for property_point, school_point in zip(property_points, school_points)
]

In [36]:
# This section of code takes about 7 hours to execute
school_routes = []
for coord_pair in propterty_school_coord_inv:
    time.sleep(1.5)  # openrouteservice API allows only 40 requests per minute
    try:
        response = client.directions(coordinates=coord_pair)
        # extract the route distance from the JSON response
        school_routes.append(response['routes'][0]['summary']['distance'])
    except Exception:
        # Best effort: any API or response-parsing failure is recorded as NaN so
        # the list stays aligned with the rows. `except Exception` (not a bare
        # `except`) lets Ctrl-C / a kernel interrupt stop this long-running loop.
        school_routes.append(np.nan)
 

In [100]:
# Add the derived attribute to the original dataframe.
# The list is assigned positionally, so it must contain exactly one value per
# row of `properties`, in row order.
properties['school_route'] = school_routes

In [101]:
# If the API could not find a route distance, we estimate it instead.
# First calculate the average ratio of route distance to straight distance over
# the rows for which the API did return a route.
# Both operands are selected as Series (single brackets): dividing two
# one-column DataFrames would align on their different column labels and yield
# only NaN, leaving increase_rate unusable for the fill in the next cell.
school_routed_rows = properties.dropna(subset=['school_route'])
avg_straight_distance = school_routed_rows['distance_school']
avg_route_distance = school_routed_rows['school_route']
increase_rate = (avg_route_distance / avg_straight_distance).mean()

In [102]:
# Fill missing route distances with an estimate: straight distance × increase_rate.
# NOTE(review): this relies on increase_rate being a scalar; a Series here would
# be index-aligned against the rows instead of broadcast — confirm in the cell above.
estimated_school_route = properties['distance_school'] * increase_rate
properties['school_route'] = properties['school_route'].fillna(estimated_school_route)

#### Find Distance to Nearest Hospital (API)

In [7]:
# Latitude/longitude columns of each property and of the hospital on the same row.
property_position = properties.loc[:, ['prop_lat', 'prop_long']]
hospital_position = properties.loc[:, ['hospital_lat', 'hospital_long']]

In [9]:
# Build the API input: one [property, hospital] pair per row, every point
# written as [longitude, latitude] (longitude first).
propterty_hospital_coord = pd.merge(property_position.reset_index(), hospital_position.reset_index())

# Vectorised conversion to lists of [long, lat] points; avoids per-row .iloc
# lookups inside a Python loop.
property_points = propterty_hospital_coord[['prop_long', 'prop_lat']].to_numpy().tolist()
hospital_points = propterty_hospital_coord[['hospital_long', 'hospital_lat']].to_numpy().tolist()
propterty_hospital_coord_inv = [
    [property_point, hospital_point]
    for property_point, hospital_point in zip(property_points, hospital_points)
]

In [13]:
# Request one route per [property, hospital] pair and keep its distance.
hospital_routes = []
for coord_pair in propterty_hospital_coord_inv:
    time.sleep(1.5)  # throttle requests to stay under the API rate limit
    try:
        response = client.directions(coordinates=coord_pair)
        hospital_routes.append(response['routes'][0]['summary']['distance'])
    except Exception:
        # Best effort: any API or response-parsing failure is recorded as NaN so
        # the list stays aligned with the rows. `except Exception` (not a bare
        # `except`) lets Ctrl-C / a kernel interrupt stop this long-running loop.
        hospital_routes.append(np.nan)
        



In [14]:
# Add the derived attribute to the original dataframe.
# The list is assigned positionally, so it must contain exactly one value per
# row of `properties`, in row order.
properties['hospital_route'] =  hospital_routes

In [29]:
# Where the API returned no route we fall back to an estimate. The scaling
# factor is the mean, over rows that do have a route, of
# route distance / straight distance.
hospital_has_route = properties['hospital_route'].notna()
straight_distance = properties.loc[hospital_has_route, 'distance_hospital']
route_distance = properties.loc[hospital_has_route, 'hospital_route']
increase_rate = (route_distance / straight_distance).mean()

In [31]:
# Fill missing route distances with an estimate: straight distance × increase_rate.
estimated_hospital_route = properties['distance_hospital'] * increase_rate
properties['hospital_route'] = properties['hospital_route'].fillna(estimated_hospital_route)

#### Find Distance to Nearest Train Station (API)

In [5]:
# Latitude/longitude columns of each property and of the station on the same row.
property_position = properties.loc[:, ['prop_lat', 'prop_long']]
station_position = properties.loc[:, ['station_lat', 'station_long']]

In [7]:
# Build the API input: one [property, station] pair per row, every point
# written as [longitude, latitude] (longitude first).
propterty_station_coord = pd.merge(property_position.reset_index(), station_position.reset_index())

# Vectorised conversion to lists of [long, lat] points; avoids per-row .iloc
# lookups inside a Python loop.
property_points = propterty_station_coord[['prop_long', 'prop_lat']].to_numpy().tolist()
station_points = propterty_station_coord[['station_long', 'station_lat']].to_numpy().tolist()
propterty_station_coord_inv = [
    [property_point, station_point]
    for property_point, station_point in zip(property_points, station_points)
]

In [11]:
# Request one route per [property, station] pair and keep its distance.
station_routes = []
for coord_pair in propterty_station_coord_inv:
    time.sleep(1.5)  # throttle requests to stay under the API rate limit
    try:
        response = client.directions(coordinates=coord_pair)
        station_routes.append(response['routes'][0]['summary']['distance'])
    except Exception:
        # Best effort: any API or response-parsing failure is recorded as NaN so
        # the list stays aligned with the rows. `except Exception` (not a bare
        # `except`) lets Ctrl-C / a kernel interrupt stop this long-running loop.
        station_routes.append(np.nan)



In [12]:
# Add the derived attribute to the original dataframe.
# The list is assigned positionally, so it must contain exactly one value per
# row of `properties`, in row order.
properties['station_route'] =  station_routes

In [13]:
# Where the API returned no route we fall back to an estimate. The scaling
# factor is the mean, over rows that do have a route, of
# route distance / straight distance.
station_has_route = properties['station_route'].notna()
straight_distance = properties.loc[station_has_route, 'distance_station']
route_distance = properties.loc[station_has_route, 'station_route']
increase_rate = (route_distance / straight_distance).mean()

In [15]:
# Fill missing route distances with an estimate: straight distance × increase_rate.
estimated_station_route = properties['distance_station'] * increase_rate
properties['station_route'] = properties['station_route'].fillna(estimated_station_route)

#### Find Distance to Nearest Entertainment Facility (API)

In [3]:
# Latitude/longitude columns of each property and of the facility on the same row.
property_position = properties.loc[:, ['prop_lat', 'prop_long']]
facility_position = properties.loc[:, ['facility_lat', 'facility_long']]

In [4]:
# Build the API input: one [property, facility] pair per row, every point
# written as [longitude, latitude] (longitude first).
propterty_facility_coord = pd.merge(property_position.reset_index(), facility_position.reset_index())

# Vectorised conversion to lists of [long, lat] points; avoids per-row .iloc
# lookups inside a Python loop.
property_points = propterty_facility_coord[['prop_long', 'prop_lat']].to_numpy().tolist()
facility_points = propterty_facility_coord[['facility_long', 'facility_lat']].to_numpy().tolist()
propterty_facility_coord_inv = [
    [property_point, facility_point]
    for property_point, facility_point in zip(property_points, facility_points)
]

In [9]:
# Request one route per [property, facility] pair and keep its distance.
facility_routes = []
for coord_pair in propterty_facility_coord_inv:
    # One 2-second pause per request, success or failure. It sits at the top of
    # the loop (as in the other sections) instead of being repeated in both the
    # try and the except branch.
    time.sleep(2)
    try:
        response = client.directions(coordinates=coord_pair)
        facility_routes.append(response['routes'][0]['summary']['distance'])
    except Exception:
        # Best effort: any API or response-parsing failure is recorded as NaN so
        # the list stays aligned with the rows. `except Exception` (not a bare
        # `except`) lets Ctrl-C / a kernel interrupt stop this long-running loop.
        facility_routes.append(np.nan)



In [10]:
# Add the derived attribute to the original dataframe.
# The list is assigned positionally, so it must contain exactly one value per
# row of `properties`, in row order.
properties['facility_route'] =  facility_routes

In [11]:
# Where the API returned no route we fall back to an estimate. The scaling
# factor is the mean, over rows that do have a route, of
# route distance / straight distance.
facility_has_route = properties['facility_route'].notna()
straight_distance = properties.loc[facility_has_route, 'distance_facility']
route_distance = properties.loc[facility_has_route, 'facility_route']
increase_rate = (route_distance / straight_distance).mean()

In [13]:
# Fill missing route distances with an estimate: straight distance × increase_rate.
estimated_facility_route = properties['distance_facility'] * increase_rate
properties['facility_route'] = properties['facility_route'].fillna(estimated_facility_route)

#### Find Distance to Nearest Shopping Centre (API)

In [4]:
# Latitude/longitude columns of each property and of the shopping centre on the same row.
property_position = properties.loc[:, ['prop_lat', 'prop_long']]
shopping_position = properties.loc[:, ['shopping_lat', 'shopping_long']]

In [5]:
# Build the API input: one [property, shopping centre] pair per row, every
# point written as [longitude, latitude] (longitude first).
propterty_shopping_coord = pd.merge(property_position.reset_index(), shopping_position.reset_index())

# Vectorised conversion to lists of [long, lat] points; avoids per-row .iloc
# lookups inside a Python loop.
property_points = propterty_shopping_coord[['prop_long', 'prop_lat']].to_numpy().tolist()
shopping_points = propterty_shopping_coord[['shopping_long', 'shopping_lat']].to_numpy().tolist()
propterty_shopping_coord_inv = [
    [property_point, shopping_point]
    for property_point, shopping_point in zip(property_points, shopping_points)
]

In [7]:
# Request one route per [property, shopping centre] pair and keep its distance.
shopping_routes = []
for coord_pair in propterty_shopping_coord_inv:
    time.sleep(1.5)  # throttle requests to stay under the API rate limit
    try:
        response = client.directions(coordinates=coord_pair)
        shopping_routes.append(response['routes'][0]['summary']['distance'])
    except Exception:
        # Best effort: any API or response-parsing failure is recorded as NaN so
        # the list stays aligned with the rows. `except Exception` (not a bare
        # `except`) lets Ctrl-C / a kernel interrupt stop this long-running loop.
        shopping_routes.append(np.nan)




In [8]:
# Add the derived attribute to the original dataframe.
# The list is assigned positionally, so it must contain exactly one value per
# row of `properties`, in row order.
properties['shopping_route'] =  shopping_routes

In [9]:
# Where the API returned no route we fall back to an estimate. The scaling
# factor is the mean, over rows that do have a route, of
# route distance / straight distance.
shopping_has_route = properties['shopping_route'].notna()
straight_distance = properties.loc[shopping_has_route, 'distance_shopping']
route_distance = properties.loc[shopping_has_route, 'shopping_route']
increase_rate = (route_distance / straight_distance).mean()

In [10]:
# Fill missing route distances with an estimate: straight distance × increase_rate.
estimated_shopping_route = properties['distance_shopping'] * increase_rate
properties['shopping_route'] = properties['shopping_route'].fillna(estimated_shopping_route)

#### Find Distance to Melbourne CBD (API)

In [8]:
# Latitude/longitude columns of each property and of the CBD point on the same row.
property_position = properties.loc[:, ['prop_lat', 'prop_long']]
cbd_position = properties.loc[:, ['cbd_lat', 'cbd_long']]

In [9]:
# Build the API input: one [property, CBD] pair per row, every point written
# as [longitude, latitude] (longitude first).
propterty_cbd_coord = pd.merge(property_position.reset_index(), cbd_position.reset_index())

# Vectorised conversion to lists of [long, lat] points; avoids per-row .iloc
# lookups inside a Python loop.
property_points = propterty_cbd_coord[['prop_long', 'prop_lat']].to_numpy().tolist()
cbd_points = propterty_cbd_coord[['cbd_long', 'cbd_lat']].to_numpy().tolist()
propterty_cbd_coord_inv = [
    [property_point, cbd_point]
    for property_point, cbd_point in zip(property_points, cbd_points)
]

In [11]:
# Request one route per [property, CBD] pair and keep its distance.
cbd_routes = []
for coord_pair in propterty_cbd_coord_inv:
    time.sleep(1.5)  # throttle requests to stay under the API rate limit
    try:
        response = client.directions(coordinates=coord_pair)
        cbd_routes.append(response['routes'][0]['summary']['distance'])
    except Exception:
        # Best effort: any API or response-parsing failure is recorded as NaN so
        # the list stays aligned with the rows. `except Exception` (not a bare
        # `except`) lets Ctrl-C / a kernel interrupt stop this long-running loop.
        cbd_routes.append(np.nan)



In [12]:
# Add the derived attribute to the original dataframe.
# The list is assigned positionally, so it must contain exactly one value per
# row of `properties`, in row order.
properties['cbd_route'] =  cbd_routes

In [13]:
# Where the API returned no route we fall back to an estimate. The scaling
# factor is the mean, over rows that do have a route, of
# route distance / straight distance.
cbd_has_route = properties['cbd_route'].notna()
straight_distance = properties.loc[cbd_has_route, 'distance_CBD']
route_distance = properties.loc[cbd_has_route, 'cbd_route']
increase_rate = (route_distance / straight_distance).mean()

In [14]:
# Fill missing route distances with an estimate: straight distance × increase_rate.
estimated_cbd_route = properties['distance_CBD'] * increase_rate
properties['cbd_route'] = properties['cbd_route'].fillna(estimated_cbd_route)

### Extract Useful columns

In [15]:
# Keep only the `num*` count columns and the six API-derived route distances,
# in the column order used for the exported file.
kept_columns = [
    'numSchool_3km', 'numHospital_1km', 'numStation_1km', 'numShopping_3km', 'numFacility_3km',
    'cbd_route', 'facility_route', 'hospital_route', 'shopping_route', 'station_route', 'school_route',
]
properties = properties[kept_columns]

In [45]:
# Write the selected columns to the curated data folder; index=False leaves the
# dataframe's row index out of the CSV.
properties.to_csv('../../data/curated/api_distance_to_property.csv', index=False)