In [16]:
import os
import time

import folium
import numpy as np
import openrouteservice as ors
import pandas as pd

In [65]:
# Load the curated property table. Later cells read its prop_*, school_*,
# hospital_*, station_* and facility_* lat/long columns and distance_school.
properties = pd.read_csv("../data/curated/distance_to_property.csv")

In [3]:
# Each kind of place is routed in its own section, so split the table into
# one (latitude, longitude) frame per place type.
property_position = properties.loc[:, ['prop_lat', 'prop_long']]
school_position = properties.loc[:, ['school_lat', 'school_long']]
hospital_position = properties.loc[:, ['hospital_lat', 'hospital_long']]
station_position = properties.loc[:, ['station_lat', 'station_long']]
facility_position = properties.loc[:, ['facility_lat', 'facility_long']]

In [4]:
# Build the openrouteservice client. The API key is read from the ORS_API_KEY
# environment variable so it never has to be saved inside the notebook; when
# the variable is unset the key falls back to the empty string.
client = ors.Client(key=os.environ.get('ORS_API_KEY', ''))

### IMPORTANT NOTE

The sections below, separated by markdown cells, use the openrouteservice API and OpenStreetMap to calculate route distances\
Due to the restrictions of the API service, one key can only request 10000 records daily\
Since we have around 9100 records in our dataframe, and there are 6 attributes derived from the API (see cells below),\
we can therefore only execute one section per day

#### Find Distance to Nearest School (API)

In [21]:
# Put property and school coordinates side by side. reset_index() exposes the
# row index as a column, which is the shared column the merge joins on.
propterty_school_coord = property_position.reset_index().merge(school_position.reset_index())

In [26]:
# We need to format the list correctly to use the API:
# an outer list holding one [property, school] pair per row, with every point
# written as [longitude, latitude].
# The four columns are walked once in lockstep instead of re-slicing the whole
# frame on every row, which made the original loop quadratic in the row count.
propterty_school_coord_inv = [
    [[prop_long, prop_lat], [school_long, school_lat]]
    for prop_long, prop_lat, school_long, school_lat in zip(
        propterty_school_coord['prop_long'],
        propterty_school_coord['prop_lat'],
        propterty_school_coord['school_long'],
        propterty_school_coord['school_lat'],
    )
]

In [36]:
# This section of code takes about 7 hours to execute
# One directions request is sent per [property, school] pair; `routes` ends up
# with one entry per pair: the route distance, or NaN when the request failed.
routes = []
for each in propterty_school_coord_inv:
    time.sleep(2)  # openrouteservice API allows only 40 requests per minute
    try:
        route = client.directions(coordinates=each)
        # extract distance from route in form of Json
        routes.append(route['routes'][0]['summary']['distance'])
    except Exception:
        # Best effort: a failed or malformed response becomes NaN and is
        # estimated later. Catching Exception rather than using a bare except
        # lets a kernel interrupt (KeyboardInterrupt) actually stop the loop.
        routes.append(np.nan)
 

In [100]:
# add derived attribute to original dataframe
# `routes` has one entry per coordinate pair (a distance or NaN), in the order
# the pairs were built; assumes that order matches the rows of `properties`
# — TODO confirm.
properties['school_route'] = routes

In [101]:
# If the API could not find a route distance, we estimate it instead.
# First measure how much longer the route distance is than the straight-line
# distance on average, using only the rows for which the API returned a route.
routed_rows = properties.dropna(subset=['school_route'])
# distance_school is in km (per the original note), so scale it by 1000 to
# compare it with school_route.
# Selecting the column as a Series makes .mean() return a scalar; calling
# float() on a one-element Series is deprecated in recent pandas.
avg_straight_distance = float(routed_rows['distance_school'].mean()) * 1000
avg_route_distance = float(routed_rows['school_route'].mean())
increase_rate = avg_route_distance / avg_straight_distance

In [102]:
# Where the API gave no route, substitute the straight distance scaled by the
# increase rate computed above (and by 1000, as in that computation).
school_route_estimate = properties['distance_school'] * increase_rate * 1000
properties['school_route'] = properties['school_route'].fillna(school_route_estimate)

In [105]:
# Persist the table, now including school_route, without writing the row index.
properties.to_csv('../data/curated/api_distance_property.csv', index = False)

In [None]:
# Put property and hospital coordinates side by side, joined on the row index
# column that reset_index() exposes.
propterty_hospital_coord = pd.merge(property_position.reset_index(), hospital_position.reset_index())
# Format for the API: one [property, hospital] pair per row, each point as
# [longitude, latitude]. The merged frame only has hospital_* columns, so the
# second point must read hospital_long / hospital_lat; the school_* names used
# previously do not exist in this frame and raised a KeyError.
propterty_hospital_coord_inv = [
    [[prop_long, prop_lat], [hospital_long, hospital_lat]]
    for prop_long, prop_lat, hospital_long, hospital_lat in zip(
        propterty_hospital_coord['prop_long'],
        propterty_hospital_coord['prop_lat'],
        propterty_hospital_coord['hospital_long'],
        propterty_hospital_coord['hospital_lat'],
    )
]

In [None]:
# One directions request is sent per [property, hospital] pair;
# `hospital_routes` ends up with one entry per pair: the route distance, or
# NaN when the request failed.
hospital_routes = []
for each in propterty_hospital_coord_inv:
    time.sleep(2)  # pause between requests to stay under the API rate limit
    try:
        route = client.directions(coordinates=each)
        hospital_routes.append(route['routes'][0]['summary']['distance'])
    except Exception:
        # Best effort: record NaN for a failed or malformed response. Catching
        # Exception rather than using a bare except lets a kernel interrupt
        # (KeyboardInterrupt) actually stop this long-running loop.
        hospital_routes.append(np.nan)