In [1]:
import ast
import os
import re
import time

import folium
import pandas as pd

In [5]:
# Load the postcode centroid table; the routing cells read its POSTCODE and centroid columns.
df_all_centroid = pd.read_csv(
    '../../data/raw/external-data/postcode_centroid.csv'
)

In [6]:
# Load the public service table; later cells read its geometry, postcode and NAME columns.
df_public_service = pd.read_csv(
    "../../data/raw/external-data/public_service.csv"
)

In [4]:
# Extract longitude and latitude from the text of the geometry column.
# The first number found is treated as the longitude and the second as the
# latitude (assumes text such as "POINT (144.96 -37.81)" - TODO confirm
# against the raw file).
# The pattern keeps an optional leading sign, so coordinates carry their own
# hemisphere instead of the latitude being force-negated after parsing.
coordinate_pattern = re.compile(r"[-+]?\d+\.?\d*")

loc = {"LATITUDE": [], "LONGITUDE": []}
for geometry in df_public_service['geometry']:
    # parse each row once and keep only the first two numbers
    lon, lat = (float(token) for token in coordinate_pattern.findall(geometry)[:2])
    loc["LATITUDE"].append(lat)
    loc["LONGITUDE"].append(lon)

In [5]:
# add the extracted coordinates to the public service table as two new columns
for coordinate in ("LATITUDE", "LONGITUDE"):
    df_public_service[coordinate] = loc[coordinate]

In [6]:
import openrouteservice as ors

# Build the openrouteservice client used by the routing cells.
# The key is read from the ORS_API_KEY environment variable so a personal key
# never has to be saved inside the notebook. When the variable is not set the
# original placeholder is used and must be replaced with a real key by hand.
client = ors.Client(key=os.environ.get('ORS_API_KEY', 'your_key'))

In [7]:
def nearest(df_all_property, df_facility, index):
    """Find the facility with the shortest driving duration for one postcode row.

    Only facilities whose ``postcode`` equals the ``POSTCODE`` of the selected
    row are considered. Driving distance and duration are requested from the
    module-level openrouteservice ``client``, with every facility as a source
    and the row's centroid as the single destination.

    Parameters
    ----------
    df_all_property : pandas.DataFrame
        Must provide ``POSTCODE`` and ``centroid`` columns. ``centroid`` holds
        the text of a Python literal pair such as ``"(144.96, -37.81)"``
        (presumably ``(longitude, latitude)`` to match the facility points -
        confirm against the centroid file).
    df_facility : pandas.DataFrame
        Must provide ``postcode``, ``NAME``, ``LATITUDE`` and ``LONGITUDE``.
    index : int
        Label of the row of ``df_all_property`` to process.

    Returns
    -------
    tuple
        ``(postcode, name, distance, duration, [longitude, latitude])`` for the
        facility with the smallest duration; distance and duration are passed
        through exactly as returned by the service. The first facility wins a
        tie. When the postcode has no facility the tuple is
        ``(postcode, 'none', 'none', 'none', 'none')``.
    """
    # select the facilities located in the postcode of the requested row
    postcode = df_all_property['POSTCODE'][index]
    facilities = df_facility.loc[df_facility['postcode'] == postcode]

    # if there is no facility in this postcode, early return the 'none' record
    if len(facilities) == 0:
        return (postcode, 'none', 'none', 'none', 'none')

    # The centroid comes from a CSV as text. ast.literal_eval only accepts
    # literals, so unlike eval() it cannot execute code embedded in the file.
    centroid = list(ast.literal_eval(df_all_property['centroid'][index]))

    # facility points in [longitude, latitude] order, one per facility row
    facility_points = [
        [lon, lat]
        for lon, lat in zip(facilities['LONGITUDE'].tolist(),
                            facilities['LATITUDE'].tolist())
    ]

    # location 0 is the centroid, locations 1..n are the facilities
    locations = [centroid] + facility_points

    # driving distance and duration from each facility to the centroid
    matrix = client.distance_matrix(
        locations=locations,
        sources=list(range(1, len(locations))),
        destinations=[0],
        metrics=['distance', 'duration'],
    )

    # each matrix row has a single entry because there is a single destination
    durations = [row[0] for row in matrix['durations']]
    best = min(range(len(durations)), key=durations.__getitem__)

    # Report the distance of the SAME facility that has the shortest duration,
    # so that name, distance, duration and coordinates describe one facility.
    nearest_name = facilities['NAME'].tolist()[best]
    nearest_distance = matrix['distances'][best][0]
    nearest_duration = durations[best]
    nearest_latlon = facility_points[best]

    return (postcode, nearest_name, nearest_distance, nearest_duration, nearest_latlon)


In [8]:
# accumulator for the per-postcode results: one empty list per output column
# (the key order below becomes the column order of the final table)
public_service = {
    column: []
    for column in ("postcode", "nearest_latlon", "nearest_name",
                   "nearest_duration", "nearest_distance")
}

In [9]:
for row_number in range(len(df_all_centroid)):

    # pause for 10 seconds at rows 0, 20, 40, ... to avoid timeout errors
    if row_number % 20 == 0:
        time.sleep(10)

    # nearest() returns (postcode, name, distance, duration, latlon) for the
    # facility with the shortest driving duration in this row's postcode
    outcome = nearest(df_all_centroid, df_public_service, row_number)

    # file every field of the result under its matching key in the dictionary
    result_keys = ("postcode", "nearest_name", "nearest_distance",
                   "nearest_duration", "nearest_latlon")
    for key, value in zip(result_keys, outcome):
        public_service[key].append(value)



In [10]:
# turn the collected lists into a table with one column per dictionary key
postcode_nearest_public_service = pd.DataFrame(data=public_service)

In [12]:
# write the per-postcode result table to disk as CSV (the index is written too)
# NOTE(review): the input files are read from '../../data/...' while this path
# starts with '../data/...' - confirm the intended output directory.
postcode_nearest_public_service.to_csv(
    "../data/raw/ors-data/public_by_postcode.csv"
)