### ``Important:`` Only reproduce (Run All) this notebook once the API keys' quotas have been restored to at least 5000 total calls
### Use ``Open Route Service API`` to retrieve distance matrix (Iterative version)
#### Quota: 500 calls per client per day, which covers approximately 1 year of the merged rental-place dataset per client.

In [1]:
import pandas as pd
import sys
import os
# caution: path[0] is reserved for script path (or '' in REPL)
sys.path.insert(1, '../../scripts/')
import openrouteservice as ors
from add_distance import add_distance_time

### The whole Place dataset from 2013 to 2021

In [2]:
# Make sure the output directory for the featured datasets exists.
# exist_ok=True replaces the exists()-then-makedirs() pair: it is a no-op when
# the directory is already there and avoids the check-then-create race.
os.makedirs('../../data/featured', exist_ok=True)

# Columns kept from the place dataset (SA2_CODE is the key used later to join
# places onto properties).
PLACE_COLS = ['Place_Names', 'latitude', 'longitude', 'year', 'place_type', 'SA2_CODE']
place_df = pd.read_csv('../../data/curated/features_of_interst/place_all_with_sa2.csv')[PLACE_COLS]
print(place_df.shape)
# Last expression of the cell: the notebook renders the first 10 rows.
place_df.head(10)

(70444, 6)


Unnamed: 0,Place_Names,latitude,longitude,year,place_type,SA2_CODE
0,TAHARA BRIDGE RESERVE PICNIC AREA,-37.66568,141.66459,2013,park,217011420
1,CULGOA RECREATION RESERVE,-35.71682,143.10101,2013,park,215031400
2,CUMBERLAND,-38.57263,143.94681,2013,park,203031049
3,WAREENA PARK,-36.35911,146.30977,2013,park,204021066
4,ENTERPRIZE PARK,-37.82,144.95973,2013,park,206041505
5,RYANS RESERVE,-37.66807,145.15741,2013,park,209031215
6,FERNTREE GULLY PICNIC GROUND,-37.89088,145.31692,2013,park,211051282
7,LORRAINE KOVACS RESERVE,-37.97763,145.25403,2013,park,212021454
8,RAY BASTIN RESERVE,-38.03359,145.30791,2013,park,212021456
9,NEVILLE HAMILTON RESERVE,-38.02612,145.34533,2013,park,212021293


In [3]:
# Property-dataset columns to load for each year, in the order they should
# appear in the resulting frame.
PROPERTY_COLS = [
    # where the property is
    'address',
    'latitude',
    'longitude',
    # what the property offers
    'nbed',
    'nbath',
    'ncar',
    'weekly_rent',
    'type',
    'postcode',
    # when it was listed
    'year',
    'month',
    # classification and join key
    'residence_type',
    'SA2_CODE',
]

In [4]:
# Test the quota status of this API key
#locations = [(144.204874, -37.088043), (144.962379, -37.810454)]
#sources = [0]
#destinations = [1]

# NOTE(security): these ORS API keys are committed in plain text. Anyone with
# read access to the notebook can spend the quota; consider loading them from
# an untracked config/environment and rotating the keys.
#
# One ORS client per API key, built once up front. Each client is used for
# exactly one year below, so there is no need to rebuild all of them on every
# loop iteration.
client1 = ors.Client('5b3ce3597851110001cf6248b55ae99fc05b4e5d80e222bbbe04657d') # Philly's api key 500
client2 = ors.Client('5b3ce3597851110001cf6248e33707f5c3914f39b4671bac87646ac7') # Phikho's api key 500
client3 = ors.Client('5b3ce3597851110001cf6248e33390389e0b4e0292fb3dcd2dec286b') # Phikhocaz's api key 500
client4 = ors.Client('5b3ce3597851110001cf62488fd6bd483e964233809653e71984440a') # jonghop api key 500
client5 = ors.Client('5b3ce3597851110001cf62482d289033372149f7b6419d839d18a3dc') # gracelovesyah api key 500
client6 = ors.Client('5b3ce3597851110001cf62488fb71274f7ac4b2ca2c679bedd963683') # gracelovesyah1 api key 500
client7 = ors.Client('5b3ce3597851110001cf6248768e34108776435cabbcffaf2fb63a60') # kakakkaa api key 500
client8 = ors.Client('5b3ce3597851110001cf624851a53f74cc8e4c6c9e751bb8aee27e05') # hyunjinp api key 500
client9 = ors.Client('5b3ce3597851110001cf6248ce6c95ac96814219a4c3a7741f323b73') # Phikho-caz's api key 2500, back up for at least 4 full year
client_list = [client1, client2, client3, client4, client5, client6, client7, client8, client9]

# add back 2013, 2014 for reproduction
YEARS = [2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021] # 2022 do separately
client_index = 0 # check this back to 0

# Fail fast: every year consumes the next client in client_list. Without this
# check a non-zero starting client_index (or a longer YEARS list) would only
# blow up with an IndexError after the earlier years had already spent quota.
if client_index + len(YEARS) > len(client_list):
    raise ValueError(
        f"Not enough API clients: {len(YEARS)} years starting at client index "
        f"{client_index}, but only {len(client_list)} clients are configured"
    )

for year in YEARS:
    # places recorded for this year, one row per (name, latitude) pair
    place_sample = place_df[place_df['year'] == year] # place df of selected year
    place_sample = place_sample.drop_duplicates(subset=['Place_Names', 'latitude']).reset_index(drop=True)
    property_df = pd.read_csv(f"../../data/curated/property_all_with_SA2/{year}_property_with_SA2.csv")[PROPERTY_COLS] # property df of selected year
    print(f"Year {year} place sample = {place_sample.shape}")
    print(f"Client {client_index} property df = {property_df.shape}")

    # Pair every property with every place in the same SA2 area.
    # The place-side 'year' is dropped before merging (both frames are already
    # restricted to the same year), so only latitude/longitude overlap and the
    # suffixes name them directly: *_ori = property (origin),
    # *_des = place (destination).
    merged_df = property_df.merge(
        place_sample.drop(columns='year'),
        how='inner',
        on='SA2_CODE',
        suffixes=('_ori', '_des'),
    )
    print(f"merged df has shape {merged_df.shape}")

    client = client_list[client_index]
    # add_distance_time is the project helper imported from scripts/add_distance
    added_distance_merged_df = add_distance_time(merged_df, year, client, 'saving')
    print(f"Year {year} completed, distance added dataset has shape {added_distance_merged_df.shape}")
    client_index += 1 # each client is approximately responsible for a year

Year 2013 place sample = (5928, 6)
Client 0 property df = (11466, 13)
merged df has shape (153849, 17)
Executing SA2 Code 202021027
Subset size = 210, Places count = 10, Property count = 21
To Place Distance Grand List, Normal Branch, length = 210
Executing SA2 Code 204031070
Subset size = 56, Places count = 8, Property count = 7
To Place Distance Grand List, Normal Branch, length = 266
Executing SA2 Code 211051275
Subset size = 14, Places count = 2, Property count = 7
To Place Distance Grand List, Normal Branch, length = 280
Executing SA2 Code 209031212
Subset size = 575, Places count = 25, Property count = 23
To Place Distance Grand List, Normal Branch, length = 855
Executing SA2 Code 211051276
Subset size = 440, Places count = 20, Property count = 22
To Place Distance Grand List, Normal Branch, length = 1295
Executing SA2 Code 205041094
Subset size = 986, Places count = 34, Property count = 29
To Place Distance Grand List, Normal Branch, length = 2281
Executing SA2 Code 201021012
Su