### ``Important:`` Only reproduce (Run All) this when the API's have been restored with at least 5000 total calls
### Use ``Open Route Service API`` to retrieve distance matrix (Iterative version)
#### Quota 500 per client per day, approx 1 year of merged rental-place dataset per client. 

In [1]:
import pandas as pd
import sys
import os
from dotenv import load_dotenv

load_dotenv()
# caution: path[0] is reserved for script path (or '' in REPL)
sys.path.insert(1, '../../scripts/')
import openrouteservice as ors
from add_distance import add_distance_time


In [2]:
ORS_CLIENT_ID = []
for i in range(1, 10):
    ORS_CLIENT_ID.append(os.getenv('CLIENT_ORS'+str(i)))
ORS_CLIENT_ID

['5b3ce3597851110001cf6248b55ae99fc05b4e5d80e222bbbe04657d',
 '5b3ce3597851110001cf6248e33707f5c3914f39b4671bac87646ac7',
 '5b3ce3597851110001cf6248e33390389e0b4e0292fb3dcd2dec286b',
 '5b3ce3597851110001cf62488fd6bd483e964233809653e71984440a',
 '5b3ce3597851110001cf62482d289033372149f7b6419d839d18a3dc',
 '5b3ce3597851110001cf62488fb71274f7ac4b2ca2c679bedd963683',
 '5b3ce3597851110001cf6248768e34108776435cabbcffaf2fb63a60',
 '5b3ce3597851110001cf624851a53f74cc8e4c6c9e751bb8aee27e05',
 '5b3ce3597851110001cf6248ce6c95ac96814219a4c3a7741f323b73']

### The whole Place dataset from 2013 to 2021

In [4]:
if not os.path.exists('../../data/featured'):
    os.makedirs('../../data/featured')
PLACE_COLS = ['Place_Names', 'latitude', 'longitude', 'year', 'place_type', 'SA2_CODE']
place_df = pd.read_csv('../../data/curated/features_of_interst/result_place_all_with_sa2.csv')[PLACE_COLS]
place_df.drop(place_df[place_df['place_type'] == 'market'].index, inplace=True)
print(place_df.shape)
place_df.head(10)

(75723, 6)


Unnamed: 0,Place_Names,latitude,longitude,year,place_type,SA2_CODE
0,GLADSTONE PARK SHOPPING CENTRE,-37.68802,144.88478,2015,shopping,210051245
1,WHITEHORSE PLAZA SHOPPING CENTRE,-37.81804,145.11973,2015,shopping,207031163
2,SOUTHLAND SHOPPING CENTRE,-37.96138,145.05445,2015,shopping,208031188
3,WANTIRNA MALL SHOPPING CENTRE,-37.84693,145.22945,2015,shopping,211011259
4,WARREN VILLAGE,-37.99332,145.09278,2015,shopping,208031193
5,MELTON REGIONAL SHOPPING CENTRE,-37.68627,144.56309,2015,shopping,213041358
6,KUMALA VILLAGE SHOPPING CENTRE,-37.84193,145.25695,2015,shopping,211011251
7,CHADSTONE SHOPPING CENTRE,-37.88647,145.08299,2015,shopping,208041195
8,CAULFIELD PARK SHOPPING CENTRE,-37.87276,145.02417,2015,shopping,208021177
9,DONCASTER SHOPPINGTOWN,-37.78415,145.12639,2015,shopping,207021157


In [5]:
# columns of interest in property dataset
PROPERTY_COLS = ['address', 'latitude', 'longitude', 'nbed', 'nbath', 'ncar', 'weekly_rent', 'postcode', 'year', 'month', 'residence_type', 'SA2_CODE']

In [6]:
# Test the quota status of this API key
#locations = [(144.204874, -37.088043), (144.962379, -37.810454)]
#sources = [0]
#destinations = [1]

# add back 2013, 2014 for reproduction
YEARS = [2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021] # 2022 do separately
client_index = 0 # check this back to 0
for year in YEARS:
    # update client settings
    client1 = ors.Client(ORS_CLIENT_ID[0]) # Philly's api key 500
    client2 = ors.Client(ORS_CLIENT_ID[1]) # Phikho's api key 500
    client3 = ors.Client(ORS_CLIENT_ID[2]) # Phikhocaz's api key 500
    client4 = ors.Client(ORS_CLIENT_ID[3]) # jonghop api key 500
    client5 = ors.Client(ORS_CLIENT_ID[4]) # gracelovesyah api key 500 
    client6 = ors.Client(ORS_CLIENT_ID[5]) # gracelovesyah1 api key 500
    client7 = ors.Client(ORS_CLIENT_ID[6]) # kakakkaa api key 500
    client8 = ors.Client(ORS_CLIENT_ID[7]) # hyunjinp api key 500
    client9 = ors.Client(ORS_CLIENT_ID[8]) # Phikho-caz's api key 2500, back up for at least 4 full year
    client_list = [client1, client2, client3, client4, client5, client6, client7, client8, client9]
    
    place_sample = place_df[place_df['year'] == year] # place df of selected year
    place_sample = place_sample.drop_duplicates(subset=['Place_Names', 'latitude']).reset_index(drop=True)   
    property_df = pd.read_csv(f"../../data/curated/property_all_with_SA2_classified/{year}_property_with_SA2_classified.csv")[PROPERTY_COLS] # property df of selected year
    print(f"Year {year} place sample = {place_sample.shape}")
    print(f"Client {client_index} property df = {property_df.shape}")
    # merge the place df and property df
    merged_df = property_df.merge(place_sample, how='inner', on='SA2_CODE')
    merged_df = merged_df.rename(columns={'latitude_x': 'latitude_ori', 'longitude_x': 'longitude_ori', 'latitude_y': 'latitude_des', 'longitude_y': 'longitude_des', 'year_x': 'year'})
    merged_df.drop('year_y', axis=1, inplace=True)
    print(f"merged df has shape {merged_df.shape}")
    
    client = client_list[client_index]
    added_distance_merged_df = add_distance_time(merged_df, year, client, 'saving')
    print(f"Year {year} completed, distance added dataset has shape {added_distance_merged_df.shape}")
    client_index += 1 # each client is approximately responsible for a year

Year 2013 place sample = (6512, 6)
Client 0 property df = (11466, 12)
merged df has shape (168757, 16)
Executing SA2 Code 211051275
Subset size = 14, Places count = 2, Property count = 7
To Place Distance Grand List, Normal Branch, length = 14
Executing SA2 Code 211051276
Subset size = 506, Places count = 23, Property count = 22
To Place Distance Grand List, Normal Branch, length = 520
Executing SA2 Code 205041094
Subset size = 1073, Places count = 37, Property count = 29
To Place Distance Grand List, Normal Branch, length = 1593
Executing SA2 Code 212041309
Subset size = 14, Places count = 2, Property count = 7
To Place Distance Grand List, Normal Branch, length = 1607
Executing SA2 Code 212051567
Subset size = 145, Places count = 5, Property count = 29
To Place Distance Grand List, Normal Branch, length = 1752
Executing SA2 Code 212041310
Subset size = 64, Places count = 8, Property count = 8
To Place Distance Grand List, Normal Branch, length = 1816
Executing SA2 Code 212051568
Subs