In [None]:
# Starting from the dataset with embedded features,
# we now want the travel information for each property address, namely the
# distance and duration from the address to its points of interest.

In [None]:
from tqdm import tqdm
import requests
import pandas as pd
import os

In [None]:
# load the curated listings; the first CSV column is dropped
# (presumably an index column written by an earlier to_csv — confirm)
files_dire = '../data/curated/'
listing_path = f'{files_dire}/listing_with_features.csv'
df = pd.read_csv(listing_path).iloc[:, 1:]
df.head()

In [None]:
# latitude / longitude of Melbourne Central, used as an extra destination
mel_lat, mel_lon = -37.810246, 144.962768

In [None]:
# Google Maps API configuration.
# The key is read from the GOOGLE_MAPS_API_KEY environment variable so that it
# never has to be pasted into (and committed with) the notebook; the old
# placeholder is kept as a fallback so existing workflows keep working.
API_KEY = os.environ.get('GOOGLE_MAPS_API_KEY') or 'YOUR_API_KEY'
# no request body and no extra headers are sent with the API requests
payload = {}
headers = {}
# the two fields read from every element of the API response
values = ['distance', 'duration']

In [None]:
# columns needed to build the API requests: the listing address followed by
# one latitude/longitude pair per point of interest
features = [
    'address',
    'pri_lat', 'pri_lon',
    'sec_lat', 'sec_lon',
    'ed_lat', 'ed_lon',
    'train_lat', 'train_lon',
]
df = df.loc[:, features]
# number of listings before any cached address is filtered out
n_rows = df.shape[0]
print(n_rows)

In [None]:
# The dataset is huge and takes hours to request, so the results of every run
# session are cached locally and no address is requested more than once.
if os.path.exists('../data/raw/api.csv'):
    df_done = pd.read_csv('../data/raw/api.csv')
    # the first column of the cache file is dropped
    df_done = df_done.iloc[:, 1:]
    requested_addresses = df_done['address'].tolist()
    # keep only the listings whose address has not been requested yet
    already_requested = df['address'].isin(requested_addresses)
    df = df.loc[~already_requested]

In [None]:
# accumulator for the API results, one list per successfully requested address
results = list()

In [None]:
# Query the Google Distance Matrix API once per listing still left in `df`.
# Every successful request appends one row to `results`:
#   [address, distance, duration, distance, duration, ...]
# with one distance/duration pair per destination, in the order listed below
# (the four points of interest, then Melbourne Central).
destination_columns = [
    ('pri_lat', 'pri_lon'),
    ('sec_lat', 'sec_lon'),
    ('ed_lat', 'ed_lon'),
    ('train_lat', 'train_lon'),
]
endpoint = 'https://maps.googleapis.com/maps/api/distancematrix/json'
# (address, reason) for every listing that produced no result, so failures are
# visible instead of being silently dropped
failed_requests = []

# Iterate over the rows actually present in `df`: `n_rows` was computed before
# the cached addresses were filtered out and can therefore overshoot.
for row_no in tqdm(range(len(df))):
    listing = df.iloc[row_no]
    address = listing['address']
    if not isinstance(address, str):
        # e.g. a missing address (NaN) cannot be geocoded
        failed_requests.append((address, 'address is not a string'))
        continue

    # columns are read by label; positional lookups on a labelled Series are deprecated
    destinations = [f'{listing[lat]},{listing[lon]}' for lat, lon in destination_columns]
    destinations.append(f'{mel_lat},{mel_lon}')
    # let requests do the URL encoding so characters such as '&' or '#' in an
    # address cannot corrupt the query string
    params = {
        'origins': f'{address}, Victoria',
        'destinations': '|'.join(destinations),
        'key': API_KEY,
    }
    try:
        # the timeout keeps one stalled connection from blocking the whole run
        response = requests.get(endpoint, params=params, headers=headers,
                                data=payload, timeout=30)
        response.raise_for_status()
        elements = response.json()['rows'][0]['elements']
        result = [address]
        for dest_no in range(len(destinations)):
            for value in values:
                # raises KeyError when the element has no distance/duration
                result.append(elements[dest_no][value]['value'])
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as err:
        # best effort: skip this address but remember why; KeyboardInterrupt
        # is deliberately NOT caught so the long run can still be stopped
        failed_requests.append((address, repr(err)))
        continue
    # store the result to a list
    results.append(result)

if failed_requests:
    print(f'{len(failed_requests)} of {len(df)} addresses returned no usable result')

In [None]:
# Combine the results of this session with the cached results of earlier
# sessions; `df_done` is later written to the local cache and merged with the
# listing features.
column_names = ['address', 'primary_distance', 'primary_duration', 'secondary_distance', 'secondary_duration', 'ed_distance', 'ed_duration', 'train_distance','train_duration', 'melb_distance', 'melb_duration']
new_results = pd.DataFrame(results, columns=column_names)
if os.path.exists('../data/raw/api.csv'):
    # DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
    # Nothing is concatenated when this session produced no rows, so the
    # cached dtypes are never disturbed by an empty frame.
    if not new_results.empty:
        df_done = pd.concat([df_done, new_results], ignore_index=True)
else:
    df_done = new_results

In [None]:
# persist the cache; the index is written as the first CSV column
df_done.to_csv('../data/raw/api.csv', index=True)

### Merge with the feature-engineered listing information

In [None]:
# reload the full curated listings and drop the first CSV column
df3 = pd.read_csv('../data/curated/listing_with_features.csv')
df3 = df3.iloc[:, 1:]

In [None]:
# preview the first five listings
df3.head(n=5)

In [None]:
# Attach the API distances/durations to the listings (inner join on address).
# Keep a single API row per address first: if an address occurs k times in
# both frames, a plain merge would emit k*k rows for it.
api_features = df_done.drop_duplicates(subset='address')
df3 = pd.merge(df3, api_features, on='address')

In [None]:
# write the merged listings; the index is written as the first CSV column
df3.to_csv('../data/curated/full_listing.csv', index=True)