In [None]:
from tqdm import tqdm
import requests
import pandas as pd
import os

In [None]:
files_dire = '../data/curated/'
df = pd.read_csv(f'{files_dire}/listing_with_features.csv').iloc[: , 1:]
df.head()

## Magic Numbers

In [None]:
# the location of melbourne central
mel_lat = -37.810246
mel_lon = 144.962768

In [None]:
# use Google Map API
API_KEY = 'YOUR_API_KEY'
payload = {}
headers = {}
values = ['distance', 'duration']

In [None]:
# the features that are going to be used by the API
features = ['address', 'pri_lat', 'pri_lon', 'sec_lat', 'sec_lon', 'ed_lat', 'ed_lon', 'train_lat', 'train_lon']
df = df[features]
n_rows = len(df)
print(n_rows)

In [84]:
# as the dataset is huge and requires hours of runtime
# each run session is cashed locally so they are not requested multiple times
if os.path.exists('../data/raw/api.csv'):
    df_done = pd.read_csv('../data/raw/api.csv').iloc[:,1:]
    requested_addresses = df_done['address'].tolist()
    # only retain the addresses that have not been requested through Google API
    df = df[~df['address'].isin(requested_addresses)]

In [None]:
results = []

In [None]:
for row_no in tqdm(range(n_rows)):
    try:
        i = df.iloc[row_no]
        # make the address acceptable by the API
        req_add = i['address'].replace(" ", "+").replace(",", "%2C") +'%2C+Victoria'
        url = f'https://maps.googleapis.com/maps/api/distancematrix/json?origins={req_add}&destinations={i[1]}%2C{i[2]}%7C{i[3]}%2C{i[4]}%7C{i[5]}%2C{i[6]}%7C{i[7]}%2C{i[8]}%7C{mel_lat}%2C{mel_lon}&key={API_KEY}'
        response = requests.request("GET", url, headers=headers, data=payload)
        x = response.json()
        result = [i['address']]
        for i in range(5):
            for value in values:
                result.append(x['rows'][0]['elements'][i][value]['value'])
        # store the result to a list
        results.append(result)
    except:
        pass

In [83]:
# save the result to a CSV for local caches and feature combinations
column_names = ['address', 'primary_distance', 'primary_duration', 'secondary_distance', 'secondary_duration', 'ed_distance', 'ed_duration', 'train_distance','train_duration', 'melb_distance', 'melb_duration']
if os.path.exists('../data/raw/api.csv'):
    df_done = df_done.append(pd.DataFrame(results, columns=column_names))
else:
    df_done = pd.DataFrame(results, columns=column_names)

  df_done = df_done.append(pd.DataFrame(results, columns=column_names))


In [None]:
df_done.to_csv('../data/raw/api.csv')

### Merge with the listing information with features engineered

In [85]:
df3 = pd.read_csv('../data/curated/listing_with_features.csv').iloc[:,1:]

In [86]:
df3.head()

Unnamed: 0,bed,bath,car,type,address,suburb,postcode,url,loc_address,lat,...,proj_population_prime_working,proj_population_mature_working,proj_population_elderly,closest_ed_name,ed_lat,ed_lon,train_stop,train_n_lines,train_lat,train_lon
0,3,1,2,AUF,". GLYNDON ROAD, CAMBERWELL",Camberwell,3124,https://www.oldlistings.com.au/real-estate/VIC...,"Glyndon Road, Camberwell, Melbourne, City of B...",-37.83623,...,9588,2937,4884,Box Hill Hospital,-37.815458,145.119672,Hartwell Railway Station (Camberwell),1,-37.843985,145.07556
1,4,2,3,AUF,"1 NEVIS STREET, CAMBERWELL",Camberwell,3124,https://www.oldlistings.com.au/real-estate/VIC...,"Nevis Street, Camberwell, Melbourne, City of B...",-37.843101,...,9588,2937,4884,Box Hill Hospital,-37.815458,145.119672,Hartwell Railway Station (Camberwell),1,-37.843985,145.07556
2,2,1,1,AUF,"1/10 GLENCAIRN AVENUE, CAMBERWELL",Camberwell,3124,https://www.oldlistings.com.au/real-estate/VIC...,"Glencairn Avenue, Camberwell, Melbourne, City ...",-37.848191,...,9588,2937,4884,Box Hill Hospital,-37.815458,145.119672,Burwood Railway Station (Glen Iris),1,-37.851563,145.080511
3,3,2,2,House,"1/1017 TOORAK ROAD, CAMBERWELL",Camberwell,3124,https://www.oldlistings.com.au/real-estate/VIC...,"Toorak Road, Camberwell, Melbourne, City of Bo...",-37.850405,...,9588,2937,4884,Box Hill Hospital,-37.815458,145.119672,Burwood Railway Station (Glen Iris),1,-37.851563,145.080511
4,2,1,1,House,"1/11 EDDY STREET, CAMBERWELL",Camberwell,3124,https://www.oldlistings.com.au/real-estate/VIC...,"Eddy Street, Camberwell, Melbourne, City of Bo...",-37.846792,...,9588,2937,4884,Box Hill Hospital,-37.815458,145.119672,Hartwell Railway Station (Camberwell),1,-37.843985,145.07556


In [87]:
df3 = pd.merge(df3, df_done, on='address')

In [89]:
df3.to_csv('../data/curated/full_listing.csv')