### Importing Libraries and Functions

In [1]:
import sys
sys.path.append("../")
import overpy
from scripts.amenities_functions import fetch_amenities, get_amenity_distances
from scripts.amenities_functions import get_cities, get_dist_to_city
import pandas as pd

### Accessing Overpass API

In [2]:
# Create the Overpass API client; it is passed to fetch_amenities and
# get_cities further down so all OSM queries share this one client object.
overpass_api = overpy.Overpass()


### Reading in the datasets

In [3]:
# Locations of the input files, relative to this notebook.
CURATED_PROPERTIES_PATH = "../data/curated/properties.parquet"
OLDLISTING_DIR = "../data/raw/oldlisting"

# Domain datasets (Greater Melbourne / Rest of Vic).
# NOTE(review): both Domain frames are loaded from the same parquet file, so
# they start out with identical contents — confirm whether each should come
# from its own file or be filtered by region after loading.
gm_domain_df = pd.read_parquet(CURATED_PROPERTIES_PATH)
rv_domain_df = pd.read_parquet(CURATED_PROPERTIES_PATH)

# oldlistings datasets (Greater Melbourne / Rest of Vic).
gm_oldlisting_df = pd.read_csv(f"{OLDLISTING_DIR}/gm_oldlisting.csv")
rv_oldlisting_df = pd.read_csv(f"{OLDLISTING_DIR}/rv_oldlisting.csv")

### Fetching the Amenities Data

In [4]:
# OSM tag filters (key, value) that define each amenity category.
AMENITY_TAGS = {
    "education": [("amenity", "kindergarten"), ("amenity", "school")],
    "parks_and_gardens": [("leisure", "park"), ("leisure", "garden")],
    "train_station": [("railway", "station")],
    "shopping": [("shop", "supermarket"), ("shop", "mall")],
    "healthcare": [("amenity", "hospital"), ("amenity", "clinic")],
}


def _overpass_clauses(element, tag_filters):
    """Build the Overpass QL filter statements for one OSM element type.

    Args:
        element: OSM element keyword placed at the start of each statement
            ("node" or "way").
        tag_filters: iterable of (key, value) tag pairs; one statement is
            produced per pair, each restricted to ``area.searchArea``.

    Returns:
        A multi-line string: a leading newline, one indented statement per
        tag pair, and a trailing four-space indent.
    """
    statements = "".join(
        f'        {element}["{key}"="{value}"](area.searchArea);\n'
        for key, value in tag_filters
    )
    return "\n" + statements + "    "


# Dictionary storing the queries for different amenities. Each value is a
# two-item list: [query over nodes, query over ways].
queries = {
    category: [
        _overpass_clauses("node", tag_filters),
        _overpass_clauses("way", tag_filters),
    ]
    for category, tag_filters in AMENITY_TAGS.items()
}


In [5]:
# Fetched amenity tables, keyed by amenity category.
amenities_dfs = {}

# Fetch each category in turn. A failure for one category is reported and
# skipped so the remaining categories are still attempted; a failed category
# is simply absent from amenities_dfs afterwards.
for category, query_pair in queries.items():
    try:
        # query_pair holds the node query first and the way query second.
        fetched = fetch_amenities(overpass_api, query_pair[0], query_pair[1])
    except Exception as err:
        print(f"Error fetching data for {category}: {err}")
    else:
        amenities_dfs[category] = fetched
        print(f"Successfully fetched data for {category}")

Successfully fetched data for education
Successfully fetched data for parks_and_gardens
Successfully fetched data for train_station
Successfully fetched data for shopping
Successfully fetched data for healthcare


In [6]:
# Display the fetched amenity tables (dict keyed by amenity category) to
# inspect what was returned for each category.
amenities_dfs

{'education':               id                                     name       amenity  \
 0      148544339                        Syndal Pre-School  kindergarten   
 1      191834621                       Tally Ho Preschool  kindergarten   
 2      207718805                      St Johns Pre-School  kindergarten   
 3      246969693             Waverley Foothills Preschool  kindergarten   
 4      247169615           Brunswick Crèche & Day Nursery  kindergarten   
 ...          ...                                      ...           ...   
 3475  1315871094  Shine Bright St Margaret's Kindergarten  kindergarten   
 3476  1316142653                 St Albans East Preschool  kindergarten   
 3477  1318009340             Country Bunch Early Learning  kindergarten   
 3478  1318025539                    Montessori Beginnings  kindergarten   
 3479  1318235998                   Happy Turtle Childcare  kindergarten   
 
                                  lat                            lon  
 0

### Fetching Major City Coordinates in Victoria

In [7]:
# Overpass QL query returning every node tagged place=city inside the area
# named "Victoria". Only place=city is selected; towns are not included.
# NOTE(review): the area is matched by name alone — confirm it resolves only
# to Victoria, Australia.
query = """
[out:json];
area[name="Victoria"]->.searchArea;
(
  node["place"="city"](area.searchArea);
);
out body;
"""


In [8]:
# Run the city query through the Overpass client and keep the result as
# cities_df, then preview the first 10 rows.
# NOTE(review): get_cities lives in scripts.amenities_functions (not shown
# here); presumably it returns one row per city with name/place_type/lat/lon,
# as the recorded output suggests — confirm there.
cities_df = get_cities(overpass_api, query)
cities_df.head(10)

Unnamed: 0,name,place_type,lat,lon
0,Melbourne,city,-37.8142454,144.9631732
1,Mildura,city,-34.195274,142.1503146
2,Bendigo,city,-36.7590183,144.2826718
3,Geelong,city,-38.1493248,144.3598241
4,Ballarat,city,-37.5623013,143.8605645
5,Shepparton,city,-36.3831633,145.3988874
6,Warrnambool,city,-38.3826242,142.4814199
7,Traralgon,city,-38.1946636,146.5381646
8,Wodonga,city,-36.1205539,146.8880837


### Getting Driving Distance to Cities and Amenities

In [9]:
# Initialising ORS api_keys list. The list is handed to get_dist_to_city and
# get_amenity_distances below; the trailing comments record each key's owner
# and quota.
# Every key must be its own list element: adjacent string literals without a
# separating comma are silently concatenated by Python into one (invalid) key.
# NOTE(review): these keys are stored in plain text in the notebook — consider
# loading them from environment variables or an untracked config file, and
# rotating any key that has been committed.
api_keys = [
    '5b3ce3597851110001cf624842eb8f2652b14c0589793fa6e32db6ba',  # Oscar - 2500 limit
    '5b3ce3597851110001cf62484999c1f7edce4ac5a072b1c9fb50ffa2',  # Nasser - 500 limit
    '5b3ce3597851110001cf6248fdd0ae85071d43598ef26e7a446a4f78',  # Oscar - 500 limit
    '5b3ce3597851110001cf6248c9d76723ef574cf3a8479cd0665e80fa',  # Nasser - 2500 limit
    '5b3ce3597851110001cf6248f3f1041174014fbba5afd3b9332b778b',  # Uma - 2500 limit
    '5b3ce3597851110001cf6248c64a6697beff46e1903a7e15a67239ad',  # Laura - 500 limit
    '5b3ce3597851110001cf6248250927fcb46d441e9b1560ab5875952d',  # Charlie - 500 limit
]



1. Getting all the distances to the Major Cities

In [None]:
# Greater Melbourne - oldlisting data
# Compute the city-distance table for the Greater Melbourne oldlisting records
# and persist it to CSV. gm_c_oldlisting_df is also the input of the matching
# amenity-distance cell later in this notebook.
# NOTE(review): get_dist_to_city comes from scripts.amenities_functions (not
# shown here); presumably it calls the ORS API using api_keys — confirm there.

gm_c_oldlisting_df = get_dist_to_city(gm_oldlisting_df, cities_df, api_keys)
gm_c_oldlisting_df.to_csv("../data/raw/oldlisting/gm_c_oldlisting.csv")

In [117]:
# Rest of Vic - oldlisting data
# Compute the city-distance table for the Rest of Vic oldlisting records and
# persist it to CSV. rv_c_oldlisting_df is also the input of the matching
# amenity-distance cell later in this notebook.
# NOTE(review): get_dist_to_city comes from scripts.amenities_functions (not
# shown here); presumably it calls the ORS API using api_keys — confirm there.

rv_c_oldlisting_df = get_dist_to_city(rv_oldlisting_df, cities_df, api_keys)
rv_c_oldlisting_df.to_csv("../data/raw/oldlisting/rv_c_oldlisting.csv")

In [None]:
# Greater Melbourne - Domain data
# Compute the city-distance table for the Greater Melbourne Domain records and
# persist it to CSV. gm_c_domain_df is also the input of the matching
# amenity-distance cell later in this notebook.
# NOTE(review): get_dist_to_city comes from scripts.amenities_functions (not
# shown here); presumably it calls the ORS API using api_keys — confirm there.

gm_c_domain_df = get_dist_to_city(gm_domain_df, cities_df, api_keys)
gm_c_domain_df.to_csv("../data/raw/domain/gm_c_domain.csv")

In [10]:
# Rest of Vic - Domain data
# Compute the city-distance table for the Rest of Vic Domain records and
# persist it to CSV.
# NOTE(review): the recorded run of this cell ended in a KeyboardInterrupt, so
# in that session neither rv_c_domain_df nor the CSV was produced by this cell;
# the later amenity cell reads rv_c_domain.csv from disk instead.
# NOTE(review): get_dist_to_city comes from scripts.amenities_functions (not
# shown here); presumably it calls the ORS API using api_keys — confirm there.

rv_c_domain_df = get_dist_to_city(rv_domain_df, cities_df, api_keys)
rv_c_domain_df.to_csv("../data/raw/domain/rv_c_domain.csv")

KeyboardInterrupt: 

2. Getting all the distances to the Amenities

In [9]:
# Greater Melbourne - oldlisting data
# Extend the city-distance table (gm_c_oldlisting_df) with amenity distances
# for every category in amenities_dfs, then persist the result to CSV.
# Requires gm_c_oldlisting_df from the earlier city-distance cell to be in
# memory.
# NOTE(review): get_amenity_distances comes from scripts.amenities_functions
# (not shown here); the recorded output shows it processing one category at a
# time and switching API key on "Quota exceeded" errors — confirm details there.

gm_c_a_oldlisting_df = get_amenity_distances(gm_c_oldlisting_df, amenities_dfs, api_keys)
gm_c_a_oldlisting_df.to_csv("../data/raw/oldlisting/gm_c+a_oldlisting.csv")

Processing education...
Error with batch 0.0: 403 ({'error': 'Quota exceeded'})
Quota limit exceeded for API key 5b3ce3597851110001cf62484999c1f7edce4ac5a072b1c9fb50ffa2
Using a new key... Waiting for 10 seconds before continuing.
Error with batch 360.0: HTTP Error: 502
Unhandled error occurred: HTTP Error: 502. Retrying after 10 seconds...
Processing parks_and_gardens...
Error with batch 0.0: 403 ({'error': 'Quota exceeded'})
Quota limit exceeded for API key 5b3ce3597851110001cf62484999c1f7edce4ac5a072b1c9fb50ffa2
Using a new key... Waiting for 10 seconds before continuing.
Error with batch 148.0: 403 ({'error': 'Quota exceeded'})
Quota limit exceeded for API key 5b3ce3597851110001cf6248fdd0ae85071d43598ef26e7a446a4f78
Using a new key... Waiting for 10 seconds before continuing.
Processing train_station...
Error with batch 0.0: 403 ({'error': 'Quota exceeded'})
Quota limit exceeded for API key 5b3ce3597851110001cf62484999c1f7edce4ac5a072b1c9fb50ffa2
Using a new key... Waiting for 10 s

In [119]:
# Rest of Vic - oldlisting data
# Extend the city-distance table (rv_c_oldlisting_df) with amenity distances
# for every category in amenities_dfs, then persist the result to CSV.
# Requires rv_c_oldlisting_df from the earlier city-distance cell to be in
# memory.
# NOTE(review): get_amenity_distances comes from scripts.amenities_functions
# (not shown here); confirm its exact output columns there.

rv_c_a_oldlisting_df = get_amenity_distances(rv_c_oldlisting_df, amenities_dfs, api_keys)
rv_c_a_oldlisting_df.to_csv("../data/raw/oldlisting/rv_c+a_oldlisting.csv")

Processing education...
Processing parks_and_gardens...
Processing train_station...
Processing shopping...
Error with batch 186.0: HTTP Error: 502
Unexpected error occurred: HTTP Error: 502.
Retrying after 3 seconds...
Processing healthcare...


In [None]:
# Greater Melbourne - Domain data
# Extend the city-distance table (gm_c_domain_df) with amenity distances for
# every category in amenities_dfs, then persist the result to CSV.
# Requires gm_c_domain_df from the earlier city-distance cell to be in memory.
# NOTE(review): get_amenity_distances comes from scripts.amenities_functions
# (not shown here); confirm its exact output columns there.

gm_c_a_domain_df = get_amenity_distances(gm_c_domain_df, amenities_dfs, api_keys)
gm_c_a_domain_df.to_csv("../data/raw/domain/gm_c+a_domain.csv")

In [11]:
# Rest of Vic - Domain data
# Reload the city-distance table from disk (instead of relying on an in-memory
# frame from the city-distance cell), add amenity distances for every category
# in amenities_dfs, and persist the result to CSV.
# index_col=0: rv_c_domain.csv is written by DataFrame.to_csv with its default
# index=True, so the first column is the saved index. Reading it back as the
# index avoids a spurious "Unnamed: 0" data column being passed on and re-saved.
# NOTE(review): get_amenity_distances comes from scripts.amenities_functions
# (not shown here); confirm its exact output columns there.
rv_c_domain_df = pd.read_csv("../data/raw/domain/rv_c_domain.csv", index_col=0)
rv_c_a_domain_df = get_amenity_distances(rv_c_domain_df, amenities_dfs, api_keys)
rv_c_a_domain_df.to_csv("../data/raw/domain/rv_c+a_domain.csv")

Processing education...
Processing parks_and_gardens...
Processing train_station...
Processing shopping...
Processing healthcare...
