### Importing Libraries and Functions

In [1]:
import sys
sys.path.append("../")
import overpy
from scripts.amenities_functions import fetch_amenities
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

### Accessing OverPass API

In [2]:
# Initialize the Overpass API
overpass_api = overpy.Overpass()


### Reading in the Datasets (I'm not using these datasets yet)

- I only have oldlisting data
- I used the finalised preprocessed one

In [4]:
# Domain datasets
#gm_domain_df = pd.read_csv("../data/raw/all_domain_properties.parquet")
#rv_domain_df = pd.read_csv("../data/raw/all_domain_properties.parquet")

# oldlistings datasets
gm_oldlisting_df = pd.read_csv("../data/raw/oldlisting/gm_oldlisting_final.csv")
rv_oldlisting_df = pd.read_csv("../data/raw/oldlisting/rv_oldlisting_final.csv")

In [5]:
# SA2 Shapefile
sa2_gdf = gpd.read_file("../data/SA2/extracted_SA2/SA2_2021_AUST_GDA2020.shp")

### Fetching the Amenities

* I made a dictionary to separate the datasets between amenities
* Paste this query into ChatGPT and ask it to use the same format to give you a query for whatever amenities you want.
* If you do, just double check what it gives, sometimes it'll add more then what you want
    - e.g It game me pharmacies when I asked for healthcare, so I removed it and kept only clinics and hospitals

In [6]:
# Dictionary storing the queries for different amenities (nodes and ways)
queries = {
    "education": ["""
        node["amenity"="kindergarten"](area.searchArea);
        node["amenity"="school"](area.searchArea);
    """, 
    """
        way["amenity"="kindergarten"](area.searchArea);
        way["amenity"="school"](area.searchArea);
    """],
    "parks_and_gardens": ["""
        node["leisure"="park"](area.searchArea);
        node["leisure"="garden"](area.searchArea);
    """,
    """
        way["leisure"="park"](area.searchArea);
        way["leisure"="garden"](area.searchArea);
    """],
    "train_station": ["""
        node["railway"="station"](area.searchArea);
    """, 
    """
        way["railway"="station"](area.searchArea);
    """],
    "shopping": ["""
        node["shop"="supermarket"](area.searchArea);
        node["shop"="mall"](area.searchArea);
    """, 
    """
        way["shop"="supermarket"](area.searchArea);
        way["shop"="mall"](area.searchArea);
    """],
    "healthcare": ["""
        node["amenity"="hospital"](area.searchArea);
        node["amenity"="clinic"](area.searchArea);
    """,
    """
        way["amenity"="hospital"](area.searchArea);
        way["amenity"="clinic"](area.searchArea);
    """]
}


In [20]:
# Dictionary to store DataFrames for each amenity
amenities_dfs = {}

# Iterate through the queries and fetch data for each
for amenity_type, query in queries.items():
    try:
        df = fetch_amenities(overpass_api, query[0], query[1])
        amenities_dfs[amenity_type] = df
        print(f"Successfully fetched data for {amenity_type}")
    except Exception as e:
        print(f"Error fetching data for {amenity_type}: {e}")

Successfully fetched data for education
Successfully fetched data for parks_and_gardens
Successfully fetched data for train_station
Successfully fetched data for shopping
Successfully fetched data for healthcare


### Finding Which SA2 it belongs to

In [17]:
# Added this function now
# Will eventually move it to a script if it works

import geopandas as gpd
import pandas as pd

def map_amenities_to_sa2(df_amenities, sa2_gdf):
    '''
    Maps amenities to SA2 regions and returns a Pandas DataFrame with the SA2 name appended.
    '''
    
    # Create a GeoDataFrame for amenities
    gdf_amenities = gpd.GeoDataFrame(df_amenities, 
                                     geometry=gpd.points_from_xy(df_amenities.lon, df_amenities.lat),
                                     crs="EPSG:4326")
    
    # Perform a spatial join to find which SA2 region each amenity belongs to
    gdf_amenities_with_sa2 = gpd.sjoin(gdf_amenities, sa2_gdf[['SA2_NAME21', 'geometry']], how="left", op="within")
    
    # Convert the GeoDataFrame back to a Pandas DataFrame and keep only the necessary columns
    df_amenities_with_sa2 = pd.DataFrame(gdf_amenities_with_sa2.drop(columns=['geometry']))

    # Return only the original columns and the SA2 name
    return df_amenities_with_sa2[['id', 'name', 'amenity', 'lat', 'lon', 'SA2_NAME21']]



In [21]:
# Iterates through each amenity and adds SA2 region

for amenity_type, amenity_df in amenities_dfs.items():
    try:
        df = map_amenities_to_sa2(amenity_df, sa2_gdf)
        amenities_dfs[amenity_type] = df
        print(f"Successfully fetched SA2 regions for {amenity_type}")
    except Exception as e:
        print(f"Error fetching SA2 region for {amenity_type}: {e}")

  exec(code_obj, self.user_global_ns, self.user_ns)
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:4326
Right CRS: EPSG:7844

  gdf_amenities_with_sa2 = gpd.sjoin(gdf_amenities, sa2_gdf[['SA2_NAME21', 'geometry']], how="left", op="within")


Successfully fetched SA2 regions for education
Successfully fetched SA2 regions for parks_and_gardens


  exec(code_obj, self.user_global_ns, self.user_ns)
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:4326
Right CRS: EPSG:7844

  gdf_amenities_with_sa2 = gpd.sjoin(gdf_amenities, sa2_gdf[['SA2_NAME21', 'geometry']], how="left", op="within")
  exec(code_obj, self.user_global_ns, self.user_ns)
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:4326
Right CRS: EPSG:7844

  gdf_amenities_with_sa2 = gpd.sjoin(gdf_amenities, sa2_gdf[['SA2_NAME21', 'geometry']], how="left", op="within")


Successfully fetched SA2 regions for train_station
Successfully fetched SA2 regions for shopping


  exec(code_obj, self.user_global_ns, self.user_ns)
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:4326
Right CRS: EPSG:7844

  gdf_amenities_with_sa2 = gpd.sjoin(gdf_amenities, sa2_gdf[['SA2_NAME21', 'geometry']], how="left", op="within")
  exec(code_obj, self.user_global_ns, self.user_ns)
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:4326
Right CRS: EPSG:7844

  gdf_amenities_with_sa2 = gpd.sjoin(gdf_amenities, sa2_gdf[['SA2_NAME21', 'geometry']], how="left", op="within")


Successfully fetched SA2 regions for healthcare


In [23]:
# Test to see if SA2 was added
amenities_dfs['education']

Unnamed: 0,id,name,amenity,lat,lon,SA2_NAME21
0,148544339,Syndal Pre-School,kindergarten,-37.8741972,145.1487815,Glen Waverley - West
1,191834621,Tally Ho Preschool,kindergarten,-37.8691111,145.1642861,Glen Waverley - West
2,207718805,St Johns Pre-School,kindergarten,-37.8979989,145.1137432,Oakleigh - Huntingdale
3,246969693,Waverley Foothills Preschool,kindergarten,-37.9316411,145.2000972,Mulgrave
4,247169615,Brunswick Crèche & Day Nursery,kindergarten,-37.7722530,144.9662900,Brunswick - South
...,...,...,...,...,...,...
3475,1315871094,Shine Bright St Margaret's Kindergarten,kindergarten,-34.19080758,142.1574592,Mildura - North
3476,1316142653,St Albans East Preschool,kindergarten,-37.74306644285714285714285714,144.8165961571428571428571429,St Albans - North
3477,1318009340,Country Bunch Early Learning,kindergarten,-34.23471761666666666666666667,142.18055925,Irymple
3478,1318025539,Montessori Beginnings,kindergarten,-34.22681335,142.1609060,Irymple
