# Geoapify Melbourne Locations
## Hospitals

In [1]:
# Dependencies
import requests
import json
import pandas as pd

# Import the API key
from config import geoapify_key

In [2]:
# Set the parameters for the type of place - hospital
categories = "healthcare.hospital"

# Set the parameters for the type of search of Melbourne (incl all Municipalities)
filters = "place:51397cf9ca8e286240593190f739f8ec42c0f00101f9016cca400000000000c002089203094d656c626f75726e65"
# Upper bound on places requested; only this single request is made, so any
# results beyond the limit are not fetched.
limit = 500

# Set up a parameters dictionary (sent as the query string of the request)
params = {
    "categories": categories,
    "limit": limit,
    "filter": filters,
    "apiKey": geoapify_key
}

# Set base URL
base_url = "https://api.geoapify.com/v2/places"

# Run a request using our params dictionary.
# The timeout keeps the cell from hanging forever if the service does not answer,
# and raise_for_status() reports an HTTP error (e.g. a rejected API key) here
# instead of letting later cells work on an error payload.
response = requests.get(base_url, params=params, timeout=30)
response.raise_for_status()

In [3]:
# Print the request URL with the API key masked, so the key is never written
# into the notebook's saved output (safe for public GitHub repos).
# The URL is rebuilt from base_url and params rather than read from response.url,
# because response.url contains the real key.
redacted_params = {**params, "apiKey": "REDACTED"}
print(requests.Request("GET", base_url, params=redacted_params).prepare().url)

In [4]:
# Convert response to JSON
places_data = response.json()

# Preview the JSON (pretty printed) without flooding the notebook output:
# report how many features came back and show only the first one, which is
# enough to see the structure of the payload. A payload with no "features"
# key is printed unchanged.
feature_count = len(places_data.get("features", []))
preview = dict(places_data)
if feature_count:
    preview["features"] = places_data["features"][:1]

print(f"Features returned: {feature_count} (showing the first one only)")
print(json.dumps(preview, indent=4, sort_keys=True))

In [5]:
# Output column name -> key read from each feature's "properties" dict.
# Keys missing from a feature are stored as None.
hospital_columns = {
    "NAME": "name",
    "Address": "address_line2",
    "Municipality": "municipality",
    "Suburb": "suburb",
    "POST_CODE": "postcode",
    "Latitude": "lat",
    "Longitude": "lon",
}

# Build one record per hospital; a response without a "features" key simply
# yields no records.
hospital_data = []
for feature in places_data.get("features", []):
    properties = feature.get("properties", {})
    hospital_data.append(
        {column: properties.get(key) for column, key in hospital_columns.items()}
    )

if not hospital_data:
    print("No hospital data found.")

# Always create the DataFrame (with explicit columns) so that this cell and the
# later merge on POST_CODE still run when no hospitals were returned, instead of
# failing with a NameError or silently reusing a frame from an earlier run.
hospital_df = pd.DataFrame(hospital_data, columns=list(hospital_columns))

hospital_df

In [6]:
# Mapping workbook - used to filter to the sample postcodes for further analysis
file_path = "Resources/Melbourne Postcodes.xlsx"
sheet_name = "Mapping"

# A mapping row is kept only when every one of these columns is populated
required_columns = ['ABS_SA2_KEY', 'HOUSE_LOCALITY', 'SCHOOL_POST_CODE']

# Read the sheet, drop incomplete rows, and store POST_CODE as a string.
# NOTE(review): if POST_CODE is read as a float column, astype(str) produces
# values like "3000.0" - confirm the dtype in the workbook.
melb_postcodes_df = (
    pd.read_excel(file_path, sheet_name=sheet_name)
    .dropna(subset=required_columns)
    .assign(POST_CODE=lambda mapping: mapping['POST_CODE'].astype(str))
)

# Display the DataFrame
melb_postcodes_df

In [7]:
# One row per POST_CODE, with that postcode's SUBURB_NAME values joined by ", "
suburbs_by_postcode = melb_postcodes_df.groupby('POST_CODE')['SUBURB_NAME']
postcode_suburbs_df = suburbs_by_postcode.agg(', '.join).reset_index()

# Display the resulting DataFrame
postcode_suburbs_df

In [8]:
# Columns from the mapping sheet and the hospital data that are not needed downstream
columns_to_drop = ["RURAL_TOWNSHIP","ABS_SA2_KEY","HOUSE_LOCALITY","SCHOOL_POST_CODE",
                   "COMMENTS","SUBURB_NAME","SUBURB_POSTCODE_COMMENTS","Address","Municipality"]

# Keep only hospitals whose POST_CODE appears in the mapping (inner join),
# then discard rows without a HOUSE_LOCALITY and the unneeded columns.
# NOTE(review): if the mapping holds several rows per POST_CODE, each hospital
# is repeated once per mapping row here - confirm against the workbook.
hospitals_clean_1_df = (
    melb_postcodes_df
    .merge(hospital_df, how='inner', on='POST_CODE')
    .dropna(subset=['HOUSE_LOCALITY'])
    .drop(columns=columns_to_drop)
)
hospitals_clean_1_df

In [9]:
# Number of distinct postcodes that have at least one matched hospital row
postcode_series = hospitals_clean_1_df['POST_CODE']
unique_count = postcode_series.nunique()

# Display the count of unique postcodes
print(unique_count)

In [10]:
# The joined suburb list is renamed to make clear it can hold several suburbs
new_column_names = {"SUBURB_NAME": "SUBURB_NAMES"}

# Attach the per-postcode suburb list, keep the first row for each POST_CODE,
# and use POST_CODE as the index.
# NOTE(review): drop_duplicates keeps a single hospital row per postcode, so any
# other hospitals in the same postcode are not in this frame - confirm intended.
hospitals_clean_final_df = (
    hospitals_clean_1_df
    .merge(postcode_suburbs_df, how='inner', on='POST_CODE')
    .rename(columns=new_column_names)
    .drop_duplicates(subset='POST_CODE')
    .set_index('POST_CODE')
)

# Export the cleaned hospital dataset to a CSV file
hospitals_clean_final_df.to_csv("Cleaned_Data/hospitals_clean_final.csv", index_label="POST_CODE")

hospitals_clean_final_df

In [11]:
# Count hospitals per postcode.
# hospitals_clean_final_df holds exactly one row per POST_CODE (duplicates on
# POST_CODE were dropped before it was indexed), so counting NAME there can only
# ever give 0 or 1. Count from hospitals_clean_1_df instead, first collapsing
# rows that describe the same hospital (same postcode, name and coordinates) so
# that repeated rows produced by the mapping merge are not counted twice.
unique_hospitals_df = hospitals_clean_1_df.drop_duplicates(
    subset=['POST_CODE', 'NAME', 'Latitude', 'Longitude']
)

# Group by the postcode descriptors and count the non-null hospital names
hospital_count_by_postcode_df = (
    unique_hospitals_df
    .groupby(['POST_CODE', 'MUNICIPALITY', 'CITY_SHIRE', 'SUBURB_GROUP'])['NAME']
    .count()
    .reset_index()
    # Rename the count column to "Hospital_Count"
    .rename(columns={'NAME': 'Hospital_Count'})
)

# Export the hospital count by postcode to a CSV file
hospital_count_by_postcode_df.to_csv("Cleaned_Data/hospital_count_by_postcode_2023.csv", index=False)

# Display the hospital count DataFrame
hospital_count_by_postcode_df