In [2]:
import os
import pandas as pd
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import folium
from folium.plugins import MarkerCluster

In [3]:
# Data directory layout: input files live under `landing`, filtered output
# is written under `raw`.
base_dir = '../../data/'
landing_dir = os.path.join(base_dir, 'landing')
raw_dir = os.path.join(base_dir, 'raw')

# Per-dataset subfolder name, used under both landing_dir and raw_dir.
subfolder = 'Hospital'

# exist_ok=True makes this cell safe to re-run and removes the
# check-then-create race of `if not os.path.exists(...)`. makedirs also
# creates base_dir and raw_dir as intermediate directories when missing.
os.makedirs(os.path.join(raw_dir, subfolder), exist_ok=True)

In [4]:
# Load Hospital_info.csv from the landing folder. The path is assembled with
# os.path.join so that it is built the same way as landing_dir itself,
# instead of by manual "/" concatenation.
input_path = os.path.join(landing_dir, subfolder, "Hospital_info.csv")
df = pd.read_csv(input_path)

In [5]:
# Preview the first five rows (head() defaults to n=5).
df.head()

Unnamed: 0,OBJECTID,NHSD_OPERATIONALSTATUS,NHSD_ORGANISATION_NAME,NHSD_ADDRESS,NHSD_SUBURB,NHSD_STATE,NHSD_POSTCODE,NHSD_LONG,NHSD_LAT,NHSD_SERVICE_ID,NHSD_SERVICE_TYPE,GNAF_ADDRESS_DETAIL_PID,GA_CLASS,GA_SOURCE_DATE
0,1,active,Hughes Family Practice,3 Mcnicoll Street Hughes ACT 2605,Hughes,ACT,2605,149.094864,-35.334141,7cc86945-1604-8237-e618-b873d1069cb5,General practice service,GAACT716457727,GENERAL PRACTICE,20240305
1,2,active,Lane Cove Doctors Surgery,65 Burns Bay Road Lane Cove NSW 2066,Lane Cove,NSW,2066,151.166153,-33.814552,4f8d50dc-459e-ca13-c93a-dbad8f78ca12,General practice service,GANSW705185299,GENERAL PRACTICE,20240305
2,3,active,Robinson River Community Health Centre,Robinson River Community Road Robinson River N...,Robinson River,NT,852,136.982773,-16.758528,473b7b76-4eaf-0036-0070-201c11d3fde0,General practice service,,GENERAL PRACTICE,20240305
3,4,active,Ochre Medical Centre Bonnells Bay,330 Fishery Point Road Bonnells Bay NSW 2264,Bonnells Bay,NSW,2264,151.532257,-33.110668,b57c34f6-5b3a-fde3-72e4-10daeb944b28,General practice service,GANSW711793028,GENERAL PRACTICE,20240305
4,5,active,Okely Medical Group,56 Almadine Drive Carine WA 6020,Carine,WA,6020,115.768196,-31.853407,c8c7520a-eedf-ae5b-4f6d-4bda16376279,General practice service,GAWA_146550659,GENERAL PRACTICE,20240305


In [6]:
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would add a stray "None" line.
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14904 entries, 0 to 14903
Data columns (total 14 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   OBJECTID                 14904 non-null  int64  
 1   NHSD_OPERATIONALSTATUS   14904 non-null  object 
 2   NHSD_ORGANISATION_NAME   14904 non-null  object 
 3   NHSD_ADDRESS             14904 non-null  object 
 4   NHSD_SUBURB              14904 non-null  object 
 5   NHSD_STATE               14904 non-null  object 
 6   NHSD_POSTCODE            14904 non-null  int64  
 7   NHSD_LONG                14904 non-null  float64
 8   NHSD_LAT                 14904 non-null  float64
 9   NHSD_SERVICE_ID          14904 non-null  object 
 10  NHSD_SERVICE_TYPE        14904 non-null  object 
 11  GNAF_ADDRESS_DETAIL_PID  8743 non-null   object 
 12  GA_CLASS                 14904 non-null  object 
 13  GA_SOURCE_DATE           14904 non-null  int64  
dtypes: float64(2), int64(3), object(9)

In [7]:
# Number of rows in the loaded frame.
df.shape[0]

14904

In [8]:
# Distinct values found in the GA_CLASS column.
df.loc[:, "GA_CLASS"].unique()

array(['GENERAL PRACTICE', 'HOSPITAL', 'PHARMACY'], dtype=object)

In [9]:
# Keep only the rows whose GA_CLASS is 'HOSPITAL', then report how many remain.
is_hospital = df['GA_CLASS'].eq('HOSPITAL')
df_filtered = df.loc[is_hospital]
df_filtered.shape[0]

1280

In [10]:
# Narrow the already-filtered rows to those whose NHSD_STATE is 'VIC',
# then report how many remain.
in_vic = df_filtered['NHSD_STATE'].eq('VIC')
df_filtered = df_filtered.loc[in_vic]
df_filtered.shape[0]

286

In [11]:
# Write the filtered rows to the raw folder, omitting the DataFrame index.
# os.path.join keeps path construction consistent with how raw_dir is built,
# instead of manual "/" concatenation.
output_path = os.path.join(raw_dir, subfolder, "Hospital_info.csv")
df_filtered.to_csv(output_path, index=False)


In [12]:
# Visualize data: plot every row of df_filtered as a clustered marker on an
# interactive folium map.

# Build point geometries from the coordinate columns. Point takes (x, y),
# so longitude is passed first and latitude second.
geometry = [Point(xy) for xy in zip(df_filtered['NHSD_LONG'], df_filtered['NHSD_LAT'])]

# Create a GeoDataFrame with an explicit CRS so the geometries are not left
# without a coordinate reference system.
# NOTE(review): EPSG:4326 (lon/lat degrees) is assumed because the markers
# below already pass these columns to folium as latitude/longitude — confirm
# the datum against the dataset's metadata.
gdf = gpd.GeoDataFrame(df_filtered, geometry=geometry, crs="EPSG:4326")

# Initial map view: centered on Melbourne as [latitude, longitude].
# Adjust zoom_start as needed.
MELBOURNE_LAT_LON = [-37.8136, 144.9631]
m = folium.Map(location=MELBOURNE_LAT_LON, zoom_start=12)

# Group nearby markers into clusters so the map stays readable when zoomed out.
marker_cluster = MarkerCluster().add_to(m)

# Add one marker per row; folium expects locations as [latitude, longitude].
for _, row in gdf.iterrows():
    folium.Marker(
        location=[row['NHSD_LAT'], row['NHSD_LONG']],
        popup=f"{row['NHSD_ORGANISATION_NAME']} - {row['NHSD_SUBURB']}",
        tooltip=row['NHSD_SUBURB']
    ).add_to(marker_cluster)

# Optional: save the map to an HTML file for viewing in a web browser
# m.save("hospital_map_with_cluster.html")

# Last expression of the cell, so the notebook renders the map inline.
m
