In [2]:
import json
import os
import time

from geopy.geocoders import Nominatim

# Geocoder client used for the area-name lookups in this cell
geolocator = Nominatim(user_agent="area-coordinates")

# Parse the raw asthma dataset into memory
with open("asthma.json", "r") as asthma_file:
    data = json.load(asthma_file)

def get_coordinates(area_name):
    """Geocode an area name into a latitude/longitude dictionary.

    The lookup goes through the module-level ``geolocator``.

    Args:
        area_name: Free-text place name to search for.

    Returns:
        A dict with the keys ``"latitude"`` and ``"longitude"``. Both values
        are ``None`` when the name is empty, when the geocoder finds no match,
        or when the geocoding service raises a geopy error.
    """
    # Imported locally so the function carries its own dependency on geopy's
    # exception hierarchy.
    from geopy.exc import GeopyError

    unresolved = {"latitude": None, "longitude": None}

    # Nothing to look up: avoid sending an empty query to the service.
    if not area_name:
        return unresolved

    try:
        location = geolocator.geocode(area_name)
    except GeopyError as error:
        # One failed lookup (timeout, service error, ...) must not abort the
        # whole batch; report it and treat the area as unresolved.
        print(f"Geocoding failed for {area_name!r}: {error}")
        return unresolved

    if location:
        return {"latitude": location.latitude, "longitude": location.longitude}
    return unresolved

# Minimum pause between geocoding requests. The public Nominatim service
# allows at most one request per second, so the loop below waits this long
# after every lookup instead of firing requests back to back.
NOMINATIM_MIN_DELAY_SECONDS = 1.0

# Add coordinates to each feature
for feature in data["features"]:
    properties = feature["properties"]
    # Keep only the text before the first " (" of the area name
    area_name = properties["area_name"].split(" (")[0]
    properties["coordinates"] = get_coordinates(area_name)
    time.sleep(NOMINATIM_MIN_DELAY_SECONDS)

# Save the updated JSON data
with open("asthma_with_coordinates.json", "w") as file:
    json.dump(data, file, indent=4)



In [3]:
import json

# Latitude / longitude windows, each written as (lower bound, upper bound)
lat_range, lon_range = (-39, -34), (140.96, 150)

# Load the features that already carry a "coordinates" property
with open("asthma_with_coordinates.json") as geocoded_file:
    data = json.load(geocoded_file)

def _inside_bounds(feature):
    """Return True when the feature has coordinates inside both ranges."""
    coords = feature['properties'].get('coordinates', {})
    lat = coords.get('latitude')
    lon = coords.get('longitude')
    # Features that could not be geocoded have None here and are dropped
    if lat is None or lon is None:
        return False
    lat_ok = lat_range[0] <= lat <= lat_range[1]
    return lat_ok and lon_range[0] <= lon <= lon_range[1]


# Keep only the features located inside the latitude/longitude window
filtered_data = [
    candidate for candidate in data.get('features', []) if _inside_bounds(candidate)
]

# Write the surviving features to their own JSON file
with open("asthma_with_coordinates_filtered.json", "w") as filtered_file:
    json.dump({"features": filtered_data}, filtered_file, indent=4)


In [9]:
import geopandas as gpd
import json
from shapely.geometry import Point

# Locate the SA2 shapefile relative to the current user's home directory
# rather than through one machine's absolute path, matching how the EPA cell
# of this notebook resolves the same folder.
sa2_map_dir = os.path.expanduser('~/COMP90024_2024_ASMT2_Group12/data/SA2-Map')
file_path = os.path.join(sa2_map_dir, 'SA2_2021_AUST_GDA2020.shp')
sa2_gdf = gpd.read_file(file_path)
# Convert the boundaries to EPSG:4326
sa2_gdf = sa2_gdf.to_crs(epsg=4326)

# Load the JSON data
with open('asthma_with_coordinates.json', 'r') as json_file:
    data = json.load(json_file)

# Turn every geocoded feature into a {'properties', 'geometry'} record
records = []
for entry in data['features']:
    props = entry['properties']
    lat = props['coordinates']['latitude']
    lon = props['coordinates']['longitude']

    # Features without a usable position are reported and left out
    if lat is None or lon is None:
        print(f"Skipping entry with invalid coordinates: {entry}")
        continue

    # The position now lives in the geometry, so drop it from the properties
    props.pop('coordinates', None)
    records.append({'properties': props, 'geometry': Point(lon, lat)})

gdf = gpd.GeoDataFrame(records, geometry='geometry', crs="EPSG:4326")

# Reproject both layers to EPSG:3857 before the nearest-neighbour join
gdf_projected = gdf.to_crs(epsg=3857)
sa2_gdf_projected = sa2_gdf.to_crs(epsg=3857)

# Attach the nearest SA2 row to every point, copy its name and state columns
# to 'suburb' and 'state', then convert the result back to EPSG:4326
gdf_with_sa2 = (
    gpd.sjoin_nearest(gdf_projected, sa2_gdf_projected, how='left', distance_col='distance')
    .assign(
        suburb=lambda joined: joined['SA2_NAME21'],
        state=lambda joined: joined['STE_NAME21'],
    )
    .to_crs(epsg=4326)
)

# Keep only the rows whose state is Victoria
is_victoria = gdf_with_sa2['state'] == 'Victoria'
victoria_gdf = gdf_with_sa2.loc[is_victoria]

# Rate columns copied from each row's original properties into the output
rate_keys = ('asthma_me_2_rate_3_11_7_13', 'respirtry_me_2_rate_3_11_7_13')

# Build one flat output record per Victorian row
updated_features = []
for _, row in victoria_gdf.iterrows():
    source_properties = row['properties']
    summary = {'type': 'Feature', 'suburb': row['suburb'], 'state': row['state']}
    summary.update((key, source_properties[key]) for key in rate_keys)
    updated_features.append(summary)

updated_data = {'features': updated_features}

# Write the Victorian records to disk and confirm completion
with open('victoria_data.json', 'w') as json_output_file:
    json.dump(updated_data, json_output_file, indent=2)

print("Filtered data for Victoria saved successfully")






Skipping entry with invalid coordinates: {'type': 'Feature', 'properties': {'asthma_me_2_rate_3_11_7_13': 12.3845089252748, 'area_code': 29399, 'area_name': 'Unincorporated Vic', 'respirtry_me_2_rate_3_11_7_13': 32.674331183441, 'respirtry_me_1_no_3_11_7_13': 245.220618421973, 'asthma_me_1_no_3_11_7_13': 93.0687920481125, 'coordinates': {'latitude': None, 'longitude': None}}, 'id': 'lga11_chronicdisease_modelledestimate.29399'}
Skipping entry with invalid coordinates: {'type': 'Feature', 'properties': {'asthma_me_2_rate_3_11_7_13': 13.9576502231338, 'area_code': 46450, 'area_name': 'Port Pirie City and Dists (M)', 'respirtry_me_2_rate_3_11_7_13': 31.7095419167049, 'respirtry_me_1_no_3_11_7_13': 5530.51881187721, 'asthma_me_1_no_3_11_7_13': 2446.45050336834, 'coordinates': {'latitude': None, 'longitude': None}}, 'id': 'lga11_chronicdisease_modelledestimate.46450'}
Skipping entry with invalid coordinates: {'type': 'Feature', 'properties': {'asthma_me_2_rate_3_11_7_13': 10.097877418069, '

In [13]:
import os
import requests
import json
import geopandas as gpd
from shapely.geometry import Point

# Step 1: Get data from API
# The API key is read from the environment so the credential is never stored
# in the notebook itself.
api_key = os.environ.get("EPA_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the EPA_API_KEY environment variable to your EPA Victoria API key."
    )

url = "https://gateway.api.epa.vic.gov.au/environmentMonitoring/v1/sites"
params = {"environmentalSegment": "air"}
headers = {
    'User-agent': 'curl/8.4.0',
    'Cache-Control': 'no-cache',
    'X-API-Key': api_key,
}
# A timeout keeps the cell from hanging forever on an unresponsive gateway
response = requests.get(url, params=params, headers=headers, timeout=30)
# Fail loudly on HTTP errors instead of trying to parse an error page
response.raise_for_status()
data = response.json()

# Step 2: Create GeoDataFrame from API data
records = []
for site in data['records']:
    coordinates = site['geometry']['coordinates']

    # Sites with a missing coordinate component are reported and left out
    if coordinates[0] is None or coordinates[1] is None:
        print(f"Skipping entry with invalid coordinates: {site}")
        continue

    records.append({
        'properties': {
            'siteID': site.get('siteID'),
            'siteName': site.get('siteName'),
            'siteType': site.get('siteType'),
            'siteHealthAdvices': site.get('siteHealthAdvices', []),
        },
        # Point takes (x, y): index 1 is passed as x (longitude) and
        # index 0 as y (latitude)
        'geometry': Point(coordinates[1], coordinates[0]),
    })

gdf = gpd.GeoDataFrame(records, geometry='geometry', crs="EPSG:4326")

# Step 3: Load SA2 shapefile and perform spatial join
base_path = os.path.expanduser('~/COMP90024_2024_ASMT2_Group12/data/SA2-Map')
shapefile_path = os.path.join(base_path, 'SA2_2021_AUST_GDA2020.shp')
sa2_gdf = gpd.read_file(shapefile_path).to_crs(epsg=4326)

# Both layers are reprojected to EPSG:3857 for the nearest-neighbour join
gdf_projected = gdf.to_crs(epsg=3857)
sa2_gdf_projected = sa2_gdf.to_crs(epsg=3857)

# Attach the nearest SA2 row to every site, copy its name and state columns
# to 'suburb' and 'state', then convert the result back to EPSG:4326
gdf_with_sa2 = (
    gpd.sjoin_nearest(gdf_projected, sa2_gdf_projected, how='left', distance_col='distance')
    .assign(
        suburb=lambda joined: joined['SA2_NAME21'],
        state=lambda joined: joined['STE_NAME21'],
    )
    .to_crs(epsg=4326)
)

# Step 4: Update JSON structure
updated_records = []
particle_suburbs = []  # suburb of every advice whose parameter is "Particles"

for _, site_row in gdf_with_sa2.iterrows():
    suburb = site_row['suburb']
    state = site_row['state']

    # Emit one flat record per health advice attached to the site
    for advice in site_row['properties']['siteHealthAdvices']:
        parameter = advice.get('healthParameter')
        if parameter == 'Particles':
            particle_suburbs.append(suburb)
        updated_records.append({
            'since': advice.get('since'),
            'until': advice.get('until'),
            'healthParameter': parameter,
            'averageValue': advice.get('averageValue'),
            'suburb': suburb,
            'state': state,
        })

updated_data = {'features': updated_records}

# Show the flattened records as formatted JSON
print(json.dumps(updated_data, indent=2))

# Show the suburbs collected for the "Particles" parameter
print(particle_suburbs)


{
  "features": [
    {
      "since": "2024-05-21T09:00:00Z",
      "until": "2024-05-21T10:00:00Z",
      "healthParameter": "PM2.5",
      "averageValue": 10.3,
      "suburb": "Box Hill",
      "state": "Victoria"
    },
    {
      "since": "2024-05-21T09:00:00Z",
      "until": "2024-05-21T10:00:00Z",
      "healthParameter": "PM2.5",
      "averageValue": 4.98,
      "suburb": "Alphington - Fairfield",
      "state": "Victoria"
    },
    {
      "since": "2024-05-21T09:00:00Z",
      "until": "2024-05-21T10:00:00Z",
      "healthParameter": "PM2.5",
      "averageValue": 17.9,
      "suburb": "Churchill",
      "state": "Victoria"
    },
    {
      "since": "2024-05-21T09:00:00Z",
      "until": "2024-05-21T10:00:00Z",
      "healthParameter": "Particles",
      "averageValue": 26.75,
      "suburb": "Churchill",
      "state": "Victoria"
    },
    {
      "since": "2024-05-21T09:00:00Z",
      "until": "2024-05-21T10:00:00Z",
      "healthParameter": "PM2.5",
      "averageV

In [17]:
import requests
import json

# Step 1: Get data from API
# The API key is read from the environment so the credential is never stored
# in the notebook itself.
api_key = os.environ.get("EPA_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the EPA_API_KEY environment variable to your EPA Victoria API key."
    )

url = "https://gateway.api.epa.vic.gov.au/environmentMonitoring/v1/sites"
params = {"environmentalSegment": "air"}
headers = {
    'User-agent': 'curl/8.4.0',
    'Cache-Control': 'no-cache',
    'X-API-Key': api_key,
}
# A timeout keeps the cell from hanging forever on an unresponsive gateway
response = requests.get(url, params=params, headers=headers, timeout=30)
# Fail loudly on HTTP errors instead of trying to parse an error page
response.raise_for_status()
data = response.json()

# Provided Particle suburbs list.
# NOTE(review): the entries are matched to the API's "Particles" readings
# purely by position, so this list is only valid while the API returns the
# same readings in the same order as when the list was produced.
particle_suburbs = [
    'Churchill', 'Yallourn North - Glengarry', 'Leongatha', 'Traralgon - East', 'Churchill', 'Churchill',
    'Mount Baw Baw Region', 'Yallourn North - Glengarry', 'Churchill', 'Healesville - Yarra Glen',
    'Warrnambool - South', 'Mildura - North', 'Horsham', 'Wendouree - Miners Rest', 'Bairnsdale',
    'Castlemaine', 'Beechworth', 'Benalla', 'Ararat', 'Camperdown', 'Maffra', 'Gisborne', 'Macedon',
    'Romsey', 'Kyneton', 'Heathcote', 'Kinglake', 'Daylesford', 'Glenelg (Vic.)', 'Hamilton (Vic.)',
    'Kerang', 'Lorne - Anglesea', 'Shepparton - North', 'Orbost', 'Sale', 'Myrtleford', 'Mansfield (Vic.)',
    'Portland', 'Mildura Surrounds', 'Torquay', 'Wonthaggi - Inverloch', 'Warragul', 'Yarrawonga', 'Wodonga',
    'Kilmore - Broadford', 'Healesville - Yarra Glen', 'Sunbury - South', 'Lakes Entrance', 'Leongatha',
    'Bacchus Marsh', 'Yarra Valley', 'Clifton Springs', 'Rutherglen', 'Swan Hill', 'Wallan', 'Rosedale',
    'Traralgon - East', 'Yallourn North - Glengarry'
]

# Step 2: Filter and update Particle data
# Collect every "Particles" advice in API order; a site whose advice list is
# missing or null contributes nothing.
particle_advices = [
    advice
    for record in data['records']
    for advice in (record.get('siteHealthAdvices') or [])
    if advice.get('healthParameter') == 'Particles'
]

# Positional pairing is only meaningful when both sides have the same length,
# so make any drift visible instead of silently mislabelling readings.
if len(particle_advices) != len(particle_suburbs):
    print(
        f"Warning: {len(particle_advices)} 'Particles' readings returned but "
        f"{len(particle_suburbs)} suburbs listed; positional pairing may be misaligned."
    )

# zip stops at the shorter sequence, so pairing ends as soon as either the
# readings or the suburb names run out.
filtered_data = [
    {'averageValue': advice.get('averageValue'), 'suburb': suburb}
    for advice, suburb in zip(particle_advices, particle_suburbs)
]

# Print the updated JSON data
print(json.dumps(filtered_data, indent=2))



[
  {
    "averageValue": 26.75,
    "suburb": "Churchill"
  },
  {
    "averageValue": 15.8,
    "suburb": "Yallourn North - Glengarry"
  },
  {
    "averageValue": 12.87,
    "suburb": "Leongatha"
  },
  {
    "averageValue": 25.43,
    "suburb": "Traralgon - East"
  },
  {
    "averageValue": 19,
    "suburb": "Churchill"
  },
  {
    "averageValue": 12.6,
    "suburb": "Churchill"
  },
  {
    "averageValue": 8.2,
    "suburb": "Mount Baw Baw Region"
  },
  {
    "averageValue": 29.65,
    "suburb": "Yallourn North - Glengarry"
  },
  {
    "averageValue": 10.37,
    "suburb": "Churchill"
  },
  {
    "averageValue": 9.07,
    "suburb": "Healesville - Yarra Glen"
  },
  {
    "averageValue": 4.75,
    "suburb": "Warrnambool - South"
  },
  {
    "averageValue": 5.13,
    "suburb": "Mildura - North"
  },
  {
    "averageValue": 8.2,
    "suburb": "Horsham"
  },
  {
    "averageValue": 5.97,
    "suburb": "Wendouree - Miners Rest"
  },
  {
    "averageValue": 22.85,
    "suburb": "Ba