In [1]:
import os

from environment import *

### Local air quality

In [31]:
def determine_health_advice(value):
    """Translate a numeric reading into a health-advice label.

    Bands (lower bound inclusive, upper bound exclusive):
        below 40   -> "Good"
        40 to 80   -> "Fair"
        80 to 120  -> "Poor"
        120 to 300 -> "Very poor"
        otherwise  -> "Extremely poor"

    Args:
        value: Number to classify.

    Returns:
        str: The label of the band that contains ``value``.
    """
    # Upper bounds in ascending order; the first bound the value falls below wins.
    bands = (
        (40, "Good"),
        (80, "Fair"),
        (120, "Poor"),
        (300, "Very poor"),
    )
    for upper_bound, label in bands:
        if value < upper_bound:
            return label
    return "Extremely poor"

def euler_dist(x1, y1, x2, y2):
    """Return the straight-line (Euclidean) distance between (x1, y1) and (x2, y2).

    The inputs are treated as plain planar coordinates, so the result is in the
    same units as the inputs.
    """
    dx = x1 - x2
    dy = y1 - y2
    squared_length = dx ** 2 + dy ** 2
    return np.sqrt(squared_length)
    

# Parsed SA2 tables keyed by file path, so the JSON file is read and parsed once
# per kernel session instead of once per find_sa2() call. Re-running this cell
# resets the cache, which is how to pick up a changed file.
_SA2_TABLE_CACHE = {}


def _load_sa2_table(path):
    """Return the parsed SA2 list stored at ``path``, reading the file only on first use."""
    if path not in _SA2_TABLE_CACHE:
        with open(path, 'r') as f:
            _SA2_TABLE_CACHE[path] = json.load(f)
    return _SA2_TABLE_CACHE[path]


def find_sa2(lon, lat, sa2_path="../data/All_SA2.json"):
    """Find the SA2 entry whose reference point is closest to (lon, lat).

    Every entry of the SA2 table is expected to provide the keys ``lon``,
    ``lat``, ``sa2_code`` and ``sa2_name``. Closeness is measured with
    ``euler_dist`` directly on the longitude/latitude values.

    Args:
        lon: Longitude of the query point.
        lat: Latitude of the query point.
        sa2_path: Path of the JSON file holding the SA2 table.

    Returns:
        tuple: ``(sa2_code, sa2_name)`` of the nearest entry; on a tie the entry
        that appears first in the table wins. ``(None, None)`` is returned when
        either coordinate is None or the table is empty.
    """
    # A site without coordinates cannot be matched; previously this raised TypeError.
    if lon is None or lat is None:
        return None, None

    min_dist = float('inf')
    sa2_code = None
    sa2_name = None
    for entry in _load_sa2_table(sa2_path):
        dist = euler_dist(lon, lat, entry['lon'], entry['lat'])
        if dist < min_dist:
            min_dist = dist
            sa2_code = entry['sa2_code']
            sa2_name = entry['sa2_name']
    return sa2_code, sa2_name

In [32]:
# Shared request configuration for the EPA Victoria environment-monitoring API.
harvested = {}
User_Agent = 'curl/8.4.0'

vic_url = "https://gateway.api.epa.vic.gov.au/environmentMonitoring/v1/sites"
params = {"environmentalSegment": "air"}

# The API key is a credential and must not be stored in the notebook. It is read
# from the EPA_VIC_API_KEY environment variable instead. A key that was
# previously committed with this notebook should be treated as leaked and rotated.
EPA_VIC_API_KEY = os.environ.get("EPA_VIC_API_KEY", "")
if not EPA_VIC_API_KEY:
    print("EPA_VIC_API_KEY is not set; requests to the EPA Victoria API will likely be rejected.")

headers = {'User-Agent': User_Agent,
           'Cache-Control': 'no-cache',
           'X-API-key': EPA_VIC_API_KEY}

In [35]:
# Fetch the current list of EPA Victoria air-monitoring sites.
response = requests.get(vic_url, params=params, headers=headers, timeout=60)
if response.status_code == 200:
    data_mel = response.json()
else:
    # Previously a failed request left `data_mel` undefined (or stale from an
    # earlier run), so the lookup below raised NameError or reused old data.
    print("EPA Victoria request failed with status", response.status_code)
    data_mel = {}

# Always a list, even when the request failed or the payload has no records.
records = data_mel.get('records') or []

In [4]:
# Harvest air-quality readings from EPA Victoria and from NSW Air Quality, merge
# them into `merged_data` (a list of flat dicts that share the same keys) and
# write the result to ../data/air_quality_demo.json.
merged_data = []
try:
    # ---- Victoria (EPA) ----
    response = requests.get(vic_url, params=params, headers=headers, timeout=60)
    print(response.status_code)
    if response.status_code == 200:
        data_mel = response.json()
        for row in data_mel.get('records') or []:
            site_health_advices = row.get('siteHealthAdvices')
            # make sure that there are air-quality details for this site
            if not (isinstance(site_health_advices, list) and site_health_advices):
                continue
            air_detail = site_health_advices[0]

            coordinates = (row.get("geometry") or {}).get('coordinates') or []
            if len(coordinates) < 2:
                # no usable location, so the site cannot be matched to an SA2
                continue
            # NOTE(review): element 0 is read as latitude and element 1 as longitude,
            # the reverse of the usual GeoJSON [lon, lat] order - confirm against the EPA API.
            lat = coordinates[0]
            lon = coordinates[1]
            sa2_code, sa2_name = find_sa2(lon, lat)
            observation_data = {
                "siteName": row.get("siteName"),
                "Longitude": lon,
                "Latitude": lat,
                "SA2_Code": sa2_code,
                "SA2_Name": sa2_name,
                # keep only the first 10 characters of "until"
                # (presumably the YYYY-MM-DD part of a timestamp - confirm)
                "Date": (air_detail.get("until") or "")[:10],
                "ParameterCode": air_detail.get("healthParameter"),
                "Value": air_detail.get("averageValue"),
                "HealthAdvice": air_detail.get("healthAdvice")
            }
            merged_data.append(observation_data)

    # ---- New South Wales ----
    site_details_url = "https://data.airquality.nsw.gov.au/api/Data/get_SiteDetails"
    site_details_response = requests.get(site_details_url, headers={"accept": "application/json"}, timeout=60)
    # HTTP errors become RequestException and are reported by the handler below
    site_details_response.raise_for_status()
    site_details_data = site_details_response.json()

    # link the site id with the site name and its location
    site_id_to_details = {
        site["Site_Id"]: {
            "SiteName": site["SiteName"],
            "Longitude": site["Longitude"],
            "Latitude": site["Latitude"],
        }
        for site in site_details_data
    }
    # calculate the query window: yesterday to today (local time)
    current_date = datetime.now()
    previous_date = current_date - timedelta(days=1)
    previous_date = previous_date.strftime("%Y-%m-%d")
    current_date = current_date.strftime("%Y-%m-%d")
    # get the observation information
    observations_url = "https://data.airquality.nsw.gov.au/api/Data/get_Observations"
    # Dedicated names: rebinding the global `headers` here would strip the API key
    # from the Victorian request the next time this cell (or the one above) runs.
    nsw_headers = {"accept": "application/json", "Content-Type": "application/json"}
    nsw_query = {
        "Parameters": ["PM10"],
        "Sites": list(site_id_to_details.keys()),
        "StartDate": previous_date,
        "EndDate": current_date,
        "Categories": ["Averages"],
        "SubCategories": ["Hourly"],
        "Frequency": ["24h rolling average derived from 1h average"]
    }
    observations_response = requests.post(observations_url, headers=nsw_headers, json=nsw_query, timeout=60)
    observations_response.raise_for_status()
    observations_data = observations_response.json()

    # SA2 lookup result per site, so each site is matched once rather than once per observation
    sa2_by_site = {}
    for observation in observations_data:
        value = observation["Value"]
        # skip missing readings only; a reading of 0 is a valid observation
        if value is None:
            continue
        site_id = observation["Site_Id"]
        site_details = site_id_to_details.get(site_id, {})
        lat = site_details.get("Latitude")
        lon = site_details.get("Longitude")
        if site_id not in sa2_by_site:
            if lon is None or lat is None:
                # no coordinates: keep the reading but leave the SA2 fields empty
                sa2_by_site[site_id] = (None, None)
            else:
                sa2_by_site[site_id] = find_sa2(lon, lat)
        sa2_code, sa2_name = sa2_by_site[site_id]
        observation_data = {
            "siteName": site_details.get("SiteName"),
            "Longitude": lon,
            "Latitude": lat,
            "SA2_Code": sa2_code,
            "SA2_Name": sa2_name,
            "Date": observation["Date"],
            "ParameterCode": observation["Parameter"]["ParameterCode"],
            "Value": value,
            "HealthAdvice": determine_health_advice(value)
        }
        merged_data.append(observation_data)

except requests.exceptions.Timeout:
    print("Air-quality request timed out")
except requests.exceptions.RequestException as e:
    print(e)

# Whatever was collected before a failure is still written out.
with open("../data/air_quality_demo.json", 'w') as f:
    json.dump(merged_data, f)

200


### Local weather

In [9]:
def main():
    """Harvest weather observations for every configured site.

    Reads the site -> API URL mapping from ``../data/weather_APIs.json`` and the
    site -> SA2/location table from ``../data/site_sa2.json``, requests each URL
    and keeps a fixed subset of fields from every observation. The function only
    returns the data; nothing is written to disk or to a search index here.

    NOTE(review): the previous docstring said the data comes from BOM, that the
    script runs every 72hrs to match the website's update frequency, and that
    the result goes to Elastic Search - confirm where that happens.

    Returns:
        list[list[dict]]: one inner list per site that answered with HTTP 200,
        holding one cleaned dict per observation. Sites that fail (timeout,
        request error, non-200 status, unexpected payload) are reported with
        print() and skipped, so one bad site no longer discards the others.
    """
    with open("../data/weather_APIs.json", 'r') as f:
        site_apis = json.load(f)

    with open("../data/site_sa2.json") as f:
        site_sa2 = json.load(f)

    # Index the SA2 table by site once instead of scanning it for every site.
    # A later duplicate entry overrides an earlier one, as the original scan did.
    sa2_by_site = {dic['site']: dic for dic in site_sa2}

    all_data = []
    for site, url in site_apis.items():
        try:
            response = requests.get(url, timeout=60)
            if response.status_code != 200:
                # skip this site only; returning here would drop every site harvested so far
                print("Skipping", site, "- HTTP status", response.status_code)
                continue

            data = response.json()

            # Sites missing from the SA2 table keep empty SA2 fields and a (0, 0) location.
            sa2_code = None
            sa2_name = None
            lon, lat = (0, 0)
            match = sa2_by_site.get(site)
            if match is not None:
                sa2_code = match['sa2_code']
                sa2_name = match['sa2_name']
                lon, lat = match['location']

            cleaned_data = []
            for observation in data["observations"]["data"]:
                cleaned_observation = {
                    "name": observation.get("name", ""),
                    "local_date_time_full": observation.get("local_date_time_full", ""),
                    "Longitude": lon,
                    "Latitude": lat,
                    "SA2_Code": sa2_code,
                    "SA2_Name": sa2_name,
                    "gust_kmh": observation.get("gust_kmh", ""),
                    # unlike the other fields, a missing apparent_t stays None rather than ""
                    "apparent_t": observation.get("apparent_t"),
                    "delta_t": observation.get("delta_t", ""),
                    "air_temp": observation.get("air_temp", ""),
                    "press": observation.get("press", ""),
                    "rain_trace": observation.get("rain_trace", ""),
                    "rel_hum": observation.get("rel_hum", ""),
                    "vis_km": observation.get("vis_km", ""),
                    "wind_spd_kmh": observation.get("wind_spd_kmh", "")
                }
                cleaned_data.append(cleaned_observation)
            all_data.append(cleaned_data)
        except requests.exceptions.Timeout:
            print("Timed out fetching weather for", site)
        except requests.exceptions.RequestException as e:
            print(e)
        except (KeyError, ValueError) as e:
            # body was not JSON or lacks observations/data
            print("Unexpected response for", site, "-", repr(e))
    return all_data

In [10]:
# Run the weather harvest and save its result as JSON.
data = main()
with open("../data/weather_demo.json", mode='w') as out_file:
    json.dump(data, out_file)

### Fill data for empty SA2

In [24]:
# Load the saved weather harvest, the SA2 table and the saved air-quality
# harvest from disk.
with open("../data/weather_demo.json", 'r') as f:
    data = json.load(f)
with open("../data/All_SA2.json", 'r') as f:
    all_sa2 = json.load(f)

with open("../data/air_quality_demo.json", 'r') as f:
    air = json.load(f)

In [23]:
# Total number of items across all inner lists of `data`.
total = sum(len(ins) for ins in data)
total

13248

In [16]:
# For every inner list of `data`: remember the SA2 code of its first item and
# its length. `seen` ends up as the de-duplicated list of those codes
# (in no particular order); `test` holds the lengths.
test = []
first_codes = []
for inst in data:
    first_item = inst[0]
    first_codes.append(first_item['SA2_Code'])
    test.append(len(inst))
seen = list(set(first_codes))