In [1]:
import os
import sys
# Put the parent directory on the import path (at most once) so that code
# located one level above the working directory becomes importable.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [2]:
import webbrowser
from dataclasses import dataclass, field

import httpx
import pandas as pd

In [3]:
@dataclass
class Datasource:
    """Connection details for a single API-backed data source.

    Every field has an empty default, so an instance can be created first and
    filled in attribute by attribute afterwards.
    """

    name: str = ""  # human-readable name of the dataset
    meta: str = ""  # URL of the page describing the dataset
    api_url: str = ""  # endpoint that requests are sent to
    api_key: str = ""  # secret used to build the Authorization header
    api_id: str = ""  # presumably the application id registered with the provider -- TODO confirm
    # HTTP request headers. This holds a mapping (not a string) because it is
    # passed straight to the HTTP client; default_factory gives each instance
    # its own dict instead of sharing one mutable default.
    header: dict = field(default_factory=dict)


In [4]:
# Describe the NSW car park occupancy API; the key and headers are filled in
# by later cells.
data = Datasource(
    name="NSW Car Park Occupancy",
    meta="https://opendata.transport.nsw.gov.au/dataset/car-park-api",
    api_url="https://api.transport.nsw.gov.au/v1/carpark",
    api_id="databooth-nsw-open-data",
)

In [5]:
# NOTE(review): this re-assigns the same URL that was already passed to the
# Datasource(...) call above, so it only matters if that call is changed.
data.api_url = "https://api.transport.nsw.gov.au/v1/carpark"

In [6]:
# NOTE(review): this re-assigns the same id that was already passed to the
# Datasource(...) call above, so it only matters if that call is changed.
data.api_id = "databooth-nsw-open-data"

In [7]:
# Read the key from the environment so the secret never lives in the notebook.
# Export NSW_CARPARK_API_KEY before starting Jupyter. Without it the key is
# empty and requests are expected to be rejected, which the later cells
# already handle by printing the returned status code.
data.api_key = os.environ.get("NSW_CARPARK_API_KEY", "")
if not data.api_key:
    print("Warning: NSW_CARPARK_API_KEY is not set; API requests will not be authorised.")

# TODO: Clean any previously committed key out of the repository history:
# https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/removing-sensitive-data-from-a-repository

In [8]:
# Request headers: ask for a JSON response and send the key using the
# "apikey <key>" Authorization format.
data.header = {
    "Accept": "application/json",
    "Authorization": f"apikey {data.api_key}",
}


In [9]:
def get_data_for_api(url, headers, column_name="unnamed", query=None):
    """Fetch a JSON object from an API and return it as a one-column DataFrame.

    Parameters
    ----------
    url : str
        Endpoint to send the GET request to.
    headers : dict
        HTTP headers sent with the request.
    column_name : str, optional
        Name given to the single column of the returned frame.
    query : str, optional
        Raw query string without the leading "?". When it is None or empty,
        the bare ``url`` is requested.

    Returns
    -------
    pandas.DataFrame or int
        On HTTP 200, a frame built from the JSON body with ``orient="index"``
        (one row per top-level key) and a single column named ``column_name``.
        For any other status, the integer status code is returned instead, so
        callers must check the type of the result.
    """
    # Only append "?<query>" when a query was actually supplied; interpolating
    # the default None would request "<url>?None".
    request_url = f"{url}?{query}" if query else url
    with httpx.Client() as client:
        r = client.get(request_url, headers=headers)
        if r.status_code != httpx.codes.OK:
            return r.status_code
        return pd.DataFrame.from_dict(data=r.json(), orient="index", columns=[column_name])

In [10]:
# Request the endpoint without a query string. The result is a DataFrame with
# a "carpark_name" column on success, or the HTTP status code on failure.
carpark_names_df = get_data_for_api(
    url=data.api_url,
    headers=data.header,
    column_name="carpark_name",
)

In [11]:
# get_data_for_api returns an HTTP status code instead of a DataFrame when the
# request fails, so branch on the type of the result.
if isinstance(carpark_names_df, pd.DataFrame):
    # A bare expression inside an if/else branch is not rendered by Jupyter
    # (only a cell's final top-level expression is), so display it explicitly.
    display(carpark_names_df.head())
else:
    print(f"Error: {carpark_names_df}")

Error: 401


In [12]:
# Copy each row's index label into an explicit "id" column, then switch to a
# default integer index. reset_index() also keeps the old labels in an "index"
# column, giving the layout ["index", "carpark_name", "id"]. That column is
# retained on purpose so the layout seen by later cells is unchanged (the
# earlier drop() call discarded its result and never removed it).
# The '"id" not in columns' guard makes the cell safe to run more than once.
if isinstance(carpark_names_df, pd.DataFrame) and "id" not in carpark_names_df.columns:
    carpark_names_df = carpark_names_df.assign(id=carpark_names_df.index).reset_index()


In [13]:
def get_current_carpark_occupancy_data(url, headers):
    """Fetch the current data for every listed car park into one DataFrame.

    Iterates over the module-level ``carpark_names_df`` (its ``carpark_name``
    and ``id`` columns) and calls ``get_data_for_api`` once per row with the
    query ``facility=<id>``.

    Parameters
    ----------
    url : str
        Endpoint passed through to ``get_data_for_api``.
    headers : dict
        HTTP headers passed through to ``get_data_for_api``.

    Returns
    -------
    pandas.DataFrame
        One column per car park whose request succeeded, named after the car
        park and left-joined onto the index of the first successful response.
        Empty when there were no car parks or no request succeeded.
    """
    all_df = None
    # Read the two needed columns by name rather than by tuple position, so the
    # loop does not depend on how many other columns the frame carries.
    for name, facility_id in zip(carpark_names_df["carpark_name"], carpark_names_df["id"]):
        df = get_data_for_api(url, headers, column_name=name, query=f"facility={facility_id}")
        if not isinstance(df, pd.DataFrame):
            # get_data_for_api hands back the HTTP status code on failure;
            # report it and carry on instead of crashing on int.join(...).
            print(f"Skipping {name}: HTTP {df}")
            continue
        # The first successful response becomes the base frame; later ones are
        # joined onto it as additional columns.
        all_df = df if all_df is None else all_df.join(df[name])
    return pd.DataFrame() if all_df is None else all_df


In [14]:
# Always (re)bind all_df, so that a failed car park lookup cannot leave the
# name undefined or pointing at a stale frame from an earlier run.
if isinstance(carpark_names_df, pd.DataFrame):
    all_df = get_current_carpark_occupancy_data(data.api_url, data.header)
else:
    all_df = None

In [15]:
# Look up the "occupancy" row of the "Ashfield Car Park" column in one .loc
# call. Only the failures that mean "no data" are caught: all_df missing
# (NameError), not a DataFrame (AttributeError/TypeError), or lacking the
# row/column (KeyError). Anything else is a real bug and should surface.
try:
    ashfield_occupancy = all_df.loc["occupancy", "Ashfield Car Park"]
except (NameError, AttributeError, TypeError, KeyError):
    print("Data not available")
else:
    # An expression inside a try block is not auto-rendered by Jupyter, so
    # display the value explicitly.
    display(ashfield_occupancy)

Data not available


In [16]:
# TODO: Set up a Prefect job to fetch the data hourly, every day, for a month?

In [17]:
# View the dataset's metadata page in the default browser. The standard
# library's webbrowser module is used instead of shelling out to the
# macOS-only `open` command, and it avoids interpolating a variable into a
# shell line. The trailing semicolon hides the boolean return value.
webbrowser.open(data.meta);

In [18]:
# cf. https://github.com/streamlit/demo-uber-nyc-pickups/blob/main/streamlit_app.py