In [1]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [2]:
from dataclasses import dataclass
import httpx
import pandas as pd

In [3]:
@dataclass
class Datasource:
    name: str = ""
    meta: str = ""
    api_url: str = ""
    api_key: str = ""
    api_id: str = ""
    header: str = ""


In [4]:
data = Datasource(name="NSW Car Park Occupancy", 
                  meta="https://opendata.transport.nsw.gov.au/dataset/car-park-api",
                  api_url="https://api.transport.nsw.gov.au/v1/carpark",
                  api_id="databooth-nsw-open-data")

In [5]:
data.api_url = "https://api.transport.nsw.gov.au/v1/carpark"

In [6]:
data.api_id = "databooth-nsw-open-data"

In [7]:
SECRET_NSW_CARPARK_API_KEY = "fwyH5kqV8mkdDwOfJlwoo11ubCivNjqmWeUk"

In [8]:
data.api_key = SECRET_NSW_CARPARK_API_KEY    # TODO: Move to secret & clean history

# https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/removing-sensitive-data-from-a-repository

In [9]:
data.header = {'Accept': 'application/json', 'Authorization': f'apikey {data.api_key}'}


In [11]:
class GetDataForApiError(Exception):
    pass

In [22]:
def get_data_for_api(url, headers, column_name="unnamed", query=None):
    with httpx.Client() as client:
        r = client.get(f"{url}?{query}", headers=headers)
        if r.status_code == httpx.codes.OK:
            data = r.json()
        else:
            raise GetDataForApiError(f"Error {r.status_code} calling API: {url}")
        return pd.DataFrame.from_dict(data=r.json(), orient="index", columns=[column_name])

In [23]:
def get_carpark_names():
    try:
        return get_data_for_api(data.api_url, data.header, column_name="carpark_name")
    except Exception as e:
        print(e)
        return None

In [29]:
carpark_names_df = get_carpark_names()

In [32]:
def reformat_carpark_names(carpark_names_df):
    if carpark_names_df is None:
        print("Error: car park names not available")
    else:
        carpark_names_df["id"] = carpark_names_df.index
        carpark_names_df.reset_index(inplace=True)
        carpark_names_df.drop("index", axis=1)
    return carpark_names_df


In [33]:
carpark_names_df = reformat_carpark_names(carpark_names_df)

In [28]:
carpark_names_df

Unnamed: 0,index,carpark_name,id
0,1,Tallawong Station Car Park,1
1,10,Warriewood Car Park,10
2,11,Narrabeen Car Park,11
3,12,Mona Vale Car Park,12
4,13,Dee Why Car Park,13
5,14,West Ryde Car Park,14
6,15,Sutherland East Parade Car Park,15
7,16,Leppington Car Park,16
8,17,Edmondson Park South Car Park,17
9,18,St Marys Car Park,18


In [21]:
# want to cache this data

try:
    carpark_names_df.to_json("../data/carpark_names.json", index=False)
except Exception:
    print("Data unavailable")

Data unavailable


In [19]:
def get_current_carpark_occupancy_data(carpark_names_df, url, headers):
    if carpark_names_df is None:
        print("Car park data not available")
        return None
    for row in carpark_names_df.itertuples():
        df = get_data_for_api(url, headers, column_name=row[2], query=f"facility={row[3]}")
        all_df = df if row[0] == 0 else all_df.join(df[row[2]])
    return all_df


In [20]:
all_df = get_current_carpark_occupancy_data(carpark_names_df, data.api_url, data.header)

GetDataForApiError: Error 503 calling API: https://api.transport.nsw.gov.au/v1/carpark

In [None]:
try:
    all_df["Ashfield Car Park"]["occupancy"]
except Exception:
    print("Car park data not available")

In [None]:
# TODO: Prefect job to get the data on an hourly basis each day for a month?

In [None]:
# View the meta data in a browser (macOS at least)

!open $data.meta

In [None]:
# c.f. https://github.com/streamlit/demo-uber-nyc-pickups/blob/main/streamlit_app.py