In [1]:
import os
import sys
# Add the parent of the current working directory to the import path
# (presumably so the project's `app` package can be imported from this notebook).
module_path = os.path.abspath(os.pardir)
already_importable = module_path in sys.path
if not already_importable:
    sys.path.append(module_path)

In [2]:
from dataclasses import dataclass, field
from pathlib import Path

import httpx
import pandas as pd

from app.config import settings

In [3]:
@dataclass
class Datasource:
    """Connection details for one open-data API source.

    Every field has an empty default, so an instance can be created first
    and filled in attribute by attribute afterwards.
    """

    name: str = ""     # human-readable name of the dataset
    meta: str = ""     # URL of the dataset's metadata / documentation page
    api_url: str = ""  # endpoint that requests are sent to
    api_key: str = ""  # credential placed in the Authorization header
    api_id: str = ""   # identifier of the API application (presumably the registered app name -- TODO confirm)
    # HTTP request headers as a name -> value mapping. This used to be declared
    # as `str`, although a dict is what gets stored here and sent with requests.
    # default_factory gives every instance its own dict instead of a shared one.
    header: dict = field(default_factory=dict)


In [4]:
# Describe the NSW car park API: dataset name, metadata page, endpoint and app id.
data = Datasource(
    name="NSW Car Park Occupancy",
    meta="https://opendata.transport.nsw.gov.au/dataset/car-park-api",
    api_url="https://api.transport.nsw.gov.au/v1/carpark",
    api_id="databooth-nsw-open-data",
)

In [5]:
# NOTE(review): redundant -- the Datasource(...) call above already sets api_url to this same value.
data.api_url = "https://api.transport.nsw.gov.au/v1/carpark"

In [6]:
# NOTE(review): redundant -- the Datasource(...) call above already sets api_id to this same value.
data.api_id = "databooth-nsw-open-data"

In [7]:
# Take the API key from the application settings object rather than writing it in the notebook.
data.api_key = settings.NSW_CARPARK_API_KEY    # TODO: Clean repo history

# Reference for the TODO above: GitHub's guide to removing sensitive data from a repository.
# https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/removing-sensitive-data-from-a-repository

In [8]:
# Headers sent with each request: ask for JSON and authenticate with the API key.
data.header = {
    'Accept': 'application/json',
    'Authorization': f'apikey {data.api_key}',
}


In [9]:
class GetDataForApiError(Exception):
    """Raised when an API request comes back with a non-OK HTTP status."""

In [10]:
def get_data_for_api(url, headers, column_name="unnamed", query=None):
    """Fetch a JSON object from an API endpoint and return it as a one-column DataFrame.

    Args:
        url: Endpoint URL without a query string.
        headers: HTTP headers sent with the request.
        column_name: Name given to the single column of the returned frame.
        query: Optional query string (e.g. ``"facility=1"``) appended verbatim
            after ``?``. When omitted or empty, the bare ``url`` is requested.

    Returns:
        A DataFrame with one row per top-level key of the JSON response (the
        keys become the index) and the values in ``column_name``.

    Raises:
        GetDataForApiError: If the response status is not 200 OK.
    """
    # Only append "?<query>" when a query was supplied; formatting None into
    # the URL would request "<url>?None".
    request_url = f"{url}?{query}" if query else url
    with httpx.Client() as client:
        r = client.get(request_url, headers=headers)
        if r.status_code != httpx.codes.OK:
            raise GetDataForApiError(f"Error {r.status_code} calling API: {url}")
        # Decode the body once and reuse the result.
        payload = r.json()
    return pd.DataFrame.from_dict(data=payload, orient="index", columns=[column_name])

In [11]:
def get_carpark_names():
    """Fetch the car park names from the API as a ``carpark_name`` column.

    Any exception raised by the request is printed and ``None`` is returned
    in place of the DataFrame.
    """
    try:
        names_df = get_data_for_api(data.api_url, data.header, column_name="carpark_name")
    except Exception as err:
        print(err)
        names_df = None
    return names_df

In [12]:
# Fetch the car park names; the result is None if the request failed.
carpark_names_df = get_carpark_names()

In [13]:
def reformat_carpark_names(carpark_names_df):
    """Copy the frame's index into an ``id`` column and renumber the rows from 0.

    Args:
        carpark_names_df: Frame whose index holds the ids to keep (the keys of
            the API response), or ``None`` when the names could not be fetched.

    Returns:
        A new DataFrame with the original columns followed by ``id`` and a
        fresh 0..n-1 index, or ``None`` (after printing a message) when the
        input is ``None``. The input frame is never modified.
    """
    if carpark_names_df is None:
        print("Error: car park names not available")
        return None
    if "id" in carpark_names_df.columns:
        # Already reformatted (e.g. the calling cell was run again): deriving
        # "id" from the index a second time would replace the real ids with
        # row positions, so only normalise the index.
        return carpark_names_df.reset_index(drop=True)
    # assign() builds a new frame, so the caller's frame is left untouched.
    return carpark_names_df.assign(id=carpark_names_df.index).reset_index(drop=True)


In [14]:
# Move the ids out of the index into an "id" column (stays None if the fetch failed).
carpark_names_df = reformat_carpark_names(carpark_names_df)

In [15]:
# Display the reformatted table: car park name and id columns.
carpark_names_df

Unnamed: 0,carpark_name,id
0,Tallawong Station Car Park,1
1,Warriewood Car Park,10
2,Narrabeen Car Park,11
3,Mona Vale Car Park,12
4,Dee Why Car Park,13
5,West Ryde Car Park,14
6,Sutherland East Parade Car Park,15
7,Leppington Car Park,16
8,Edmondson Park South Car Park,17
9,St Marys Car Park,18


In [16]:
# Cache the car park names on disk as CSV.
# The fetch step yields None when the request failed, so skip the write in
# that case instead of failing with AttributeError on None.
if carpark_names_df is None:
    print("Car park names not available; nothing to cache")
else:
    carpark_names_df.to_csv(Path("../data/carpark_names.csv"), index=False)


In [24]:
def get_current_carpark_occupancy_data(carpark_names_df, url, headers):
    """Fetch the current record of every car park and combine them column-wise.

    Args:
        carpark_names_df: Frame whose first column is the car park name and
            whose second column is the facility id, or ``None``.
        url: API endpoint; each request adds ``facility=<id>`` as its query.
        headers: HTTP headers passed through to ``get_data_for_api``.

    Returns:
        A DataFrame with one column per car park, named after it. Rows are the
        fields of the first car park's response (later responses are
        left-joined onto it). An empty DataFrame is returned when
        ``carpark_names_df`` has no rows, and ``None`` (after printing a
        message) when it is ``None``.

    Raises:
        GetDataForApiError: Propagated from ``get_data_for_api`` when a
            request does not return 200 OK.
    """
    if carpark_names_df is None:
        print("Car park data not available")
        return None
    all_df = None
    for row in carpark_names_df.itertuples():
        # itertuples() yields (index label, first column, second column, ...).
        label, name, facility_id = row[0], row[1], row[2]
        print(label, name, facility_id)
        df = get_data_for_api(url, headers, column_name=name, query=f"facility={facility_id}")
        # Seed the result with the first response actually received, rather
        # than relying on some row carrying index label 0 (which left all_df
        # unbound for an empty frame or an index that does not start at 0).
        all_df = df if all_df is None else all_df.join(df[name])
    return pd.DataFrame() if all_df is None else all_df


In [25]:
# Request the occupancy record of each listed car park (one HTTP call per row) and combine them.
all_df = get_current_carpark_occupancy_data(carpark_names_df, data.api_url, data.header)

0 Tallawong Station Car Park 1
1 Warriewood Car Park 10
2 Narrabeen Car Park 11
3 Mona Vale Car Park 12
4 Dee Why Car Park 13
5 West Ryde Car Park 14
6 Sutherland East Parade Car Park 15
7 Leppington Car Park 16
8 Edmondson Park South Car Park 17
9 St Marys Car Park 18
10 Campbelltown Farrow Rd North Car Park 19
11 Kellyville Station Car Park 2
12 Campbelltown Hurley Steet South Car Park 20
13 Bella Vista Station Car Park 3
14 Hills Showground Station Car Park 4
15 Ashfield Car Park 486
16 Kogarah Car Park 487
17 Seven Hills Car Park 488
18 Manly Vale Car Park 489
19 Brookvale Car Park 490
20 Cherrybrook Station Car Park 5
21 Gordon Henry St North Car Park 6
22 Kiama Car Park 7
23 Gosford Car Park 8
24 Revesby Car Park 9


In [29]:
# Preview the first rows of the combined table (one column per car park).
all_df.head()

Unnamed: 0,Tallawong Station Car Park,Warriewood Car Park,Narrabeen Car Park,Mona Vale Car Park,Dee Why Car Park,West Ryde Car Park,Sutherland East Parade Car Park,Leppington Car Park,Edmondson Park South Car Park,St Marys Car Park,...,Ashfield Car Park,Kogarah Car Park,Seven Hills Car Park,Manly Vale Car Park,Brookvale Car Park,Cherrybrook Station Car Park,Gordon Henry St North Car Park,Kiama Car Park,Gosford Car Park,Revesby Car Park
tsn,2155384,2101131,2101130,2103108,2099207,211420,223210,217933,217426,276010,...,213110,221710,214710,2093117,210020,2126158,207210,253330,225040,221210
time,714551324,714551287,714550763,714551151,714550907,714551066,714551334,714551312,714551249,714550931,...,714587350,714587314,714587207,714587106,714587347,714551324,714551189,714546987,714550500,714551344
spots,1004,244,46,68,117,151,373,1884,1429,682,...,180,259,1613,142,246,400,213,42,1050,262
zones,"[{'spots': '152', 'zone_id': 'CPS-CUD1', 'occu...","[{'spots': '244', 'zone_id': '1', 'occupancy':...","[{'spots': '46', 'zone_id': '1', 'occupancy': ...","[{'spots': '68', 'zone_id': '1', 'occupancy': ...","[{'spots': '117', 'zone_id': '1', 'occupancy':...","[{'spots': '151', 'zone_id': '1', 'occupancy':...","[{'spots': '373', 'zone_id': '1', 'occupancy':...","[{'spots': '1884', 'zone_id': '1', 'occupancy'...","[{'spots': '1429', 'zone_id': '1', 'occupancy'...","[{'spots': '682', 'zone_id': '1', 'occupancy':...",...,"[{'spots': '180', 'zone_id': '1', 'occupancy':...","[{'spots': '259', 'zone_id': '2', 'occupancy':...","[{'spots': '874', 'zone_id': '4', 'occupancy':...","[{'spots': '142', 'zone_id': '1', 'occupancy':...","[{'spots': '246', 'zone_id': '1', 'occupancy':...","[{'spots': '400', 'zone_id': 'CPS-CHE', 'occup...","[{'spots': '213', 'zone_id': '1', 'occupancy':...","[{'spots': '42', 'zone_id': '1', 'occupancy': ...","[{'spots': '1050', 'zone_id': '1', 'occupancy'...","[{'spots': '262', 'zone_id': '1', 'occupancy':..."
ParkID,1,1,1,1,1,1,1,1,1,1,...,1,3,4,1,1,1,1,1,1,1


In [26]:
# Show the "occupancy" entry for one car park; any failure (for example the
# table being unavailable or the label missing) falls back to a short message.
try:
    ashfield_occupancy = all_df.loc["occupancy", "Ashfield Car Park"]
    print(ashfield_occupancy)
except Exception:
    print("Car park data not available")

{'loop': None, 'total': '148', 'monthlies': '0', 'open_gate': '0', 'transients': '152'}


In [27]:
# TODO: Schedule a Prefect job to collect this data hourly, every day, for a month?

In [28]:
# Open the dataset's metadata page in a browser (the `open` command works on macOS at least)

# !open $data.meta

In [None]:
# cf. https://github.com/streamlit/demo-uber-nyc-pickups/blob/main/streamlit_app.py