In [None]:
from datetime import datetime, timedelta
from pathlib import Path

import polars as pl
import requests

#### Fetch OPW (Online Price Watch) File Versions

In [None]:
# Query window: it closes one day before "now" (local clock) and opens
# three days before that.
dt_end = datetime.today() - timedelta(days=1)
dt_start = dt_end - timedelta(days=3)

# Both bounds are rendered as compact YYYYMMDD strings for the query string.
start, end = (bound.strftime("%Y%m%d") for bound in (dt_start, dt_end))

# Template for the "list file versions" endpoint; the two placeholders are
# filled with the start and end dates, in that order.
url = "".join(
    [
        "https://api.data.gov.hk/v1/historical-archive/list-file-versions?",
        "url=https://online-price-watch.consumer.org.hk/opw/opendata/pricewatch.json&",
        "start={}&end={}&",
    ]
)

# Filled in by the next cell from the API responses.
date_version = dict()

In [None]:
# Query the version-listing endpoint twice -- once for the whole window and
# once for the last day on its own -- and merge both answers into
# `date_version`. Later entries overwrite earlier ones that share a key.
for start_end in (start, end), (end, end):
    # requests applies no timeout unless one is given, so an unresponsive
    # server would otherwise block this cell indefinitely.
    response = requests.get(url.format(*start_end), timeout=30)
    response.raise_for_status()

    data = response.json()
    dates = data.get("data-dictionary-dates", [])
    versions = data.get("timestamps", [])

    # NOTE(review): zip() pairs the two lists purely by position and stops
    # at the shorter one; this assumes the API returns them as parallel
    # lists -- confirm against the API specification.
    date_version.update(dict(zip(dates, versions)))

#### Download OPW Data

In [None]:
# Template for the "get file" endpoint; the single placeholder receives the
# version string of the snapshot to download. This rebinds `url`, replacing
# the version-listing template used by the earlier cells.
url = "".join(
    [
        "https://api.data.gov.hk/v1/historical-archive/get-file?",
        "url=https://online-price-watch.consumer.org.hk/opw/opendata/pricewatch.json&",
        "time={}&",
    ]
)

# Accumulators for the flattened per-supermarket rows and the item records.
prices = []
items = []

In [None]:
# Start from empty accumulators inside this cell so that re-running it does
# not append a second copy of every record to lists left over from an
# earlier run.
prices, items = [], []

# Download each archived snapshot and split every item into
#   * one row per supermarket (price and/or offer fields) -> `prices`
#   * the remaining item fields                           -> `items`
for date, version in date_version.items():
    # requests applies no timeout unless one is given; without it a stalled
    # download would hang the cell indefinitely.
    response = requests.get(url.format(version), timeout=60)
    response.raise_for_status()
    data = response.json()

    for item in data:
        # Normalise the item code to an upper-case string.
        item["code"] = str(item["code"]).upper()
        code = item["code"]

        # Detach the nested lists so `item` keeps only its own fields.
        price = item.pop("prices", [])
        offer = item.pop("offers", [])

        # Index both lists by supermarket so they can be merged per store.
        smkt_price = {p["supermarketCode"]: p for p in price}
        smkt_offer = {o["supermarketCode"]: o for o in offer}

        # Ordered union of the supermarket codes. Unlike iterating a set of
        # strings, this gives the same row order on every run.
        supermarkets = dict.fromkeys([*smkt_price, *smkt_offer])

        # One merged row per supermarket; a store that appears in only one
        # of the two lists contributes only that list's fields.
        prices.extend(
            {
                "code": code, "date": date,
                **smkt_price.get(smkt, {}), **smkt_offer.get(smkt, {}),
            }
            for smkt in supermarkets
        )

        # NOTE(review): the item is appended once per snapshot, so `items`
        # holds one record per (snapshot, item) -- confirm that downstream
        # code expects or de-duplicates this.
        items.append(item)

df_item = pl.json_normalize(items)

# Rows do not all carry the same keys (price-only, offer-only, or both), so
# scan every row when inferring the schema instead of only the default
# leading sample, which could miss keys that first appear later.
df_price = pl.from_records(prices, infer_schema_length=None)

In [None]:
# Persist both tables as parquet. The target directory is created first so
# a missing folder does not make the write fail after all the downloads
# have already completed.
raw_dir = Path("../data/raw")
raw_dir.mkdir(parents=True, exist_ok=True)

df_item.write_parquet(raw_dir / "items.parquet")
df_price.write_parquet(raw_dir / "prices.parquet")