In [1]:
import requests
import csv
from pathlib import Path

### Configuration

In [2]:
# Local folder that receives every downloaded CSV; created up front
# (including missing parents) and left untouched if it already exists.
data_dir = Path("data")
data_dir.mkdir(exist_ok=True, parents=True)

### BTO data

In [3]:
# --------------------------
# Download BTO Dataset (Apr 2008 – Mar 2023)
# --------------------------
# Requests up to 50000 records in a single datastore_search call and writes
# the returned records to a CSV file inside data_dir.
bto_dataset_id = "d_2d493bdcc1d9a44828b6e71cb095b88d"
bto_url = f"https://data.gov.sg/api/action/datastore_search?resource_id={bto_dataset_id}&limit=50000"

print("Downloading BTO dataset...")
try:
    # A timeout keeps the cell from hanging forever on a stalled connection;
    # raise_for_status() turns HTTP error responses into an exception that is
    # reported by the handler at the bottom of this cell.
    bto_http_response = requests.get(bto_url, timeout=30)
    bto_http_response.raise_for_status()
    bto_response = bto_http_response.json()

    if bto_response.get("success") and "result" in bto_response:
        bto_records = bto_response["result"]["records"]
        if bto_records:
            bto_filename = data_dir / "BTO_prices_Apr2008_Mar2023.csv"
            # newline="" lets the csv module control line endings itself.
            with open(bto_filename, mode="w", newline="", encoding="utf-8") as file:
                # Column order is taken from the first record's keys.
                writer = csv.DictWriter(file, fieldnames=bto_records[0].keys())
                writer.writeheader()
                writer.writerows(bto_records)
            print(f"✅ Saved {bto_filename}")
        else:
            print("❌ No BTO records found.")
    else:
        print("❌ Failed to fetch BTO dataset.")
except Exception as e:
    # Best-effort cell: report the problem instead of aborting the notebook.
    print(f"❌ Error downloading BTO dataset: {e}")

Downloading BTO dataset...
✅ Saved data/BTO_prices_Apr2008_Mar2023.csv


### Resale data

In [5]:
from concurrent.futures import ThreadPoolExecutor, as_completed


# Identifier of the data.gov.sg collection whose metadata is queried below
# to discover the child dataset ids.
collection_id = "189"
url_metadata = (
    "https://api-production.data.gov.sg/v2/public/api/collections/"
    + collection_id
    + "/metadata"
)

def fetch_dataset(dataset_id, limit=5000, timeout=30):
    """Download every record of one dataset and save it as a CSV file.

    Pages through the data.gov.sg ``datastore_search`` endpoint until an empty
    page is returned, then writes all collected records to
    ``data_dir / f"resale_{dataset_id}.csv"``.

    Args:
        dataset_id: Resource id sent to the API as ``resource_id``.
        limit: Number of records requested per page.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        None. Progress and failures are reported with ``print``. If the API
        response is not flagged as successful, or no records are found, the
        function returns without writing a file.

    Raises:
        Exceptions raised by ``requests`` (connection errors, timeouts,
        undecodable JSON bodies) propagate to the caller.
    """
    print(f"⬇️ Downloading dataset: {dataset_id}")
    all_records = []
    offset = 0
    base_url = "https://data.gov.sg/api/action/datastore_search"

    while True:
        # Let requests build (and escape) the query string; the timeout stops
        # a stalled connection from blocking the caller indefinitely.
        response = requests.get(
            base_url,
            params={"resource_id": dataset_id, "limit": limit, "offset": offset},
            timeout=timeout,
        )
        data = response.json()

        if not data.get('success'):
            print(f"❌ Failed to fetch {dataset_id}: {data.get('error', {})}")
            return

        records = data['result']['records']
        if not records:
            # An empty page marks the end of the dataset.
            break

        all_records.extend(records)
        # Advance by what was actually received so no rows are skipped if the
        # server returns fewer records than requested.
        offset += len(records)

    if not all_records:
        print(f"⚠️ No records found for {dataset_id}")
        return

    filename = data_dir / f"resale_{dataset_id}.csv"
    # newline="" lets the csv module control line endings itself.
    with open(filename, mode="w", newline="", encoding="utf-8") as file:
        # Column order is taken from the first record's keys.
        writer = csv.DictWriter(file, fieldnames=all_records[0].keys())
        writer.writeheader()
        writer.writerows(all_records)

    print(f"✅ Saved {filename} with {len(all_records)} rows")


# Look up the collection's child datasets, then download them concurrently.
try:
    # Timeout so a stalled metadata request cannot hang the cell; HTTP error
    # responses are raised here and reported by the outer handler instead of
    # surfacing later as an opaque KeyError.
    response = requests.get(url_metadata, timeout=30)
    response.raise_for_status()
    collection_data = response.json()
    child_datasets = collection_data['data']['collectionMetadata']['childDatasets']

    with ThreadPoolExecutor(max_workers=5) as executor:
        # Map each future back to its dataset id so failures can be attributed.
        futures = {executor.submit(fetch_dataset, ds): ds for ds in child_datasets}
        for future in as_completed(futures):
            ds = futures[future]
            try:
                # result() re-raises any exception raised inside the worker.
                future.result()
            except Exception as e:
                print(f"❌ Error with dataset {ds}: {e}")

except Exception as e:
    # Best-effort cell: report the problem instead of aborting the notebook.
    print(f"❌ Error fetching metadata: {e}")


⬇️ Downloading dataset: d_8b84c4ee58e3cfc0ece0d773c8ca6abc
⬇️ Downloading dataset: d_43f493c6c50d54243cc1eab0df142d6a
⬇️ Downloading dataset: d_2d5ff9ea31397b66239f245f57751537
⬇️ Downloading dataset: d_ebc5ab87086db484f88045b47411ebc5
⬇️ Downloading dataset: d_ea9ed51da2787afaf8e51f827c304208
✅ Saved data/resale_d_ea9ed51da2787afaf8e51f827c304208.csv with 37153 rows
✅ Saved data/resale_d_2d5ff9ea31397b66239f245f57751537.csv with 52203 rows
✅ Saved data/resale_d_8b84c4ee58e3cfc0ece0d773c8ca6abc.csv with 214695 rows
✅ Saved data/resale_d_ebc5ab87086db484f88045b47411ebc5.csv with 287196 rows
✅ Saved data/resale_d_43f493c6c50d54243cc1eab0df142d6a.csv with 369651 rows
