# Getting the schema of the required database

In [1]:
import duckdb
# Relative path of the DuckDB database file that every helper below connects to.
DB_PATH = "database/HDB_data.db"

In [4]:
def list_tables():
    """Return the names of all tables in the database's ``main`` schema.

    A fresh connection to ``DB_PATH`` is opened for each call and is always
    closed before returning, including when the query raises.

    Returns:
        A list of table-name strings, ordered by name.
    """
    conn = duckdb.connect(DB_PATH)
    try:
        c = conn.cursor()
        c.execute("""
            SELECT table_name
            FROM information_schema.tables
            WHERE table_schema = 'main'
            ORDER BY table_name
        """)
        return [r[0] for r in c.fetchall()]
    finally:
        # Release the connection even if the query fails.
        conn.close()

# Show which tables the database exposes.
list_tables()

['rental_data_2021_to_2025', 'resale_data_2017_to_2025']

In [5]:
# Categorical columns whose distinct values are reported for each known table.
# Tables not listed here still get their schema, with empty categories.
_CATEGORICAL_COLUMNS = {
    "resale_data_2017_to_2025": ["flat_type", "town", "flat_model", "storey_range"],
    "rental_data_2021_to_2025": ["town", "flat_type"],
}


def get_table_schema(table_name: str):
    """Describe a table: its columns plus distinct values of key categoricals.

    Args:
        table_name: Name of the table to inspect (as returned by
            ``list_tables``).

    Returns:
        On success, a dict with two keys:
            ``"schema"``: a list of ``{"cid", "name", "type"}`` dicts taken
                from the first three fields of ``PRAGMA table_info``.
            ``"categories"``: a dict mapping each configured categorical
                column to its sorted distinct values (``[]`` if that column
                cannot be queried). Empty for tables with no configured
                categorical columns.
        If the table cannot be queried, a dict with a single ``"error"`` key
        holding a human-readable message.
    """
    # The name is interpolated into SQL, so escape it: as a double-quoted
    # identifier for SELECT, and as a single-quoted literal for the PRAGMA.
    quoted_table = '"' + table_name.replace('"', '""') + '"'
    table_literal = "'" + table_name.replace("'", "''") + "'"

    conn = duckdb.connect(DB_PATH)
    try:
        c = conn.cursor()

        # If the table doesn't exist, return an error instead of raising.
        # duckdb.Error is the common base class; a missing table is reported
        # as a CatalogException, which is not an OperationalError.
        try:
            c.execute(f"SELECT * FROM {quoted_table} LIMIT 1;")
        except duckdb.Error:
            return {"error": f"Table {table_name} does not exist. Call `list_tables` to see available tables."}

        # Step 1: Basic schema
        c.execute(f"PRAGMA table_info({table_literal});")
        schema = [{"cid": r[0], "name": r[1], "type": r[2]} for r in c.fetchall()]

        # Step 2: Collect distinct values for key categorical columns.
        # Always bound, so unknown-but-existing tables return {} rather than
        # failing at the return statement.
        categories = {}
        for col in _CATEGORICAL_COLUMNS.get(table_name, ()):
            try:
                c.execute(f"SELECT DISTINCT {col} FROM {quoted_table} ORDER BY {col} ASC;")
                categories[col] = [r[0] for r in c.fetchall()]
            except duckdb.Error:
                # Best effort: a column that cannot be queried yields no values.
                categories[col] = []
    finally:
        # Runs on every exit path, including the early error return above.
        conn.close()

    return {"schema": schema, "categories": categories}

# Inspect the resale table: column definitions plus distinct categorical values.
get_table_schema("resale_data_2017_to_2025")

{'schema': [{'cid': 0, 'name': 'year', 'type': 'INTEGER'},
  {'cid': 1, 'name': 'month_num', 'type': 'INTEGER'},
  {'cid': 2, 'name': 'town', 'type': 'VARCHAR'},
  {'cid': 3, 'name': 'flat_type', 'type': 'VARCHAR'},
  {'cid': 4, 'name': 'block', 'type': 'VARCHAR'},
  {'cid': 5, 'name': 'street_name', 'type': 'VARCHAR'},
  {'cid': 6, 'name': 'storey_range', 'type': 'VARCHAR'},
  {'cid': 7, 'name': 'floor_area_sqm', 'type': 'DOUBLE'},
  {'cid': 8, 'name': 'flat_model', 'type': 'VARCHAR'},
  {'cid': 9, 'name': 'lease_commence_date', 'type': 'INTEGER'},
  {'cid': 10, 'name': 'remaining_lease', 'type': 'VARCHAR'},
  {'cid': 11, 'name': 'resale_price', 'type': 'DOUBLE'}],
 'categories': {'flat_type': ['1 ROOM',
   '2 ROOM',
   '3 ROOM',
   '4 ROOM',
   '5 ROOM',
   'EXECUTIVE',
   'MULTI-GENERATION'],
  'town': ['ANG MO KIO',
   'BEDOK',
   'BISHAN',
   'BUKIT BATOK',
   'BUKIT MERAH',
   'BUKIT PANJANG',
   'BUKIT TIMAH',
   'CENTRAL AREA',
   'CHOA CHU KANG',
   'CLEMENTI',
   'GEYLANG',
 