In [0]:
# 01_ingest_bronze (Unity Catalog)
from perf_lab_utils import perf_lab
from pyspark.sql import functions as F

# Notebook parameters: two text widgets select the target catalog and schema.
# The catalog widget defaults to whatever catalog the session is currently
# using; the schema widget defaults to "mini_lakehouse".
_default_catalog = spark.sql("SELECT current_catalog()").first()[0]
dbutils.widgets.text("catalog", _default_catalog)
dbutils.widgets.text("schema", "mini_lakehouse")

# Resolve the widget values once; every table name below is built from them.
CATALOG = dbutils.widgets.get("catalog")
SCHEMA = dbutils.widgets.get("schema")

def tn(name: str) -> str:
    """Return the three-part table name ``<CATALOG>.<SCHEMA>.<name>``.

    ``CATALOG`` and ``SCHEMA`` are the module-level values read from the
    notebook widgets. The parts are joined with dots as-is (no quoting).
    """
    return "{}.{}.{}".format(CATALOG, SCHEMA, name)

# Point the session at the selected catalog first, then at the schema.
for _use_stmt in (f"USE CATALOG {CATALOG}", f"USE SCHEMA {SCHEMA}"):
    spark.sql(_use_stmt)

# Load the three raw source tables (fully qualified through tn) as DataFrames.
customers, orders, items = (
    spark.table(tn(raw_name))
    for raw_name in ("customers_raw", "orders_raw", "order_items_raw")
)

# Stamp every raw DataFrame with an "ingest_ts" column built from the same
# current_timestamp() column expression.
# NOTE(review): current_timestamp() is presumably resolved per query execution,
# so the value persisted by each write may differ from the one seen in the
# count/display actions of this notebook -- confirm that this is acceptable.
ingest_ts = F.current_timestamp()
customers_b, orders_b, items_b = (
    raw_df.withColumn("ingest_ts", ingest_ts)
    for raw_df in (customers, orders, items)
)

# Report the row count of each stamped DataFrame (one count action apiece).
for _count_label, _counted_df in (
    ("customers rows:", customers_b),
    ("orders rows:", orders_b),
    ("order_items rows:", items_b),
):
    print(_count_label, _counted_df.count())

# Run the perf_lab helper on each bronze DataFrame, passing its table label,
# key columns and the columns listed as null_cols. All three calls use
# emit_view="perf_metrics".
# NOTE(review): perf_lab comes from perf_lab_utils; its exact behaviour is not
# visible here -- see that module for what is measured and emitted.
for _lab_df, _lab_name, _key_cols, _null_cols in (
    (customers_b, "customers_bronze", ["customer_id"], ["customer_id"]),
    (orders_b, "orders_bronze", ["order_id"], ["order_id", "customer_id"]),
    (
        items_b,
        "order_items_bronze",
        ["order_id", "product_id"],
        ["order_id", "product_id"],
    ),
):
    perf_lab(
        _lab_df,
        _lab_name,
        keys=_key_cols,
        null_cols=_null_cols,
        emit_view="perf_metrics",
    )

# Save each stamped DataFrame as a Delta table in overwrite mode, then print
# the fully qualified names of the tables that were written.
_bronze_targets = (
    (customers_b, tn("customers_bronze")),
    (orders_b, tn("orders_bronze")),
    (items_b, tn("order_items_bronze")),
)

for _bronze_df, _bronze_name in _bronze_targets:
    _bronze_df.write.mode("overwrite").format("delta").saveAsTable(_bronze_name)

print("Created tables:", *(_target_name for _, _target_name in _bronze_targets))

# Render the three stamped DataFrames in the notebook output, in the same
# order they were written.
for _shown_df in (customers_b, orders_b, items_b):
    display(_shown_df)
