In [0]:
# 03_build_wide_silver (Unity Catalog): join order items with orders and customers and write the sales_wide_silver table.
from perf_lab_utils import perf_lab
from pyspark.sql import functions as F

# Notebook parameters. The "catalog" widget defaults to the session's current
# catalog and the "schema" widget to "mini_lakehouse"; both can be overridden
# by whoever runs the notebook.
default_catalog = spark.sql("SELECT current_catalog()").first()[0]
widget_defaults = {
    "catalog": default_catalog,
    "schema": "mini_lakehouse",
}
for widget_name, widget_default in widget_defaults.items():
    dbutils.widgets.text(widget_name, widget_default)

CATALOG = dbutils.widgets.get("catalog")
SCHEMA = dbutils.widgets.get("schema")

def tn(name: str) -> str:
    """Return the three-part table name ``<CATALOG>.<SCHEMA>.<name>``.

    CATALOG and SCHEMA are the module-level values read from the notebook
    widgets; they are interpolated as-is, without quoting.
    """
    return "{}.{}.{}".format(CATALOG, SCHEMA, name)

# Make the widget-selected catalog and schema the session defaults.
# NOTE(review): the identifiers are interpolated unquoted, so a catalog or
# schema name that needs backticks would not parse here - confirm the naming
# convention.
for use_statement in (f"USE CATALOG {CATALOG}", f"USE SCHEMA {SCHEMA}"):
    spark.sql(use_statement)

# Load the three silver inputs: c = customers, o = orders, i = order items.
c, o, i = (
    spark.table(tn(table_name))
    for table_name in ("customers_silver", "orders_silver", "order_items_silver")
)

# Each count() is a separate Spark action; they run in this order.
customers_count = c.count()
orders_count = o.count()
items_count = i.count()
print(
    "Input counts:",
    "customers=", customers_count,
    "orders=", orders_count,
    "items=", items_count,
)

# Build the wide frame with order items as the driving side: every item row is
# kept (left joins), enriched first with its order and then with the customer.
# Joining on a column *name* yields a single order_id / customer_id column in
# the result.
items_aliased = i.alias("i")
orders_aliased = o.alias("o")
customers_aliased = c.alias("c")

io = items_aliased.join(orders_aliased, on="order_id", how="left")
wide = io.join(customers_aliased, on="customer_id", how="left")

# Curate the output schema of the wide table.
# Every column is optional: one that is absent from `wide` is skipped rather
# than raising, so the selection adapts to whatever the inputs provide.

# Capture the column names once; they back every membership test below.
available_cols = set(wide.columns)

# Columns copied through under their existing name, in output order.
passthrough_cols = [
    "order_id",
    "customer_id",
    "product_id",
    "order_ts",
    "status",
    "order_total_amount",
    "quantity",
    "unit_price",
    "line_amount",
]

# Columns exposed under a customer_-prefixed name (source name -> output name).
rename_map = {
    "first_name": "customer_first_name",
    "last_name": "customer_last_name",
    "email": "customer_email",
    "country": "customer_country",
}

# Pass-through columns come first, followed by the renamed ones.
select_exprs = [
    F.col(col_name) for col_name in passthrough_cols if col_name in available_cols
]
select_exprs += [
    F.col(src).alias(dst)
    for src, dst in rename_map.items()
    if src in available_cols
]

wide_curated = wide.select(*select_exprs)

# Run perf_lab (from perf_lab_utils) on the curated frame under the label
# "sales_wide_silver". The options are passed through unchanged; see
# perf_lab_utils for what each one does.
perf_lab_options = {
    "keys": ["order_id", "product_id"],
    "null_cols": ["order_id", "product_id", "customer_first_name"],
    "negative_cols": ["quantity", "unit_price"],
    "require_distinct_keys": False,
    "emit_view": "perf_metrics",
}
perf_lab(wide_curated, "sales_wide_silver", **perf_lab_options)

# Persist the curated frame as a Delta table, overwriting any existing
# contents of the target table.
target_table = tn("sales_wide_silver")
delta_writer = wide_curated.write.format("delta").mode("overwrite")
delta_writer.saveAsTable(target_table)
print("Created table:", target_table)

# Post-write sanity metrics.
# Read them from the table that was just written instead of from wide_curated:
# wide_curated is not cached in this notebook, so every action on it would
# re-run the full two-join plan, whereas the persisted table is scanned
# directly and reflects exactly what was stored.
written = spark.table(tn("sales_wide_silver"))
written_cols = set(written.columns)

wide_count = written.count()
print("Wide count:", wide_count)

# orders_distinct stays None when the table has no order_id column.
orders_distinct = None
if "order_id" in written_cols:
    orders_distinct = written.select("order_id").dropDuplicates().count()
    print("Distinct orders in wide:", orders_distinct)

# NOTE(review): a NULL customer_first_name is used as a proxy for "no matching
# customer"; a matched customer whose first_name is itself NULL would be
# counted too - confirm first_name is non-nullable upstream.
if "customer_first_name" in written_cols:
    missing_customer = written.filter(F.col("customer_first_name").isNull()).count()
    print("Rows with missing customer join:", missing_customer)
