In [0]:
# Databricks notebook source
# Notebook parameters: the target catalog, the bronze schema that is read
# from, and the silver schema that is written to.
dbutils.widgets.text("catalog", "ptd_dev")
dbutils.widgets.text("schema_bronze", "bronze")
dbutils.widgets.text("schema_silver", "silver")

catalog       = dbutils.widgets.get("catalog")
schema_bronze = dbutils.widgets.get("schema_bronze")
schema_silver = dbutils.widgets.get("schema_silver")

spark.sql(f"USE CATALOG {catalog}")
# Create the silver schema before selecting it: USE SCHEMA raises when the
# schema does not exist yet (e.g. the first run against a fresh catalog).
# The statement is idempotent, so repeating it later in the notebook is harmless.
spark.sql(f"CREATE SCHEMA IF NOT EXISTS {catalog}.{schema_silver}")
spark.sql(f"USE SCHEMA {schema_silver}")

from pyspark.sql import functions as F, Window as W

# 1) prices_daily
# Read the raw bronze prices, keeping only the columns carried into silver.
pr = spark.table(f"{catalog}.{schema_bronze}.prices_raw").select(
    "ticker", "date", "open", "high", "low", "close", "adj_close", "volume", "source"
)

# Per-ticker window ordered by date, plus trailing row frames that end at the
# current row (5 and 20 rows wide respectively).
w = W.partitionBy("ticker").orderBy("date")
trailing_5 = w.rowsBetween(-4, 0)
trailing_20 = w.rowsBetween(-19, 0)

adj_close = F.col("adj_close")

# Derived columns, appended in this exact order. vol_20 reads ret_1d, so
# ret_1d has to be added before it.
derived_columns = [
    ("ret_1d", adj_close / F.lag("adj_close", 1).over(w) - 1),
    ("ret_5d", adj_close / F.lag("adj_close", 5).over(w) - 1),
    ("ma_5", F.avg("adj_close").over(trailing_5)),
    ("ma_20", F.avg("adj_close").over(trailing_20)),
    ("vol_20", F.stddev_samp("ret_1d").over(trailing_20)),
    ("ingestion_ts", F.current_timestamp()),
]

prices_daily = pr
for col_name, col_expr in derived_columns:
    prices_daily = prices_daily.withColumn(col_name, col_expr)

# Make sure the target schema exists, then replace the table (schema included).
spark.sql(f"CREATE SCHEMA IF NOT EXISTS {catalog}.{schema_silver}")
(
    prices_daily.write
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable(f"{catalog}.{schema_silver}.prices_daily")
)

# 2) market_features (SPY and ^VIX)
market_tickers = ["SPY", "^VIX"]
feature_cols = ["adj_close", "ret_1d", "ret_5d", "ma_5", "ma_20", "vol_20"]

# Keep only the two market tickers and the feature columns to be pivoted.
mf = (
    prices_daily
    .filter(F.col("ticker").isin(*market_tickers))
    .select("ticker", "date", *feature_cols)
)

# One row per date; each feature is taken with first() per ticker and becomes
# its own column, named '<ticker>_<feature>'.
mf_pivot = (
    mf.groupBy("date")
    .pivot("ticker", market_tickers)
    .agg(*[F.first(feature).alias(feature) for feature in feature_cols])
)

# Flatten column names.
# The pivot already emits flat names such as 'SPY_adj_close', so every column
# is projected unchanged and in its existing order; no renaming is needed.
cols = [F.col(c) for c in mf_pivot.columns]
market_features = mf_pivot.select(*cols).withColumn("ingestion_ts", F.current_timestamp())

# NOTE(review): the '^VIX_*' column names contain '^', so they need backtick
# quoting when referenced from SQL — confirm downstream consumers expect this
# before considering a rename.
market_features.write.mode("overwrite").option("overwriteSchema","true").saveAsTable(f"{catalog}.{schema_silver}.market_features")