In [0]:
# Databricks notebook source
# The target catalog is a notebook parameter (widget default: "ptd_dev").
dbutils.widgets.text("catalog", "ptd_dev")

catalog       = dbutils.widgets.get("catalog").strip()
schema_bronze = "bronze"
schema_silver = "silver"

# Fail fast with a readable message rather than a SQL parse error below.
if not catalog:
    raise ValueError("Widget 'catalog' must not be empty")

spark.sql(f"USE CATALOG {catalog}")
# The silver schema has to exist before it can be made the current schema;
# without this, USE SCHEMA fails on a catalog where it was never created.
spark.sql(f"CREATE SCHEMA IF NOT EXISTS {catalog}.{schema_silver}")
spark.sql(f"USE SCHEMA {schema_silver}")



In [0]:
from pyspark.sql import functions as F, Window as W
# Read the raw price columns that feed the silver table.
pr = spark.table(f"{catalog}.{schema_bronze}.prices_raw").select(
    "ticker", "date", "open", "high", "low", "close", "adj_close", "volume", "source"
)

# Per-ticker window ordered by date. The trailing frames span the current row
# plus the preceding 4 / 19 rows (row counts, not calendar days).
w = W.partitionBy("ticker").orderBy("date")
last_5_rows = w.rowsBetween(-4, 0)
last_20_rows = w.rowsBetween(-19, 0)

adj_close_col = F.col("adj_close")

# (column name, expression) pairs applied in order. vol_20 reads ret_1d, so
# ret_1d must be added first; the list order is also the output column order.
derived_columns = [
    # Simple returns: adj_close relative to the value 1 / 5 rows earlier.
    ("ret_1d", adj_close_col / F.lag("adj_close", 1).over(w) - 1),
    ("ret_5d", adj_close_col / F.lag("adj_close", 5).over(w) - 1),
    # Trailing averages of adj_close.
    ("ma_5", F.avg("adj_close").over(last_5_rows)),
    ("ma_20", F.avg("adj_close").over(last_20_rows)),
    # Sample standard deviation of the 1-row return over the 20-row frame.
    ("vol_20", F.stddev_samp("ret_1d").over(last_20_rows)),
    ("ingestion_ts", F.current_timestamp()),
]

prices_daily = pr
for column_name, column_expr in derived_columns:
    prices_daily = prices_daily.withColumn(column_name, column_expr)

spark.sql(f"CREATE SCHEMA IF NOT EXISTS {catalog}.{schema_silver}")

# Overwrite the silver table, allowing its schema to be replaced as well.
silver_table = f"{catalog}.{schema_silver}.prices_daily"
(
    prices_daily.write
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable(silver_table)
)