In [8]:
from pyspark.sql import functions as F
from pyspark.sql.window import Window
from delta.tables import DeltaTable

# Read the Silver-layer table that every indicator below is derived from.
df = spark.table("silver_stock_minutes")

# All rolling frames share one partitioning/ordering: rows are grouped per
# ticker and sorted by timestamp.
_per_ticker_by_time = Window.partitionBy("ticker").orderBy("timestamp")

# Trailing row-based frames: rowsBetween(-(n - 1), 0) spans the current row
# plus the n - 1 rows before it, so each frame holds at most n rows.
# NOTE(review): frames count rows, not elapsed time; the "*m" column names
# built on them presumably assume one row per minute -- confirm.
w3 = _per_ticker_by_time.rowsBetween(-2, 0)
w8 = _per_ticker_by_time.rowsBetween(-7, 0)
w21 = _per_ticker_by_time.rowsBetween(-20, 0)

w5 = _per_ticker_by_time.rowsBetween(-4, 0)
w15 = _per_ticker_by_time.rowsBetween(-14, 0)
w30 = _per_ticker_by_time.rowsBetween(-29, 0)

# Returns: both columns compare each close with the close of the previous row
# for the same ticker (rows ordered by timestamp).
_prev_close = F.lag("close").over(Window.partitionBy("ticker").orderBy("timestamp"))
_close_ratio = F.col("close") / _prev_close

# Simple return: close / previous close - 1.
df = df.withColumn("return_1m", _close_ratio - 1)
# Log return: natural logarithm of the same ratio.
df = df.withColumn("return_log_1m", F.log(_close_ratio))

# "EMA" columns. NOTE: these are placeholders -- each one is a plain rolling
# mean of close over its trailing frame, not an exponentially weighted average.
for _name, _frame in (("ema_3", w3), ("ema_8", w8), ("ema_21", w21)):
    df = df.withColumn(_name, F.avg("close").over(_frame))

# EMA cross signal: label each row by comparing the 3-row average against the
# 8-row average.
_fast = F.col("ema_3")
_slow = F.col("ema_8")
_cross_signal = (
    F.when(_fast > _slow, F.lit("bullish"))
    .when(_fast < _slow, F.lit("bearish"))
    # Fallback when neither comparison holds (e.g. the two averages are equal).
    .otherwise(F.lit("neutral"))
)
df = df.withColumn("ema_cross_signal", _cross_signal)

# Rolling volatility: standard deviation of return_1m over the 5-, 15- and
# 30-row trailing frames.
for _name, _frame in (
    ("volatility_5m", w5),
    ("volatility_15m", w15),
    ("volatility_30m", w30),
):
    df = df.withColumn(_name, F.stddev("return_1m").over(_frame))

# Rolling extremes: highest high and lowest low within the 15-row frame.
df = (
    df.withColumn("high_15m", F.max("high").over(w15))
    .withColumn("low_15m", F.min("low").over(w15))
)

# Rolling VWAP over the 30-row frame: sum(close * volume) / sum(volume).
_pv_sum_30 = F.sum(F.col("close") * F.col("volume")).over(w30)
_vol_sum_30 = F.sum("volume").over(w30)

# Guard the division: a frame whose volume totals zero has no defined VWAP.
# With spark.sql.ansi.enabled=true a bare division by zero raises an error
# instead of yielding NULL, so NULL is produced explicitly here (F.when with
# no otherwise() returns NULL when the condition is not met). Results are
# unchanged wherever the original division already succeeded.
df = df.withColumn(
    "vwap_30m",
    F.when(_vol_sum_30 != 0, _pv_sum_30 / _vol_sum_30),
)

# Stamp each row with current_timestamp() as its ingestion time and derive the
# calendar date from the row's timestamp.
df = (
    df.withColumn("ingestion_time", F.current_timestamp())
    .withColumn("date", F.to_date("timestamp"))
)

# Output layout: keys, raw price/volume fields, then each indicator group,
# with the ingestion stamp last. Columns not listed here are dropped.
final_cols = (
    ["ticker", "timestamp", "date"]
    + ["open", "high", "low", "close", "volume"]
    + ["return_1m", "return_log_1m"]
    + ["ema_3", "ema_8", "ema_21"]
    + ["ema_cross_signal"]
    + ["volatility_5m", "volatility_15m", "volatility_30m"]
    + ["high_15m", "low_15m"]
    + ["vwap_30m"]
    + ["ingestion_time"]
)

df = df.select(*final_cols)

# Persist to the Gold table: create it if it does not exist yet, otherwise
# upsert into it.
gold_table = "gold_stock_minutes_indicators"

if not spark.catalog.tableExists(gold_table):
    # No table yet -- write the DataFrame out as a new Delta table.
    df.write.format("delta").saveAsTable(gold_table)
else:
    # Table exists -- merge keyed on (ticker, timestamp): matched target rows
    # get every column updated from the source, and source rows without a
    # match are inserted.
    delta_table = DeltaTable.forName(spark, gold_table)
    _merge_builder = delta_table.alias("target").merge(
        df.alias("source"),
        "target.ticker = source.ticker AND target.timestamp = source.timestamp",
    )
    _merge_builder.whenMatchedUpdateAll().whenNotMatchedInsertAll().execute()


StatementMeta(, e05a1651-d6cf-4390-96f5-8e38a75ba200, 10, Finished, Available, Finished)