In [1]:
from pyspark.sql import functions as F
from pyspark.sql.window import Window
from delta.tables import DeltaTable

# Source: cleaned daily bars from the Silver layer
df = spark.table("silver_stock_daily")

# Per-ticker, date-ordered base window with no explicit frame; the trailing
# frames below are derived from it. Frames count rows (trading bars), not
# calendar days, and always end on the current row.
w_ema = Window.partitionBy("ticker").orderBy("date")

w10 = w_ema.rowsBetween(-9, 0)
w20 = w_ema.rowsBetween(-19, 0)
w50 = w_ema.rowsBetween(-49, 0)
w200 = w_ema.rowsBetween(-199, 0)

# Simple moving averages of the close, one column per trailing frame.
# Early rows of each ticker average over however many rows exist so far.
for _span, _frame in ((10, w10), (20, w20), (50, w50), (200, w200)):
    df = df.withColumn(f"sma_{_span}", F.avg("close").over(_frame))

# Exponential Moving Averages
def _ema(column, span, lookback_spans=10):
    """Return a Column with the per-ticker exponential moving average.

    Implements the recursive form
    ``EMA_t = alpha * x_t + (1 - alpha) * EMA_{t-1}`` with
    ``alpha = 2 / (span + 1)``, seeded with the oldest value in the frame.

    Args:
        column: Name of the numeric column to smooth (cast to double).
        span: EMA period in rows, e.g. 12 for a 12-bar EMA.
        lookback_spans: The trailing frame covers ``lookback_spans * span``
            rows. At the default of 10 the seed value carries a weight of
            roughly e**-20, so truncating history is numerically negligible
            while the per-row array stays small.

    Returns:
        A double Column. Null inputs are skipped by ``collect_list``, so the
        EMA carries forward over them; it is null only when the frame holds
        no non-null value at all.
    """
    alpha = 2.0 / (span + 1)
    # w_ema is the per-ticker, date-ordered window defined earlier in the script.
    frame = w_ema.rowsBetween(-(lookback_spans * span - 1), 0)
    history = F.collect_list(F.col(column).cast("double")).over(frame)
    # Fold oldest -> newest. A null accumulator marks the first element,
    # which seeds the recursion.
    return F.aggregate(
        history,
        F.lit(None).cast("double"),
        lambda acc, x: F.when(acc.isNull(), x).otherwise(
            alpha * x + (1.0 - alpha) * acc
        ),
    )


df = df.withColumn("ema_12", _ema("close", 12))
df = df.withColumn("ema_26", _ema("close", 26))

# MACD line (fast EMA - slow EMA), its 9-period EMA signal line, and histogram
df = df.withColumn("macd", F.col("ema_12") - F.col("ema_26"))
df = df.withColumn("macd_signal", _ema("macd", 9))
df = df.withColumn("macd_hist", F.col("macd") - F.col("macd_signal"))

# RSI 14 (simple-average variant: mean gain vs. mean loss over 14 changes)
# The frame ends on the current row so that RSI at date t includes the
# change into t, consistent with every other indicator in this script.
w14 = Window.partitionBy("ticker").orderBy("date").rowsBetween(-13, 0)
df = df.withColumn("change", F.col("close") - F.lag("close").over(Window.partitionBy("ticker").orderBy("date")))
# Leave gain/loss null when there is no previous close (first row of a
# ticker) so that row is skipped by the averages instead of being counted
# as a zero move.
df = df.withColumn("gain", F.when(F.col("change") > 0, F.col("change")).when(F.col("change").isNotNull(), 0))
df = df.withColumn("loss", F.when(F.col("change") < 0, -F.col("change")).when(F.col("change").isNotNull(), 0))
df = df.withColumn("avg_gain", F.avg("gain").over(w14))
df = df.withColumn("avg_loss", F.avg("loss").over(w14))
# Guard the ratio: with no losses in the window the division is undefined
# (null, or an error under ANSI mode), so rs is only computed when avg_loss > 0.
df = df.withColumn("rs", F.when(F.col("avg_loss") > 0, F.col("avg_gain") / F.col("avg_loss")))
# avg_loss > 0          -> standard formula
# only gains in window  -> RSI saturates at 100
# no movement / no data -> null (RSI undefined)
df = df.withColumn(
    "rsi_14",
    F.when(F.col("avg_loss") > 0, 100 - (100 / (1 + F.col("rs"))))
    .when(F.col("avg_gain") > 0, F.lit(100.0)),
)

# Bollinger Bands: 20-row mean of the close, with bands two standard
# deviations either side. F.stddev is the sample standard deviation, so it
# is null on a ticker's first row, where the frame holds a single value.
_band_offset = 2 * F.col("bb_std")
df = (
    df.withColumn("bb_mid", F.avg("close").over(w20))
    .withColumn("bb_std", F.stddev("close").over(w20))
    .withColumn("bb_upper", F.col("bb_mid") + _band_offset)
    .withColumn("bb_lower", F.col("bb_mid") - _band_offset)
)

# Day-over-day returns. Both forms share the same close / previous-close
# ratio; it is null on each ticker's first row, where there is no prior bar.
_by_ticker_date = Window.partitionBy("ticker").orderBy("date")
_close_ratio = F.col("close") / F.lag("close").over(_by_ticker_date)

df = df.withColumn("return_daily", _close_ratio - 1)
df = df.withColumn("return_log", F.log(_close_ratio))

# Rolling volatility: standard deviation of the simple daily return over
# trailing frames of 7, 14 and 30 rows (current row included).
for _rows in (7, 14, 30):
    df = df.withColumn(
        f"volatility_{_rows}",
        F.stddev("return_daily").over(_by_ticker_date.rowsBetween(-(_rows - 1), 0)),
    )

# 20-row price channel (highest high / lowest low), then stamp every row
# with the time this run produced it.
df = (
    df.withColumn("rolling_high_20", F.max("high").over(w20))
    .withColumn("rolling_low_20", F.min("low").over(w20))
    .withColumn("ingestion_time", F.current_timestamp())
)

# Output schema, grouped by indicator family. Helper columns created along
# the way (change, gain, loss, bb_std, ...) are not listed, so the select
# below drops them.
_bar_cols = ["ticker", "date", "open", "high", "low", "close", "volume"]
_sma_cols = ["sma_10", "sma_20", "sma_50", "sma_200"]
_ema_cols = ["ema_12", "ema_26"]
_macd_cols = ["macd", "macd_signal", "macd_hist"]
_rsi_cols = ["rsi_14"]
_bollinger_cols = ["bb_mid", "bb_upper", "bb_lower"]
_return_cols = ["return_daily", "return_log"]
_volatility_cols = ["volatility_7", "volatility_14", "volatility_30"]
_channel_cols = ["rolling_high_20", "rolling_low_20"]
_audit_cols = ["ingestion_time"]

final_cols = [
    *_bar_cols,
    *_sma_cols,
    *_ema_cols,
    *_macd_cols,
    *_rsi_cols,
    *_bollinger_cols,
    *_return_cols,
    *_volatility_cols,
    *_channel_cols,
    *_audit_cols,
]

df = df.select(final_cols)

# Publish to the Gold layer: create the Delta table on the first run,
# otherwise upsert on the (ticker, date) key so re-runs overwrite existing
# rows and append new ones.
gold_table = "gold_stock_daily_indicators"
_merge_condition = "target.ticker = source.ticker AND target.date = source.date"

if not spark.catalog.tableExists(gold_table):
    df.write.format("delta").saveAsTable(gold_table)
else:
    delta_table = DeltaTable.forName(spark, gold_table)
    _upsert = delta_table.alias("target").merge(df.alias("source"), _merge_condition)
    _upsert.whenMatchedUpdateAll().whenNotMatchedInsertAll().execute()

StatementMeta(, a444cfbe-aa8a-4a0b-a498-48fa5d2f237b, 3, Finished, Available, Finished)