In [0]:
# Notebook parameter: the environment name. It is interpolated into the storage
# path and the schema/table names built further down in this notebook.
dbutils.widgets.text(name="env",defaultValue="",label="Enter the environment in lower case")

# Normalize to what the widget label asks for (lower case, no stray whitespace)
# and fail fast on an empty value: the widget defaults to "", which would
# otherwise produce a storage path ending in "/" and table names such as
# "hive_metastore._silver.prices".
env = dbutils.widgets.get("env").strip().lower()
if not env:
    raise ValueError(
        "The 'env' widget is empty; set it to the target environment name before running this notebook."
    )

from pyspark.sql.functions import to_date, first, max, min, avg, last, sum, year, month, dayofmonth, window
from delta.tables import DeltaTable


# Environment-scoped locations: storage root, streaming checkpoint directory
# and the Delta table that receives the daily aggregates.
path = f"abfss://stock-project-container@bgardzinski1stock.dfs.core.windows.net/{env}"
checkpoint_path = f"{path}/checkpoints/daily_price_aggregates"
# NOTE(review): unlike the reads in this notebook, this name carries no
# "hive_metastore." catalog prefix, so it resolves against the session's
# current catalog — confirm that is the intended catalog.
target_table = "{}_silver.daily_price_aggregates".format(env)


# Open the silver prices table as a streaming source and declare a 5-day
# watermark on its extract_time column.
prices_table = f"hive_metastore.{env}_silver.prices"
df_stream = (
    spark.readStream
    .table(prices_table)
    .withWatermark("extract_time", "5 day")
)


#df_stream = spark.table(f"hive_metastore.{env}_silver.prices")

# Daily rollup per symbol: one output row per (symbol, calendar date derived
# from extract_time), with the date column exposed as "date".
# NOTE(review): the grouping key is to_date(extract_time), not the watermarked
# extract_time column itself nor a window() over it. Check against the
# Structured Streaming guide's watermark conditions whether the watermark set on
# the input stream can actually evict aggregation state for this query.
prices_by_day = df_stream.withColumn("extract_date", to_date("extract_time"))

daily_metrics = [
    max("open").alias("open"),
    max("day_high").alias("day_high"),
    min("day_low").alias("day_low"),
    avg("current_price").alias("avg_price"),
    max("volume").alias("volume"),
]

daily_agg = (
    prices_by_day
    .groupBy("symbol", "extract_date")
    .agg(*daily_metrics)
    .withColumnRenamed("extract_date", "date")
)

# Break the aggregate's date into separate year / month / day-of-month columns.
daily_agg = daily_agg.withColumns(
    {
        "year": year("date"),
        "month": month("date"),
        "day": dayofmonth("date"),
    }
)

#daily_agg.show(10)

def upsert_to_delta(batch_df, batch_id):
    """Merge one micro-batch of daily aggregates into the target Delta table.

    Designed to be passed to ``writeStream.foreachBatch``. Batch rows are
    matched to existing rows on ``(symbol, date)``; a matched row has all of
    its columns overwritten from the batch, an unmatched row is inserted.
    The table named by ``target_table`` is looked up with
    ``DeltaTable.forName``, so it has to exist before the stream starts.

    Args:
        batch_df: DataFrame holding the rows emitted for this micro-batch.
        batch_id: Micro-batch identifier passed in by Structured Streaming;
            not used here.
    """
    # Use the session attached to the micro-batch rather than the notebook's
    # global `spark`: foreachBatch callbacks may execute where notebook globals
    # such as `spark` are not available (e.g. shared-access / Spark Connect
    # compute), and the batch's own session is the documented handle to use.
    session = batch_df.sparkSession
    delta_table = DeltaTable.forName(session, target_table)

    (
        delta_table.alias("target")
        .merge(
            batch_df.alias("source"),
            "target.symbol = source.symbol AND target.date = source.date",
        )
        .whenMatchedUpdateAll()
        .whenNotMatchedInsertAll()
        .execute()
    )


# Configure the sink: every micro-batch of updated aggregates is handed to
# upsert_to_delta, with progress tracked under checkpoint_path. The
# availableNow trigger processes the data available at start and then stops.
stream_writer = (
    daily_agg.writeStream
    .outputMode("update")
    .option("checkpointLocation", checkpoint_path)
    .trigger(availableNow=True)
    .foreachBatch(upsert_to_delta)
)

# Start the run and block this cell until the query terminates.
query = stream_writer.start()
query.awaitTermination()

In [0]:
# Sanity check: report whether the silver prices table opens as a streaming DataFrame.
prices_stream_check = spark.readStream.table(f"hive_metastore.{env}_silver.prices")
prices_stream_check.isStreaming

In [0]:
# Preview rows of the silver prices table with a batch (non-streaming) query.
prices_preview = spark.sql(f"SELECT * FROM hive_metastore.{env}_silver.prices")
prices_preview.show()
