In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, avg, min, max, first, last, lit, row_number
from pyspark.sql.window import Window
import datetime

# Initialize Spark (getOrCreate reuses an already-active session if present).
spark = SparkSession.builder.appName("CryptoDailySummary").getOrCreate()

# Secret-based config: the storage account name and key are read from the
# "cryptoSecret" secret scope so no credentials are hard-coded in this script.
storage_account_name = dbutils.secrets.get(scope="cryptoSecret", key="azure-storage-account-name")
storage_account_key = dbutils.secrets.get(scope="cryptoSecret", key="azure-storage-account-key")
container_name = "crypto-data"

# Register the account key with the Spark session so wasbs:// paths on this
# storage account can be read and written.
spark.conf.set(f"fs.azure.account.key.{storage_account_name}.blob.core.windows.net", storage_account_key)

# Paths
base_path = f"wasbs://{container_name}@{storage_account_name}.blob.core.windows.net"
processed_path = f"{base_path}/processed"
daily_summary_path = f"{base_path}/output/daily_summary"

# Today's date in UTC, formatted YYYY-MM-DD; it names both the input file and
# the output summary. A timezone-aware "now" is used because
# datetime.utcnow() is deprecated (Python 3.12+) and returns a naive value.
today_str = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d")
today_parquet = f"{processed_path}/{today_str}.parquet"

# Load today's processed snapshot (wide layout: one price column per coin).
df_today = spark.read.parquet(today_parquet)

# Unpivot wide -> long: build one (timestamp, date, coin, price) slice per
# coin column, then stack the slices with positional unions.
coins = ["bitcoin", "ethereum", "dogecoin"]
per_coin_frames = [
    df_today.select(
        col("time").alias("timestamp"),
        col("date"),
        lit(coin_name).alias("coin"),
        col(coin_name).alias("price"),
    )
    for coin_name in coins
]
df_melted = per_coin_frames[0]
for coin_frame in per_coin_frames[1:]:
    df_melted = df_melted.union(coin_frame)

# Keep only rows that actually carry a price.
has_price = col("price").isNotNull()
df_clean = df_melted.where(has_price)

# Window for open/close: one partition per (coin, date), ordered by time.
#
# The explicit unbounded frame is essential. When a window has an ORDER BY but
# no frame, Spark defaults to "unbounded preceding .. current row", so
# last("price") would simply return each row's own price instead of the final
# price of the day. Partitioning by date as well as coin keeps the open/close
# scoped to the same groups the aggregation below uses, so a multi-day input
# cannot leak one day's prices into another's.
w = (
    Window.partitionBy("coin", "date")
    .orderBy("timestamp")
    .rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)
)

# Add open/close price: with the full-partition frame, every row of a
# (coin, date) group carries the same earliest and latest price.
df_with_open_close = (
    df_clean.withColumn("open_price", first("price").over(w))
            .withColumn("close_price", last("price").over(w))
)

# Aggregate daily summary. open_price/close_price are constant within each
# group, so first() just picks that single value up deterministically
# (first()/last() in a groupBy are otherwise order-dependent).
df_daily = (
    df_with_open_close.groupBy("coin", "date")
        .agg(
            avg("price").alias("avg_price"),
            first("open_price").alias("opening_price"),
            first("close_price").alias("closing_price"),
            min("price").alias("min_price"),
            max("price").alias("max_price")
        )
)

# Number the coins 1..N within each date (ordered by coin name), then arrange
# the columns in their final output order.
serial_window = Window.partitionBy("date").orderBy("coin")
output_columns = [
    "s_no",
    "date",
    "coin",
    "opening_price",
    "closing_price",
    "avg_price",
    "min_price",
    "max_price",
]
df_numbered = df_daily.withColumn("s_no", row_number().over(serial_window))
df_final = df_numbered.select(*output_columns)

# Uncomment to preview the result while debugging:
# df_final.show(truncate=False)

# Persist the daily summary, replacing any earlier output for the same day.
summary_target = f"{daily_summary_path}/{today_str}_summary.parquet"
summary_writer = df_final.write.mode("overwrite")
summary_writer.parquet(summary_target)
