In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import from_unixtime, col, date_format
from datetime import datetime

# ── 1. Spark session ───────────────────────────────────────────────────────────
# getOrCreate() hands back the already-running session when one exists.
spark = (
    SparkSession.builder
    .appName("CryptoProcessorToday")
    .getOrCreate()
)

# ── 2. Storage credentials (pulled from the secret scope) ──────────────────────
secret_scope = "cryptoSecret"
container_name = "crypto-data"
storage_account_name = dbutils.secrets.get(scope=secret_scope, key="azure-storage-account-name")
storage_account_key = dbutils.secrets.get(scope=secret_scope, key="azure-storage-account-key")

# Register the account key with Spark so wasbs:// paths on this account resolve.
blob_host = f"{storage_account_name}.blob.core.windows.net"
spark.conf.set(f"fs.azure.account.key.{blob_host}", storage_account_key)

# ── 3. Today's date and the input/output locations ─────────────────────────────
# datetime.today() is the driver's local date; it names both the raw input
# folder and the processed output file.
today_str = f"{datetime.today():%Y-%m-%d}"  # e.g., '2025-06-07'

container_root = f"wasbs://{container_name}@{blob_host}"
raw_base_path = f"{container_root}/raw"
processed_base_path = f"{container_root}/processed"

json_files_path = f"{raw_base_path}/{today_str}/*.json"
output_path = f"{processed_base_path}/{today_str}.parquet"

print(f"🔄 Processing data for: {today_str}")
print(f"📂 Reading from: {json_files_path}")

# ── 4. Read, transform, and write today's data ─────────────────────────────────
# Loads every JSON file under today's raw folder, keeps the INR price of
# bitcoin / ethereum / dogecoin per record, splits the record timestamp into
# `date` and `time` columns, and overwrites today's Parquet output.
# Any failure is printed and then re-raised so the notebook/job run is marked
# as failed instead of finishing "successfully" with no output written.
try:
    df = spark.read.json(json_files_path)

    # take(1) needs only a single row to prove the input is non-empty;
    # count() would parse every file again just to compare the total with 0.
    if not df.take(1):
        print(f"⚠️ No data found for today at: {json_files_path}")
    else:
        df_inr = (
            df.select(
                # NOTE(review): from_unixtime expects epoch *seconds* and renders
                # them in the Spark session time zone — confirm the ingest job
                # writes `timestamp` in seconds.
                from_unixtime(col("timestamp")).alias("timestamp_readable"),
                col("prices.bitcoin.inr").alias("bitcoin"),
                col("prices.ethereum.inr").alias("ethereum"),
                col("prices.dogecoin.inr").alias("dogecoin")
            )
            .withColumn("date", date_format(col("timestamp_readable"), "yyyy-MM-dd"))
            .withColumn("time", date_format(col("timestamp_readable"), "HH:mm:ss"))
            .select("date", "time", "bitcoin", "ethereum", "dogecoin")
            # Sort by date first: ordering on `time` alone interleaves rows when
            # the records span more than one calendar date in the session zone.
            .orderBy(col("date").asc(), col("time").asc())
        )

        df_inr.write.mode("overwrite").parquet(output_path)
        print(f"✅ Saved sorted data to: {output_path}")

except Exception as e:
    print(f"❌ Failed to process {json_files_path}: {e}")
    # Propagate so a scheduled run surfaces the failure rather than hiding it.
    raise


🔄 Processing data for: 2025-06-07
📂 Reading from: wasbs://crypto-data@[REDACTED].blob.core.windows.net/raw/2025-06-07/*.json
✅ Saved sorted data to: wasbs://crypto-data@[REDACTED].blob.core.windows.net/processed/2025-06-07.parquet
