In [0]:
# 01_bronze_ingest: Ingest daily prices into Bronze Delta table

from datetime import datetime
from pyspark.sql import SparkSession

# =============================
# Configuration
# =============================
# Root prefix under which the daily CSV drops live. Per the original note this
# is the same location the price generator writes to (generator not shown here).
S3_BUCKET = "s3://price-inflation-tracker/bronze/prices"

# Storage location of the Bronze Delta table.
BRONZE_PATH = "s3://price-inflation-tracker/bronze_table"

# Run date rendered as YYYY-MM-DD. datetime.today() is naive local time on the
# machine executing this cell, so the value follows that machine's clock.
today = f"{datetime.today():%Y-%m-%d}"

# Folder holding today's CSV output: <S3_BUCKET>/date=<today>/prices_csv_output
csv_folder_path = "/".join((S3_BUCKET, f"date={today}", "prices_csv_output"))

# =============================
# Load today's CSV drop
# =============================
# The first line of each file is treated as the header row. No schema is
# supplied and inference is not enabled, so columns are read as strings.
spark_df = spark.read.csv(csv_folder_path, header=True)

# count() runs a Spark job over the input; display() renders the DataFrame in
# the notebook (a notebook-provided helper, not defined in this file).
print(f"✅ Read {spark_df.count()} rows from {csv_folder_path}")
display(spark_df)

# =============================
# Persist to the Bronze Delta table
# =============================
# Rows are appended, so data written on earlier runs stays in place, and the
# table is partitioned on the "date" column of the incoming DataFrame.
# NOTE(review): because this is a plain append, running this cell twice for
# the same day writes the same rows twice — confirm whether re-runs need
# de-duplication downstream.
(
    spark_df.write
    .format("delta")
    .partitionBy("date")
    .mode("append")
    .save(BRONZE_PATH)
)

print(f"✅ Bronze table updated at: {BRONZE_PATH}")

# =============================
# Optional: register the Delta table in the metastore
# =============================
# Defines `bronze_prices` on top of the files at BRONZE_PATH. IF NOT EXISTS
# makes this a no-op when the table is already registered.
bronze_ddl = f"""
    CREATE TABLE IF NOT EXISTS bronze_prices
    USING DELTA
    LOCATION '{BRONZE_PATH}'
"""
spark.sql(bronze_ddl)

print("✅ Delta table 'bronze_prices' is ready for downstream ETL")
