In [0]:
# Load the enriched streaming-sales data from the gold area as a DataFrame.
gold_input_path = "/mnt/realtimedeai/gold/streaming_sales_enriched/"

# Read the Delta data stored at gold_input_path; the resulting DataFrame is
# what the partitioned rewrite further down writes out.
sales_gold_df = spark.read.load(gold_input_path, format="delta")




In [0]:
# Destination of the partitioned copy of the gold data
partitioned_output_path = "/mnt/realtimedeai/gold/streaming_sales_enriched_partitioned/"

# Build a Delta writer that overwrites the destination and partitions the
# output by store_id, then run the write.
partitioned_writer = (
    sales_gold_df.write
    .format("delta")
    .mode("overwrite")
    .partitionBy("store_id")
)
partitioned_writer.save(partitioned_output_path)

print("Partitioned Gold table written successfully!")


In [0]:
# Expose the partitioned Delta data as the SQL table
# `streaming_sales_optimized`: any existing table of that name is dropped
# first, then the table is created over the data at partitioned_output_path.
drop_table_stmt = """
DROP TABLE IF EXISTS streaming_sales_optimized;
"""

create_table_stmt = f"""
CREATE TABLE streaming_sales_optimized
USING DELTA
LOCATION '{partitioned_output_path}'
"""

# Order matters: the drop must run before the create.
for registration_stmt in (drop_table_stmt, create_table_stmt):
    spark.sql(registration_stmt)

print("Table 'streaming_sales_optimized' registered successfully.")


In [0]:
# Run OPTIMIZE on the registered table, Z-ordering by transaction_time.
optimize_stmt = """
OPTIMIZE streaming_sales_optimized
ZORDER BY (transaction_time)
"""

# Kept as the cell's final expression so the notebook displays the result.
spark.sql(optimize_stmt)


In [0]:

# Publish the view `vw_sales_transaction_summary`, which selects the
# transaction, product and store columns from streaming_sales_optimized.
#
# CREATE OR REPLACE VIEW already replaces an existing definition, so no
# separate DROP VIEW is issued beforehand. Dropping first would leave the
# view missing if the CREATE statement then failed (for example because one
# of the selected columns is absent from the table).
spark.sql("""
CREATE OR REPLACE VIEW vw_sales_transaction_summary AS
SELECT
    transaction_id,
    product_id,
    product_name,
    category,
    base_price,
    quantity_sold,
    sale_amount,
    transaction_time,
    store_id,
    store_name,
    region,
    city,
    size
FROM streaming_sales_optimized
""")

print("SQL View 'vw_sales_transaction_summary' created successfully.")
