In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, current_timestamp
from pyspark.sql.types import IntegerType, DoubleType

# Create (or reuse) the Spark session. The PostgreSQL JDBC driver is requested
# via spark.jars.packages so the JDBC read/write below can load
# org.postgresql.Driver.
spark = (
    SparkSession.builder
    .appName("ETL Sales Fact")
    .config("spark.jars.packages", "org.postgresql:postgresql:42.6.0")
    .getOrCreate()
)

jdbc_url = "jdbc:postgresql://localhost:5432/retail_dw"
# NOTE(review): these are placeholder credentials kept inline; presumably they
# should come from a secret store or environment variables before this runs
# anywhere real -- confirm with the deployment setup.
conn_props = {"user": "your_user", "password": "your_password", "driver": "org.postgresql.Driver"}

# Everything that can fail (JDBC read, transformations, JDBC write) runs inside
# try/finally so the Spark session is always stopped, even when a step raises.
try:
    # Read sales staging data
    sales_stage = spark.read.jdbc(jdbc_url, "wh_stage.stg_sales_fact", properties=conn_props)

    # Cast for consistency: quantity -> integer, unit_price -> double
    sales_stage = (
        sales_stage
        .withColumn("quantity", col("quantity").cast(IntegerType()))
        .withColumn("unit_price", col("unit_price").cast(DoubleType()))
    )

    # In production, join with core.dim_customer and core.dim_product to get surrogate keys
    # Stamp each row with ETL audit timestamps (insert and update).
    sales_stage = (
        sales_stage
        .withColumn("etl_insert_ts", current_timestamp())
        .withColumn("etl_update_ts", current_timestamp())
    )

    # Append the prepared rows to the fact table; existing rows are left in place.
    sales_stage.write.jdbc(jdbc_url, "core.fact_sales", mode="append", properties=conn_props)
finally:
    # Release the session's resources regardless of success or failure.
    spark.stop()
