In [0]:
from pyspark.sql.window import Window
from pyspark.sql.functions import col, row_number

# Usage records, read from the Delta files in the silver container.
silver_order_df = (
    spark.read
    .format("delta")
    .load('abfss://silver@telcostoragelayer.dfs.core.windows.net/usage/')
)

# Customer dimension, read from the gold catalog table.
dim_customer_df = spark.read.table("telco_catalog.gold.dim_customer")

# Number each customer's rows newest-first by `timestamp`; row 1 is the latest.
# (row_number assigns distinct ranks, so rows sharing a timestamp are ordered arbitrarily.)
customer_window = Window.partitionBy("customer_id").orderBy(col("timestamp").desc())

# Keep exactly one row per customer_id and discard the helper rank column.
latest_dim_customer = (
    dim_customer_df
    .withColumn("rn", row_number().over(customer_window))
    .filter(col("rn") == 1)
    .drop("rn")
)


In [0]:
# Only the join key and the surrogate key are needed from the customer dimension.
customer_keys = latest_dim_customer.select("customer_id", "dim_customer_key")

# Left join on customer_id: every usage row is kept; rows without a matching
# customer get a null dim_customer_key.
fact_df = silver_order_df.join(customer_keys, on="customer_id", how="left")
fact_df.display()


In [0]:
# Columns that make up the fact table, in output order.
fact_columns = [
    "usage_id",
    "data_used_mb",
    "usage_date",
    "dim_customer_key",
    "call_minutes",
    "sms_count",
    "roaming_minutes",
]

# Project the joined frame down to the fact columns (customer_id is dropped here).
fact_order_df = fact_df.select(*fact_columns)
fact_order_df.display()

In [0]:
from delta.tables import DeltaTable

# Fully qualified catalog name of the fact table. (Despite the variable name,
# this is a table identifier, not a storage path.)
fact_table_path = "telco_catalog.gold.fact_usage"
# Storage location used when the table is first created as an external table.
fact_table_location = 'abfss://gold@telcostoragelayer.dfs.core.windows.net/fact_usage'

if spark.catalog.tableExists(fact_table_path):
    # Table already registered: upsert into it. The merge target is resolved by
    # the same catalog name the existence check used, so the check and the write
    # always refer to the same table.
    delta_table = DeltaTable.forName(spark, fact_table_path)
    (
        delta_table.alias("target")
        .merge(
            fact_order_df.alias("source"),
            # usage_id is the merge key: matching rows are overwritten with the
            # source values, unmatched source rows are inserted.
            "target.usage_id = source.usage_id",
        )
        .whenMatchedUpdateAll()
        .whenNotMatchedInsertAll()
        .execute()
    )
else:
    # First run: write the data to the gold location and register it in the
    # catalog under the fact table name.
    (
        fact_order_df.write
        .format('delta')
        .mode('overwrite')
        .option('path', fact_table_location)
        .saveAsTable(fact_table_path)
    )


In [0]:
# Read the fact table back through the catalog and render its current contents.
fact_usage_tbl = spark.read.table("telco_catalog.gold.fact_usage")
display(fact_usage_tbl)