### **Setting Parameters**

In [0]:
# ---------------------------------------------------------------
# Location: one catalog holds both the source and target schemas
# ---------------------------------------------------------------
catalog = "flightsproj"

# ---------------------------------------------------------------
# Source side: schema + object, and the fully qualified table name
# ---------------------------------------------------------------
src_schema = "silver"
src_object = "silver_bookings"
fact_table = ".".join((catalog, src_schema, src_object))

# ---------------------------------------------------------------
# Target side: schema + object the fact rows are written to
# ---------------------------------------------------------------
tgt_schema = "gold"
tgt_object = "FactBookings"

# ---------------------------------------------------------------
# Incremental-load controls
# ---------------------------------------------------------------
# Column used for change data capture (CDC) comparisons
cdc_col = "modified_date"

# Leave empty for a normal incremental run; a non-empty value is
# used as the starting point of the load instead
backdated_refresh = ""

# ---------------------------------------------------------------
# Columns that together identify one row of the fact table
# ---------------------------------------------------------------
fact_key_cols = [
    "DimPassengers_SK",
    "DimFlights_SK",
    "DimAirports_SK",
    "booking_date",
]

In [0]:
# Every dimension the fact table joins to. For each one:
#   table     -> fully qualified dimension table name
#   alias     -> alias used for the dimension in the generated SQL
#   join_keys -> list of (fact_col, dim_col) pairs forming the ON clause
dimensions = [
    dict(
        table=f"{catalog}.{tgt_schema}.dim_passengers",
        alias="DimPassengers",
        join_keys=[("passenger_id", "passenger_id")],
    ),
    dict(
        table=f"{catalog}.{tgt_schema}.dim_flights",
        alias="DimFlights",
        join_keys=[("flight_id", "flight_id")],
    ),
    dict(
        table=f"{catalog}.{tgt_schema}.dim_airports",
        alias="DimAirports",
        join_keys=[("airport_id", "airport_id")],
    ),
]

# Fact-side columns carried through to the fact table as-is
fact_table_col = [
    "amount",
    "booking_date",
]

#### **Last Load Date**

In [0]:
# Watermark used when there is nothing in the target to compare against
default_last_load = '1900-01-01 00:00:00'

# A back-dated refresh value, when supplied, overrides the stored watermark
if backdated_refresh:
  last_load = backdated_refresh

# Otherwise continue from the newest CDC value already present in the target
elif spark.catalog.tableExists(f"{catalog}.{tgt_schema}.{tgt_object}"):
  max_cdc = spark.sql(
    f"select max({cdc_col}) from {catalog}.{tgt_schema}.{tgt_object}"
  ).collect()[0][0]

  # max() over an empty table yields NULL (None here). Without this fallback
  # the fact query would filter on TIMESTAMP('None') and select no rows.
  last_load = max_cdc if max_cdc is not None else default_last_load

# Target table does not exist yet: load everything
else:
  last_load = default_last_load

# Show the resolved watermark
last_load
 

### _Dynamic Fact Query_

In [0]:
def generate_fact_query_incremental(fact_table, dimensions, fact_table_col, last_load, cdc_column=None):
    """Build the incremental SELECT statement that feeds the fact table.

    The query selects the requested fact columns and the CDC column from
    ``fact_table`` (aliased ``f``), LEFT JOINs every dimension to pick up its
    ``<alias>_SK`` column, and keeps only rows whose CDC column is greater
    than or equal to ``last_load``.

    Args:
        fact_table: Fully qualified name of the source table.
        dimensions: Iterable of dicts with the keys ``table`` (dimension table
            name), ``alias`` (SQL alias, also the prefix of the ``_SK`` column)
            and ``join_keys`` (list of ``(fact_col, dim_col)`` pairs).
        fact_table_col: Fact-side column names to select.
        last_load: Lower bound for the CDC filter; it is interpolated into
            ``TIMESTAMP('...')``.
        cdc_column: Name of the CDC column. When omitted, the notebook-level
            ``cdc_col`` variable is used, which keeps existing callers working.

    Returns:
        The SQL text as a string with surrounding whitespace stripped.

    Raises:
        ValueError: If a dimension has no join keys, because the resulting
            ``ON`` clause would be empty and the SQL invalid.
    """
    # Only touch the notebook global when the caller did not supply a column,
    # so the function can be used without any surrounding notebook state.
    cdc_name = cdc_col if cdc_column is None else cdc_column

    fact_alias = "f"

    # Fact columns first, then the CDC column (always selected)
    select_cols = [f"{fact_alias}.{col}" for col in fact_table_col]
    select_cols.append(f"{fact_alias}.{cdc_name}")

    join_clauses = []

    for dim in dimensions:
        table_full = dim["table"]
        alias = dim["alias"]
        join_keys = dim["join_keys"]

        if not join_keys:
            raise ValueError(f"Dimension '{alias}' has no join keys")

        # Surrogate key column exposed by the dimension: <alias>.<alias>_SK
        select_cols.append(f"{alias}.{alias}_SK")

        on_clause = " AND ".join(
            f"{fact_alias}.{fk} = {alias}.{dk}" for fk, dk in join_keys
        )

        join_clauses.append(f"""
        LEFT JOIN {table_full} {alias}
            ON {on_clause}
        """)

    select_stmt = ",\n        ".join(select_cols)
    joins = "\n".join(join_clauses)
    where_stmt = f"{fact_alias}.{cdc_name} >= TIMESTAMP('{last_load}')"

    return f"""
    SELECT
        {select_stmt}
    FROM {fact_table} {fact_alias}
    {joins}
    WHERE {where_stmt}
    """.strip()


In [0]:
# Render the incremental fact SQL from the notebook configuration
query = generate_fact_query_incremental(
    fact_table=fact_table,
    dimensions=dimensions,
    fact_table_col=fact_table_col,
    last_load=last_load,
)

# Print the statement so it can be reviewed before it is executed
print(query)

### Df_Fact

In [0]:
# Pass the generated SQL text to the Spark session and keep the result as df_fact
df_fact = spark.sql(query)

In [0]:
# Render df_fact in the notebook output for a visual check of the query result
df_fact.display()

## _UPSERT_

In [0]:
from delta.tables import DeltaTable
from pyspark.sql.functions import current_timestamp

tgt_table = f"{catalog}.{tgt_schema}.{tgt_object}"

# df_union is the dataframe created from the dynamic fact query
df_union = spark.sql(query)

# Derive the merge predicate from fact_key_cols so the configured key list is
# the single source of truth (previously the same four columns were hard-coded
# here and the configuration value was never used).
merge_condition = " AND ".join(
    f"tgt.{key_col} = src.{key_col}" for key_col in fact_key_cols
)

# If the Gold fact table already exists -> MERGE
if spark.catalog.tableExists(tgt_table):

    dlt_object = DeltaTable.forName(spark, tgt_table)

    (
        dlt_object.alias("tgt")
        .merge(df_union.alias("src"), merge_condition)
        # Only overwrite a matched row when the source is at least as recent
        .whenMatchedUpdateAll(condition=f"src.{cdc_col} >= tgt.{cdc_col}")
        .whenNotMatchedInsertAll()
        .execute()
    )

# First run -> create the table
else:
    (
        df_union
        # The CDC column is replaced by the load timestamp on the initial load;
        # it is addressed through cdc_col rather than a hard-coded name.
        # NOTE(review): the MERGE branch keeps the source CDC value instead -
        # confirm that stamping the load time here is intended.
        .withColumn(cdc_col, current_timestamp())
        .write
        .format("delta")
        .mode("append")
        .saveAsTable(tgt_table)
    )


In [0]:
%sql
-- Switch to the flightsproj catalog and list every row of gold.factbookings (names are hard-coded in this cell)
use catalog `flightsproj`; select * from `gold`.`factbookings`;