### Fact Table for Receipts

In [0]:
from pyspark.sql.functions import *
from delta.tables import DeltaTable

In [0]:
# Run mode: 'incremental' keeps only the silver receipts at (or after) the
# latest ingestion_date found in the silver table; any other value reads the
# whole silver table.
load_type = 'full'

rec_source = spark.read.table('inventory_project.silver.erp_receipts_silver')
if load_type == 'incremental':
    # Watermark = newest ingestion_date currently present in the silver table.
    ingest_date = spark.sql('select max(ingestion_date) as max_date from inventory_project.silver.erp_receipts_silver').first()['max_date']
    condition = col("ingestion_date") >= lit(ingest_date)
    rec_source = rec_source.filter(condition)

# Gold-layer lookup tables (purchase-order fact and product dimension) that
# are joined onto the receipts when the fact table is built.
po_source = spark.read.table('inventory_project.gold.erp_purchase_order_fact')
prod_source = spark.read.table('inventory_project.gold.erp_product_dim')

In [0]:
# Show the receipts source DataFrame in the notebook output for a quick visual check.
display(rec_source)

In [0]:
# Build the receipt fact rows: each silver receipt is enriched with its
# purchase-order id and product key. Left joins keep receipts that have no
# matching purchase order or product; coalesce then substitutes the
# placeholder 'NA' for the missing key instead of leaving it NULL.
fact_df = (
    rec_source.alias('rec')
    .join(po_source.alias('po'), col('rec.po_id') == col('po.po_id'), 'left')
    .join(prod_source.alias('prod'), col('rec.product_id') == col('prod.product_id'), 'left')
    # Stamp every row with the time this fact load ran.
    .withColumn('fact_ingest_date', current_timestamp())
    .select(
        col('rec.receipt_id'),
        coalesce(col("po.po_id"), lit('NA')).alias("po_id"),
        coalesce(col("prod.product_key"), lit('NA')).alias("product_key"),
        col('rec.quantity_received'),
        col('rec.recieved_date'),
        col('rec.warehouse_location'),
        col('fact_ingest_date'),
    )
)

# Load data into the fact table.
if load_type == 'incremental':
    # Upsert into the existing gold table: rows matching on the key are
    # overwritten, unmatched rows are inserted.
    # NOTE(review): po_id and product_key are part of the match key, so a
    # receipt whose lookup later resolves from 'NA' to a real key is inserted
    # as a new row rather than updating the old one - confirm this is intended.
    target_table = DeltaTable.forName(spark, 'inventory_project.gold.erp_receipt_fact')
    (
        target_table.alias('tgt')
        .merge(
            fact_df.alias('src'),
            'tgt.receipt_id = src.receipt_id and tgt.po_id = src.po_id and tgt.product_key = src.product_key',
        )
        .whenMatchedUpdateAll()
        .whenNotMatchedInsertAll()
        # The merge builder is lazy; without execute() nothing is written.
        .execute()
    )
else:
    # Full load: replace the whole gold table with the freshly built rows.
    fact_df.write.mode('overwrite').saveAsTable('inventory_project.gold.erp_receipt_fact')

In [0]:
# Show the assembled fact DataFrame in the notebook output for a quick visual check.
display(fact_df)