## Supplier Performance Table

In [0]:
from pyspark.sql import functions as F
from delta.tables import DeltaTable

In [0]:
# Load the silver-layer ERP tables used to build the supplier performance table.
silver_purchase_order = spark.read.table('inventory_project.silver.erp_purchase_order_silver')
# NOTE(review): the products table is no longer joined below because no product
# column was ever selected from it. The lazy read is kept only in case another
# cell references this name -- remove it if nothing does.
silver_products = spark.read.table('inventory_project.silver.erp_products_silver')
silver_reciepts = spark.read.table('inventory_project.silver.erp_receipts_silver')
silver_supplier = spark.read.table('inventory_project.silver.erp_supplier_silver')

# One row per (purchase order, receipt). Both joins are left joins, so purchase
# orders without a receipt or without a matching supplier row are kept with
# nulls on the right-hand side.
combined_df = (
    silver_purchase_order.alias('po')
    .join(silver_reciepts.alias('rec'), F.col('po.po_id') == F.col('rec.po_id'), 'left')
    .join(silver_supplier.alias('sup'), F.col('po.supplier_id') == F.col('sup.supplier_id'), 'left')
    # 1 when the receipt arrived on or before the expected date; 0 otherwise,
    # including orders with no receipt (a null comparison falls to otherwise()).
    .withColumn(
        'ontimeflag',
        F.when(F.col('rec.recieved_date') <= F.col('po.expected_date'), 1).otherwise(0),
    )
    # Received minus ordered quantity; null when there is no receipt.
    .withColumn('quantity_varaince', F.col('rec.quantity_received') - F.col('po.qty_ordered'))
    .select(
        # Take the key from the purchase order, not the supplier table: with a
        # left join, sup.supplier_id is null for any order whose supplier is
        # missing from the supplier table, which would merge all such orders
        # into a single null-keyed group downstream.
        'po.supplier_id',
        'sup.supplier_name',
        'po.po_id',
        'po.product_id',
        'ontimeflag',
        'quantity_varaince',
        'rec.recieved_date',
        'po.order_date',
    )
)

combined_df.show()

In [0]:
# Roll the order/receipt rows up to one row per supplier.
agg_df = combined_df.groupBy('supplier_id', 'supplier_name').agg(
    # NOTE(review): this sums a per-row flag, and combined_df has one row per
    # receipt, while TotalOrders counts distinct po_id values. If a purchase
    # order can have several receipts this figure can exceed TotalOrders --
    # confirm receipts are 1:1 with purchase orders.
    F.sum('ontimeflag').alias('No_of_OntimeOrders'),
    F.countDistinct('po_id').alias('TotalOrders'),
    # Use the unqualified output column names of combined_df rather than the
    # 'rec.' / 'po.' join aliases, so this does not depend on alias qualifiers
    # surviving the earlier projection. Rows without a receipt yield null and
    # are ignored by avg().
    F.avg(F.datediff('recieved_date', 'order_date')).alias('Avg_Days_to_Delivery'),
    F.avg('quantity_varaince').alias('Avg_Quantity_Variance'),
)
agg_df.show()

gold_table_name = 'inventory_project.gold.supplier_performance'

if spark.catalog.tableExists(gold_table_name):
    # Upsert by supplier. The null-safe operator (<=>) makes a null supplier_id
    # match the existing null-keyed row; with plain '=' a null key never
    # matches and the row would be inserted again on every run.
    targetTable = DeltaTable.forName(spark, gold_table_name)
    targetTable.alias('tgt').merge(
        agg_df.alias('src'),
        'tgt.supplier_id <=> src.supplier_id'
    ).whenMatchedUpdateAll().whenNotMatchedInsertAll().execute()
else:
    # First run: create the table. The format is set explicitly because the
    # merge branch above requires a Delta table on subsequent runs.
    agg_df.write.format('delta').saveAsTable(gold_table_name)