In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
spark = SparkSession.builder.appName("Retail Inventory & Supply Chain Intelligence").getOrCreate()
spark

In [0]:
df_infra = spark.read.csv("dbfs:/FileStore/shared_uploads/azuser3559_mml.local@techademy.com/inventory_supply.csv", header=True, inferSchema=True)
df_infra.show()

+------+------------+-----------+----------+--------+------------+-------------+---------+---------+
|ItemID|    ItemName|   Category| Warehouse|StockQty|ReorderLevel|LastRestocked|UnitPrice|  Supplie|
+------+------------+-----------+----------+--------+------------+-------------+---------+---------+
|  I001|      LED TV|Electronics|WarehouseA|      50|          20|   2024-03-15|    30000|   AVTech|
|  I002|      Laptop|Electronics|WarehouseB|      10|          15|   2024-04-01|    70000|TechWorld|
|  I003|Office Chair|  Furniture|WarehouseA|      40|          10|   2024-03-25|     6000|  ChairCo|
|  I004|Refrigerator| Appliances|WarehouseC|       5|          10|   2024-02-20|    25000| FreezeIt|
|  I005|     Printer|Electronics|WarehouseB|       3|           5|   2024-03-30|     8000|PrintFast|
+------+------------+-----------+----------+--------+------------+-------------+---------+---------+



In [0]:
df_NR = df_infra.withColumn("NeedsReorder", col("StockQty") < col("ReorderLevel"))
df_NR.show()

+------+------------+-----------+----------+--------+------------+-------------+---------+---------+------------+
|ItemID|    ItemName|   Category| Warehouse|StockQty|ReorderLevel|LastRestocked|UnitPrice|  Supplie|NeedsReorder|
+------+------------+-----------+----------+--------+------------+-------------+---------+---------+------------+
|  I001|      LED TV|Electronics|WarehouseA|      50|          20|   2024-03-15|    30000|   AVTech|       false|
|  I002|      Laptop|Electronics|WarehouseB|      10|          15|   2024-04-01|    70000|TechWorld|        true|
|  I003|Office Chair|  Furniture|WarehouseA|      40|          10|   2024-03-25|     6000|  ChairCo|       false|
|  I004|Refrigerator| Appliances|WarehouseC|       5|          10|   2024-02-20|    25000| FreezeIt|        true|
|  I005|     Printer|Electronics|WarehouseB|       3|           5|   2024-03-30|     8000|PrintFast|        true|
+------+------------+-----------+----------+--------+------------+-------------+--------

In [0]:
df_view = df_NR.createOrReplaceTempView("items_to_restock")
spark.sql("SELECT * FROM items_to_restock WHERE NeedsReorder = true;").show()

+------+------------+-----------+----------+--------+------------+-------------+---------+---------+------------+
|ItemID|    ItemName|   Category| Warehouse|StockQty|ReorderLevel|LastRestocked|UnitPrice|  Supplie|NeedsReorder|
+------+------------+-----------+----------+--------+------------+-------------+---------+---------+------------+
|  I002|      Laptop|Electronics|WarehouseB|      10|          15|   2024-04-01|    70000|TechWorld|        true|
|  I004|Refrigerator| Appliances|WarehouseC|       5|          10|   2024-02-20|    25000| FreezeIt|        true|
|  I005|     Printer|Electronics|WarehouseB|       3|           5|   2024-03-30|     8000|PrintFast|        true|
+------+------------+-----------+----------+--------+------------+-------------+---------+---------+------------+



In [0]:
df_NR.filter(col("NeedsReorder") == True).groupBy("Warehouse").count().filter(col("count") >= 2).show()

+----------+-----+
| Warehouse|count|
+----------+-----+
|WarehouseB|    2|
+----------+-----+



In [0]:
df_CA = df_NR.groupBy("Category").agg(avg("UnitPrice").alias("AvgCatPrice"))
df_CA.show()

+-----------+-----------+
|   Category|AvgCatPrice|
+-----------+-----------+
|Electronics|    36000.0|
|  Furniture|     6000.0|
| Appliances|    25000.0|
+-----------+-----------+



In [0]:
df_join = df_NR.join(df_CA,"Category")
df_BA = df_join.withColumn("IsBelowAvg", col("UnitPrice") < col("AvgCatPrice"))
df_BA.show()

+-----------+------+------------+----------+--------+------------+-------------+---------+---------+------------+-----------+----------+
|   Category|ItemID|    ItemName| Warehouse|StockQty|ReorderLevel|LastRestocked|UnitPrice|  Supplie|NeedsReorder|AvgCatPrice|IsBelowAvg|
+-----------+------+------------+----------+--------+------------+-------------+---------+---------+------------+-----------+----------+
|Electronics|  I001|      LED TV|WarehouseA|      50|          20|   2024-03-15|    30000|   AVTech|       false|    36000.0|      true|
|Electronics|  I002|      Laptop|WarehouseB|      10|          15|   2024-04-01|    70000|TechWorld|        true|    36000.0|     false|
|  Furniture|  I003|Office Chair|WarehouseA|      40|          10|   2024-03-25|     6000|  ChairCo|       false|     6000.0|     false|
| Appliances|  I004|Refrigerator|WarehouseC|       5|          10|   2024-02-20|    25000| FreezeIt|        true|    25000.0|     false|
|Electronics|  I005|     Printer|Warehous

In [0]:
df_SS = df_BA.groupBy("Supplie").agg((sum(col("IsBelowAvg").cast("int")) / count("*")).alias("BelowAvgRatio")
).withColumn("SupplierTag", when(col("BelowAvgRatio") > 0.5, "Good Deal").otherwise("Fair Deal"))

df_SS.show()


+---------+-------------+-----------+
|  Supplie|BelowAvgRatio|SupplierTag|
+---------+-------------+-----------+
|   AVTech|          1.0|  Good Deal|
|TechWorld|          0.0|  Fair Deal|
|PrintFast|          1.0|  Good Deal|
| FreezeIt|          0.0|  Fair Deal|
|  ChairCo|          0.0|  Fair Deal|
+---------+-------------+-----------+



In [0]:
df_TSV = df_BA.withColumn("TotalStockValue", col("StockQty") * col("UnitPrice"))
df_TSV.show()

+-----------+------+------------+----------+--------+------------+-------------+---------+---------+------------+-----------+----------+---------------+
|   Category|ItemID|    ItemName| Warehouse|StockQty|ReorderLevel|LastRestocked|UnitPrice|  Supplie|NeedsReorder|AvgCatPrice|IsBelowAvg|TotalStockValue|
+-----------+------+------------+----------+--------+------------+-------------+---------+---------+------------+-----------+----------+---------------+
|Electronics|  I001|      LED TV|WarehouseA|      50|          20|   2024-03-15|    30000|   AVTech|       false|    36000.0|      true|        1500000|
|Electronics|  I002|      Laptop|WarehouseB|      10|          15|   2024-04-01|    70000|TechWorld|        true|    36000.0|     false|         700000|
|  Furniture|  I003|Office Chair|WarehouseA|      40|          10|   2024-03-25|     6000|  ChairCo|       false|     6000.0|     false|         240000|
| Appliances|  I004|Refrigerator|WarehouseC|       5|          10|   2024-02-20|  

In [0]:
df_T3 = df_TSV.orderBy(col("TotalStockValue").desc()).limit(3)
df_T3.show()

+-----------+------+------------+----------+--------+------------+-------------+---------+---------+------------+-----------+----------+---------------+
|   Category|ItemID|    ItemName| Warehouse|StockQty|ReorderLevel|LastRestocked|UnitPrice|  Supplie|NeedsReorder|AvgCatPrice|IsBelowAvg|TotalStockValue|
+-----------+------+------------+----------+--------+------------+-------------+---------+---------+------------+-----------+----------+---------------+
|Electronics|  I001|      LED TV|WarehouseA|      50|          20|   2024-03-15|    30000|   AVTech|       false|    36000.0|      true|        1500000|
|Electronics|  I002|      Laptop|WarehouseB|      10|          15|   2024-04-01|    70000|TechWorld|        true|    36000.0|     false|         700000|
|  Furniture|  I003|Office Chair|WarehouseA|      40|          10|   2024-03-25|     6000|  ChairCo|       false|     6000.0|     false|         240000|
+-----------+------+------------+----------+--------+------------+-------------+--

In [0]:
df_TSV.write.partitionBy("Warehouse").mode("overwrite").parquet("/export/inventory/value_by_warehouse/")

In [0]:
df_TSV.groupBy("Warehouse").count()
df_TSV.show()

+-----------+------+------------+----------+--------+------------+-------------+---------+---------+------------+-----------+----------+---------------+
|   Category|ItemID|    ItemName| Warehouse|StockQty|ReorderLevel|LastRestocked|UnitPrice|  Supplie|NeedsReorder|AvgCatPrice|IsBelowAvg|TotalStockValue|
+-----------+------+------------+----------+--------+------------+-------------+---------+---------+------------+-----------+----------+---------------+
|Electronics|  I001|      LED TV|WarehouseA|      50|          20|   2024-03-15|    30000|   AVTech|       false|    36000.0|      true|        1500000|
|Electronics|  I002|      Laptop|WarehouseB|      10|          15|   2024-04-01|    70000|TechWorld|        true|    36000.0|     false|         700000|
|  Furniture|  I003|Office Chair|WarehouseA|      40|          10|   2024-03-25|     6000|  ChairCo|       false|     6000.0|     false|         240000|
| Appliances|  I004|Refrigerator|WarehouseC|       5|          10|   2024-02-20|  

In [0]:
df_AS = df_TSV.groupBy("Warehouse", "Category").agg(avg("StockQty").alias("AvgStock"))
df_AS.show()


+----------+-----------+--------+
| Warehouse|   Category|AvgStock|
+----------+-----------+--------+
|WarehouseB|Electronics|     6.5|
|WarehouseA|  Furniture|    40.0|
|WarehouseC| Appliances|     5.0|
|WarehouseA|Electronics|    50.0|
+----------+-----------+--------+



In [0]:
df_UU = df_TSV.groupBy("Warehouse").agg(sum("StockQty").alias("TotalStock")).filter("TotalStock < 100")
df_UU.show()

+----------+----------+
| Warehouse|TotalStock|
+----------+----------+
|WarehouseA|        90|
|WarehouseC|         5|
|WarehouseB|        13|
+----------+----------+



In [0]:
df_TSV.write.format("delta").mode("overwrite").save("/delta/retail_inventory")
spark.sql("CREATE TABLE IF NOT EXISTS retail_inventory USING DELTA LOCATION '/delta/retail_inventory'")

spark.read.format("delta").load("/delta/retail_inventory").show()

+-----------+------+------------+----------+--------+------------+-------------+---------+---------+------------+-----------+----------+---------------+
|   Category|ItemID|    ItemName| Warehouse|StockQty|ReorderLevel|LastRestocked|UnitPrice|  Supplie|NeedsReorder|AvgCatPrice|IsBelowAvg|TotalStockValue|
+-----------+------+------------+----------+--------+------------+-------------+---------+---------+------------+-----------+----------+---------------+
|Electronics|  I001|      LED TV|WarehouseA|      50|          20|   2024-03-15|    30000|   AVTech|       false|    36000.0|      true|        1500000|
|Electronics|  I002|      Laptop|WarehouseB|      10|          15|   2024-04-01|    70000|TechWorld|        true|    36000.0|     false|         700000|
|  Furniture|  I003|Office Chair|WarehouseA|      40|          10|   2024-03-25|     6000|  ChairCo|       false|     6000.0|     false|         240000|
| Appliances|  I004|Refrigerator|WarehouseC|       5|          10|   2024-02-20|  

In [0]:
from delta.tables import DeltaTable
delta = DeltaTable.forPath(spark, "/delta/retail_inventory")

delta.update("ItemName = 'Laptop'", {"StockQty": "20"})
display(delta.toDF())

Category,ItemID,ItemName,Warehouse,StockQty,ReorderLevel,LastRestocked,UnitPrice,Supplie,NeedsReorder,AvgCatPrice,IsBelowAvg,TotalStockValue
Electronics,I001,LED TV,WarehouseA,50,20,2024-03-15,30000,AVTech,False,36000.0,True,1500000
Furniture,I003,Office Chair,WarehouseA,40,10,2024-03-25,6000,ChairCo,False,6000.0,False,240000
Appliances,I004,Refrigerator,WarehouseC,5,10,2024-02-20,25000,FreezeIt,True,25000.0,False,125000
Electronics,I005,Printer,WarehouseB,3,5,2024-03-30,8000,PrintFast,True,36000.0,True,24000
Electronics,I002,Laptop,WarehouseB,20,15,2024-04-01,70000,TechWorld,True,36000.0,False,700000


In [0]:
delta.delete("StockQty = 0")
display(delta.toDF())

Category,ItemID,ItemName,Warehouse,StockQty,ReorderLevel,LastRestocked,UnitPrice,Supplie,NeedsReorder,AvgCatPrice,IsBelowAvg,TotalStockValue
Electronics,I002,Laptop,WarehouseB,20,15,2024-04-01,70000,TechWorld,True,36000.0,False,700000
Electronics,I001,LED TV,WarehouseA,50,20,2024-03-15,30000,AVTech,False,36000.0,True,1500000
Furniture,I003,Office Chair,WarehouseA,40,10,2024-03-25,6000,ChairCo,False,6000.0,False,240000
Appliances,I004,Refrigerator,WarehouseC,5,10,2024-02-20,25000,FreezeIt,True,25000.0,False,125000
Electronics,I005,Printer,WarehouseB,3,5,2024-03-30,8000,PrintFast,True,36000.0,True,24000


In [0]:
spark.sql("DESCRIBE HISTORY retail_inventory").show()
spark.read.format("delta").option("versionAsOf", 0).load("/delta/retail_inventory").show()

+-------+-------------------+----------------+--------------------+---------+--------------------+----+------------------+--------------------+-----------+-----------------+-------------+--------------------+------------+--------------------+
|version|          timestamp|          userId|            userName|operation| operationParameters| job|          notebook|           clusterId|readVersion|   isolationLevel|isBlindAppend|    operationMetrics|userMetadata|          engineInfo|
+-------+-------------------+----------------+--------------------+---------+--------------------+----+------------------+--------------------+-----------+-----------------+-------------+--------------------+------------+--------------------+
|      9|2025-06-19 09:43:54|6267232536945943|azuser3559_mml.lo...|   DELETE|{predicate -> ["(...|NULL|{4060256036899043}|0612-123310-2108yh11|          8|WriteSerializable|        false|{numRemovedFiles ...|        NULL|Databricks-Runtim...|
|      8|2025-06-19 09:43:49

In [0]:
df1 = spark.read.format("csv").option("header", "true").load("dbfs:/FileStore/shared_uploads/azuser3559_mml.local@techademy.com/restock_logs.csv")
df1.show()

+------+-----------+-------------+
|ItemID|RestockDate|QuantityAdded|
+------+-----------+-------------+
|  I002| 2024-04-20|           10|
|  I005| 2024-04-22|            5|
|  I001| 2024-04-25|           20|
+------+-----------+-------------+



In [0]:
df_join = df_TSV.join(df1, "ItemID", "left")
df_join.show()

+------+-----------+------------+----------+--------+------------+-------------+---------+---------+------------+-----------+----------+---------------+-----------+-------------+
|ItemID|   Category|    ItemName| Warehouse|StockQty|ReorderLevel|LastRestocked|UnitPrice|  Supplie|NeedsReorder|AvgCatPrice|IsBelowAvg|TotalStockValue|RestockDate|QuantityAdded|
+------+-----------+------------+----------+--------+------------+-------------+---------+---------+------------+-----------+----------+---------------+-----------+-------------+
|  I005|Electronics|     Printer|WarehouseB|       3|           5|   2024-03-30|     8000|PrintFast|        true|    36000.0|      true|          24000| 2024-04-22|            5|
|  I003|  Furniture|Office Chair|WarehouseA|      40|          10|   2024-03-25|     6000|  ChairCo|       false|     6000.0|     false|         240000|       NULL|         NULL|
|  I004| Appliances|Refrigerator|WarehouseC|       5|          10|   2024-02-20|    25000| FreezeIt|     

In [0]:
df_update = df_join.withColumn("UpdatedStock", col("StockQty") + coalesce(col("QuantityAdded"), lit(0))).withColumn("RestockedRecently", col("QuantityAdded").isNotNull())
df_update.show()

+------+-----------+------------+----------+--------+------------+-------------+---------+---------+------------+-----------+----------+---------------+-----------+-------------+------------+-----------------+
|ItemID|   Category|    ItemName| Warehouse|StockQty|ReorderLevel|LastRestocked|UnitPrice|  Supplie|NeedsReorder|AvgCatPrice|IsBelowAvg|TotalStockValue|RestockDate|QuantityAdded|UpdatedStock|RestockedRecently|
+------+-----------+------------+----------+--------+------------+-------------+---------+---------+------------+-----------+----------+---------------+-----------+-------------+------------+-----------------+
|  I005|Electronics|     Printer|WarehouseB|       3|           5|   2024-03-30|     8000|PrintFast|        true|    36000.0|      true|          24000| 2024-04-22|            5|         8.0|             true|
|  I003|  Furniture|Office Chair|WarehouseA|      40|          10|   2024-03-25|     6000|  ChairCo|       false|     6000.0|     false|         240000|       N

In [0]:
delta.alias("tgt").merge(df_update.alias("src"),"tgt.ItemID = src.ItemID").whenMatchedUpdate(set={"StockQty": "src.UpdatedStock","LastRestocked": "src.RestockDate"}).execute()

In [0]:
df_view = df_TSV.withColumn("TotalStockValue", col("StockQty") * col("UnitPrice")).withColumn("NeedsReorder", col("StockQty") < col("ReorderLevel"))

df_view.createOrReplaceTempView("inventory_summary")
display(spark.sql("SELECT * FROM inventory_summary"))


Category,ItemID,ItemName,Warehouse,StockQty,ReorderLevel,LastRestocked,UnitPrice,Supplie,NeedsReorder,AvgCatPrice,IsBelowAvg,TotalStockValue
Electronics,I001,LED TV,WarehouseA,50,20,2024-03-15,30000,AVTech,False,36000.0,True,1500000
Electronics,I002,Laptop,WarehouseB,10,15,2024-04-01,70000,TechWorld,True,36000.0,False,700000
Furniture,I003,Office Chair,WarehouseA,40,10,2024-03-25,6000,ChairCo,False,6000.0,False,240000
Appliances,I004,Refrigerator,WarehouseC,5,10,2024-02-20,25000,FreezeIt,True,25000.0,False,125000
Electronics,I005,Printer,WarehouseB,3,5,2024-03-30,8000,PrintFast,True,36000.0,True,24000


In [0]:
spark.sql("""
    CREATE OR REPLACE TEMP VIEW supplier_leaderboard AS
    SELECT Supplie, AVG(UnitPrice) as AvgPrice
    FROM inventory_summary
    GROUP BY Supplie
    ORDER BY AvgPrice ASC
""")
display(spark.sql("SELECT * FROM supplier_leaderboard"))

Supplie,AvgPrice
ChairCo,6000.0
PrintFast,8000.0
FreezeIt,25000.0
AVTech,30000.0
TechWorld,70000.0


In [0]:
df_WO = df_TSV.withColumn("StockCategory", when(col("StockQty") > 2 * col("ReorderLevel"), "Overstocked").when(col("StockQty") < col("ReorderLevel"), "LowStock").otherwise("Optimal"))

df_WO.show()

+-----------+------+------------+----------+--------+------------+-------------+---------+---------+------------+-----------+----------+---------------+-------------+
|   Category|ItemID|    ItemName| Warehouse|StockQty|ReorderLevel|LastRestocked|UnitPrice|  Supplie|NeedsReorder|AvgCatPrice|IsBelowAvg|TotalStockValue|StockCategory|
+-----------+------+------------+----------+--------+------------+-------------+---------+---------+------------+-----------+----------+---------------+-------------+
|Electronics|  I001|      LED TV|WarehouseA|      50|          20|   2024-03-15|    30000|   AVTech|       false|    36000.0|      true|        1500000|  Overstocked|
|Electronics|  I002|      Laptop|WarehouseB|      10|          15|   2024-04-01|    70000|TechWorld|        true|    36000.0|     false|         700000|     LowStock|
|  Furniture|  I003|Office Chair|WarehouseA|      40|          10|   2024-03-25|     6000|  ChairCo|       false|     6000.0|     false|         240000|  Overstocked

In [0]:
df_WO.filter(col("StockCategory") == "LowStock").show()

+-----------+------+------------+----------+--------+------------+-------------+---------+---------+------------+-----------+----------+---------------+-------------+
|   Category|ItemID|    ItemName| Warehouse|StockQty|ReorderLevel|LastRestocked|UnitPrice|  Supplie|NeedsReorder|AvgCatPrice|IsBelowAvg|TotalStockValue|StockCategory|
+-----------+------+------------+----------+--------+------------+-------------+---------+---------+------------+-----------+----------+---------------+-------------+
|Electronics|  I002|      Laptop|WarehouseB|      10|          15|   2024-04-01|    70000|TechWorld|        true|    36000.0|     false|         700000|     LowStock|
| Appliances|  I004|Refrigerator|WarehouseC|       5|          10|   2024-02-20|    25000| FreezeIt|        true|    25000.0|     false|         125000|     LowStock|
|Electronics|  I005|     Printer|WarehouseB|       3|           5|   2024-03-30|     8000|PrintFast|        true|    36000.0|      true|          24000|     LowStock

In [0]:
df_WO.where("StockCategory = 'LowStock'").show()


+-----------+------+------------+----------+--------+------------+-------------+---------+---------+------------+-----------+----------+---------------+-------------+
|   Category|ItemID|    ItemName| Warehouse|StockQty|ReorderLevel|LastRestocked|UnitPrice|  Supplie|NeedsReorder|AvgCatPrice|IsBelowAvg|TotalStockValue|StockCategory|
+-----------+------+------------+----------+--------+------------+-------------+---------+---------+------------+-----------+----------+---------------+-------------+
|Electronics|  I002|      Laptop|WarehouseB|      10|          15|   2024-04-01|    70000|TechWorld|        true|    36000.0|     false|         700000|     LowStock|
| Appliances|  I004|Refrigerator|WarehouseC|       5|          10|   2024-02-20|    25000| FreezeIt|        true|    25000.0|     false|         125000|     LowStock|
|Electronics|  I005|     Printer|WarehouseB|       3|           5|   2024-03-30|     8000|PrintFast|        true|    36000.0|      true|          24000|     LowStock

In [0]:

df_RS = df_TSV.withColumn("RestockMonth", month("LastRestocked"))
df_RS.show()

+-----------+------+------------+----------+--------+------------+-------------+---------+---------+------------+-----------+----------+---------------+------------+
|   Category|ItemID|    ItemName| Warehouse|StockQty|ReorderLevel|LastRestocked|UnitPrice|  Supplie|NeedsReorder|AvgCatPrice|IsBelowAvg|TotalStockValue|RestockMonth|
+-----------+------+------------+----------+--------+------------+-------------+---------+---------+------------+-----------+----------+---------------+------------+
|Electronics|  I001|      LED TV|WarehouseA|      50|          20|   2024-03-15|    30000|   AVTech|       false|    36000.0|      true|        1500000|           3|
|Electronics|  I002|      Laptop|WarehouseB|      10|          15|   2024-04-01|    70000|TechWorld|        true|    36000.0|     false|         700000|           4|
|  Furniture|  I003|Office Chair|WarehouseA|      40|          10|   2024-03-25|     6000|  ChairCo|       false|     6000.0|     false|         240000|           3|
| Ap

In [0]:
df_SA = df_RS.withColumn("StockAge", datediff(current_date(), col("LastRestocked")))
df_SA.show()

+-----------+------+------------+----------+--------+------------+-------------+---------+---------+------------+-----------+----------+---------------+------------+--------+
|   Category|ItemID|    ItemName| Warehouse|StockQty|ReorderLevel|LastRestocked|UnitPrice|  Supplie|NeedsReorder|AvgCatPrice|IsBelowAvg|TotalStockValue|RestockMonth|StockAge|
+-----------+------+------------+----------+--------+------------+-------------+---------+---------+------------+-----------+----------+---------------+------------+--------+
|Electronics|  I001|      LED TV|WarehouseA|      50|          20|   2024-03-15|    30000|   AVTech|       false|    36000.0|      true|        1500000|           3|     461|
|Electronics|  I002|      Laptop|WarehouseB|      10|          15|   2024-04-01|    70000|TechWorld|        true|    36000.0|     false|         700000|           4|     444|
|  Furniture|  I003|Office Chair|WarehouseA|      40|          10|   2024-03-25|     6000|  ChairCo|       false|     6000.0|

In [0]:
df_SAB = df_SA.withColumn("StockAgeBucket", when(col("StockAge") <= 30, "New").when(col("StockAge") <= 90, "Moderate").otherwise("Stale"))
df_SAB.show()

+-----------+------+------------+----------+--------+------------+-------------+---------+---------+------------+-----------+----------+---------------+------------+--------+--------------+
|   Category|ItemID|    ItemName| Warehouse|StockQty|ReorderLevel|LastRestocked|UnitPrice|  Supplie|NeedsReorder|AvgCatPrice|IsBelowAvg|TotalStockValue|RestockMonth|StockAge|StockAgeBucket|
+-----------+------+------------+----------+--------+------------+-------------+---------+---------+------------+-----------+----------+---------------+------------+--------+--------------+
|Electronics|  I001|      LED TV|WarehouseA|      50|          20|   2024-03-15|    30000|   AVTech|       false|    36000.0|      true|        1500000|           3|     461|         Stale|
|Electronics|  I002|      Laptop|WarehouseB|      10|          15|   2024-04-01|    70000|TechWorld|        true|    36000.0|     false|         700000|           4|     444|         Stale|
|  Furniture|  I003|Office Chair|WarehouseA|      

In [0]:
df_SAB.write.mode("overwrite").option("header", True).csv("/export/inventory/csv_output")


In [0]:
df_SAB.write.mode("overwrite").json("/export/inventory/json_output")

In [0]:
df_SAB.write.format("delta").mode("overwrite").save("/export/inventory/delta_output")

In [0]:
df_SAB.filter("StockAgeBucket = 'Stale'").write.mode("overwrite").option("header", True).csv("/export/inventory/stale_items/")