In [0]:
from pyspark.sql.types import *
from pyspark.sql.functions import *

# Explicit schema for the inventory CSV. Every field keeps the default
# nullability, so unparseable or missing values load as NULL.
inventory_schema = (
    StructType()
    .add("ItemID", StringType())
    .add("ItemName", StringType())
    .add("Category", StringType())
    .add("Warehouse", StringType())
    .add("StockQty", IntegerType())
    .add("ReorderLevel", IntegerType())
    .add("LastRestocked", DateType())
    .add("UnitPrice", IntegerType())
    .add("Supplier", StringType())
)

# Read the inventory file with a header row, applying the schema above
# instead of inferring column types.
inventory_df = spark.read.csv(
    "file:/Workspace/Shared/inventory_supply.csv",
    schema=inventory_schema,
    header=True,
)


Scenario 1: Inventory Alerting System

In [0]:
# Flag every item whose stock has dropped below its reorder level.
below_reorder_level = col("StockQty") < col("ReorderLevel")
inventory_df = inventory_df.withColumn("NeedsReorder", below_reorder_level)

# Expose the flagged items to SQL, and raise an alert for each warehouse
# that holds more than two of them.
items_to_restock = inventory_df.filter("NeedsReorder")
items_to_restock.createOrReplaceTempView("items_needing_restock")
warehouse_alerts = (
    items_to_restock
    .groupBy("Warehouse")
    .count()
    .filter("count > 2")
)


Scenario 2: Supplier Price Optimization

In [0]:

# Average unit price per category, used as the "market" reference price.
category_avg = (
    inventory_df
    .groupBy("Category")
    .agg(avg("UnitPrice").alias("AvgCategoryPrice"))
)

# Attach the category average to each item and flag items priced below it.
joined_df = (
    inventory_df
    .join(category_avg, on="Category")
    .withColumn("BelowMarket", col("UnitPrice") < col("AvgCategoryPrice"))
)

# Share of each supplier's items that are priced below the category average;
# a supplier is a "good deal" when that share exceeds one half.
# (`sum` and `count` here are the Spark SQL functions pulled in by the star import.)
below_market_share = sum(when(col("BelowMarket"), 1).otherwise(0)) / count("*")
supplier_deal = (
    joined_df
    .groupBy("Supplier")
    .agg(below_market_share.alias("BelowMarketRatio"))
    .withColumn("GoodDeal", col("BelowMarketRatio") > 0.5)
)


Scenario 3: Cost Forecasting

In [0]:
# Value of the stock on hand for each item: quantity times unit price.
stock_value = col("StockQty") * col("UnitPrice")
inventory_df = inventory_df.withColumn("TotalStockValue", stock_value)

# The three items tying up the most value.
top3_value_items = inventory_df.sort(col("TotalStockValue").desc()).limit(3)

# Persist the enriched inventory as parquet, one directory per warehouse,
# replacing any previous export at this location.
(
    inventory_df.write
    .mode("overwrite")
    .partitionBy("Warehouse")
    .parquet("file:/Workspace/Shared/value_partitioned")
)


Scenario 4: Warehouse Utilization

In [0]:

# Both per-warehouse aggregates share the same grouping.
per_warehouse = inventory_df.groupBy("Warehouse")

# Number of inventory rows held in each warehouse.
items_per_warehouse = per_warehouse.count()

# Mean stock quantity for every (warehouse, category) pair.
avg_stock = (
    inventory_df
    .groupBy("Warehouse", "Category")
    .agg(avg("StockQty").alias("AvgStock"))
)

# Total units per warehouse; warehouses holding fewer than 100 units
# are treated as underutilized.
total_stock = per_warehouse.agg(sum("StockQty").alias("TotalStock"))
underutilized = total_stock.where("TotalStock < 100")


Scenario 5: Delta Audit Trail



In [0]:

from delta.tables import DeltaTable

# Save the current inventory as a Delta table so later changes are versioned.
delta_path = "file:/Workspace/Shared/retail_inventory"
(
    inventory_df.write
    .mode("overwrite")
    .format("delta")
    .save(delta_path)
)
delta_table = DeltaTable.forPath(spark, delta_path)

# Two audited changes, in this order: set the Laptop stock to 20,
# then remove every item that has no stock left.
laptop_rows = "ItemName = 'Laptop'"
delta_table.update(laptop_rows, {"StockQty": "20"})
delta_table.delete("StockQty = 0")

# Print the commit history, then time-travel to version 0 of the table.
delta_table.history().show()
version_0 = (
    spark.read
    .option("versionAsOf", 0)
    .format("delta")
    .load(delta_path)
)


+-------+--------------------+----------------+--------------------+---------+--------------------+----+------------------+--------------------+-----------+-----------------+-------------+--------------------+------------+--------------------+
|version|           timestamp|          userId|            userName|operation| operationParameters| job|          notebook|           clusterId|readVersion|   isolationLevel|isBlindAppend|    operationMetrics|userMetadata|          engineInfo|
+-------+--------------------+----------------+--------------------+---------+--------------------+----+------------------+--------------------+-----------+-----------------+-------------+--------------------+------------+--------------------+
|      3|2025-06-19 05:30:...|7868838587549447|azuser3557_mml.lo...| OPTIMIZE|{predicate -> [],...|NULL|{1222209826929774}|0619-042535-5t46f450|          1|SnapshotIsolation|        false|{numRemovedFiles ...|        NULL|Databricks-Runtim...|
|      2|2025-06-19 05:3

Scenario 6: Alerts from Restock Logs

In [0]:

# Load the restock log; column types are inferred from the CSV contents.
restock_log = spark.read.option("header", True).option("inferSchema", True).csv("file:/Workspace/Shared/restock_logs.csv")

# Collapse the log to a single row per item before joining. Delta's MERGE
# fails when several source rows match the same target row, which would
# happen as soon as an item appears in the log more than once.
# The total is cast back to the log's own QuantityAdded type so NewStockQty
# keeps the type it had when the raw log rows were joined directly.
quantity_added_type = restock_log.schema["QuantityAdded"].dataType
restock_totals = restock_log.groupBy("ItemID").agg(
    sum("QuantityAdded").cast(quantity_added_type).alias("QuantityAdded")
)

# Left join keeps every inventory item; items without a log entry get
# QuantityAdded = NULL, which counts as 0 added and "not restocked recently".
restock_joined = inventory_df.join(restock_totals, on="ItemID", how="left") \
    .withColumn("NewStockQty", col("StockQty") + coalesce(col("QuantityAdded"), lit(0))) \
    .withColumn("RestockedRecently", col("QuantityAdded").isNotNull())

# One update row per ItemID, safe to use as a MERGE source.
updates_df = restock_joined.select("ItemID", "NewStockQty", "RestockedRecently")

# NOTE(review): NewStockQty is derived from inventory_df, not from the Delta
# table's current StockQty, so this merge also overwrites the Laptop stock
# update applied to the table in Scenario 5 — confirm that is intended.
delta_table.alias("target").merge(
    updates_df.alias("updates"),
    "target.ItemID = updates.ItemID"
).whenMatchedUpdate(set={
    "StockQty": "updates.NewStockQty"
}).execute()


Scenario 7: Report Generation with SQL Views

In [0]:
# Register the current inventory DataFrame so the reports below can query it by name.
inventory_df.createOrReplaceTempView("inventory")

# Per-item summary: stock, reorder flag and the value of the stock on hand.
inventory_summary_sql = """
CREATE OR REPLACE TEMP VIEW inventory_summary AS
SELECT ItemName, Category, StockQty, NeedsReorder, (StockQty * UnitPrice) AS TotalStockValue
FROM inventory
"""

# Suppliers ranked by their average unit price, cheapest first.
supplier_leaderboard_sql = """
CREATE OR REPLACE TEMP VIEW supplier_leaderboard AS
SELECT Supplier, AVG(UnitPrice) AS AvgPrice
FROM inventory
GROUP BY Supplier
ORDER BY AvgPrice ASC
"""

spark.sql(inventory_summary_sql)

spark.sql(supplier_leaderboard_sql)


DataFrame[]

Scenario 8: Advanced Filtering

In [0]:

# Classify each item by how its stock compares with its reorder level:
# more than double -> "Overstocked", below it -> "LowStock", anything else -> "Normal".
stock_status = (
    when(col("StockQty") > 2 * col("ReorderLevel"), "Overstocked")
    .when(col("StockQty") < col("ReorderLevel"), "LowStock")
    .otherwise("Normal")
)
inventory_df = inventory_df.withColumn("StockStatus", stock_status)

# The same kind of selection expressed two ways: a SQL string predicate
# and a Column expression.
low_stock_filter = inventory_df.where("StockStatus = 'LowStock'")

overstock_filter = inventory_df.filter(col("StockStatus") == "Overstocked")


Scenario 9: Feature Engineering


In [0]:

# Days since the last restock, measured against today's date, bucketed as
# under 30 days -> "New", under 90 days -> "Moderate", otherwise "Stale".
stock_age_bucket = (
    when(col("StockAge") < 30, "New")
    .when(col("StockAge") < 90, "Moderate")
    .otherwise("Stale")
)

# Add the restock month, the stock age in days and the age bucket in one chain.
inventory_df = (
    inventory_df
    .withColumn("RestockMonth", month("LastRestocked"))
    .withColumn("StockAge", datediff(current_date(), col("LastRestocked")))
    .withColumn("StockAgeBucket", stock_age_bucket)
)


Scenario 10: Export Options

In [0]:

# Export the enriched inventory in three formats, each to its own directory:
# CSV (with header) for analysts, JSON for integrations, Delta for pipelines.
inventory_df.write.mode("overwrite").option("header", True).csv("file:/Workspace/Shared/analyst_csv")
inventory_df.write.mode("overwrite").json("file:/Workspace/Shared/integration_json")
inventory_df.write.format("delta").mode("overwrite").save("file:/Workspace/Shared/pipeline_delta")

# Export only the stale items as JSON. This must go to a dedicated
# subdirectory: overwriting "file:/Workspace/Shared/" itself would clear the
# shared folder, taking the source CSVs and every export above with it.
inventory_df.filter("StockAgeBucket = 'Stale'") \
    .write.mode("overwrite").json("file:/Workspace/Shared/stale_items_json")
