In [0]:
from pyspark.sql import SparkSession
spark=SparkSession.builder\
      .appName("Inventory-supply")\
      .getOrCreate()
spark

In [0]:
from pyspark.sql.functions import *

df=spark.read.option("header",True).option("inferSchema",True) \
    .csv("file:/Workspace/Shared/inventory_supply.csv")
df.printSchema()
df.show()

df=df.withColumn("NeedsReorder", col("StockQty") < col("ReorderLevel"))

df.createOrReplaceTempView("needs_reorder")

spark.sql("SELECT * FROM needs_reorder WHERE NeedsReorder = true").show()
spark.sql("""
SELECT Warehouse, COUNT(*) AS ItemsUnderReorder
FROM needs_reorder
WHERE NeedsReorder = true
GROUP BY Warehouse
HAVING COUNT(*) > 2
""").show()

root
 |-- ItemID: string (nullable = true)
 |-- ItemName: string (nullable = true)
 |-- Category: string (nullable = true)
 |-- Warehouse: string (nullable = true)
 |-- StockQty: integer (nullable = true)
 |-- ReorderLevel: integer (nullable = true)
 |-- LastRestocked: date (nullable = true)
 |-- UnitPrice: integer (nullable = true)
 |-- Supplier: string (nullable = true)

+------+------------+-----------+----------+--------+------------+-------------+---------+---------+
|ItemID|    ItemName|   Category| Warehouse|StockQty|ReorderLevel|LastRestocked|UnitPrice| Supplier|
+------+------------+-----------+----------+--------+------------+-------------+---------+---------+
|  I001|      LED TV|Electronics|WarehouseA|      50|          20|   2024-03-15|    30000|   AVTech|
|  I002|      Laptop|Electronics|WarehouseB|      10|          15|   2024-04-01|    70000|TechWorld|
|  I003|Office Chair|  Furniture|WarehouseA|      40|          10|   2024-03-25|     6000|  ChairCo|
|  I004|Refrigerat

In [0]:
supplier_avg = df.groupBy("Supplier").agg(avg("UnitPrice").alias("AvgPriceBySupplier"))
s=df.join(supplier_avg, "Supplier")
market_avg = df.groupBy("Category").agg(avg("UnitPrice").alias("AvgPriceByCategory"))
s= s.join(market_avg, "Category")
print("Avg price by supplier:")
s.show()

print("Below market average:")
s=s.withColumn("BelowMarket", col("UnitPrice") < col("AvgPriceByCategory"))
s.show()

score = s.groupBy("Supplier").agg(
    (sum(col("BelowMarket").cast("int")) / count("*")).alias("BelowPct")
)
good_deals = score.withColumn("GoodDeal", col("BelowPct") > 0.5)
print("Good deals:")
good_deals.show()


Avg price by supplier:
+-----------+---------+------+------------+----------+--------+------------+-------------+---------+------------+------------------+------------------+
|   Category| Supplier|ItemID|    ItemName| Warehouse|StockQty|ReorderLevel|LastRestocked|UnitPrice|NeedsReorder|AvgPriceBySupplier|AvgPriceByCategory|
+-----------+---------+------+------------+----------+--------+------------+-------------+---------+------------+------------------+------------------+
|Electronics|   AVTech|  I001|      LED TV|WarehouseA|      50|          20|   2024-03-15|    30000|       false|           30000.0|           36000.0|
|Electronics|TechWorld|  I002|      Laptop|WarehouseB|      10|          15|   2024-04-01|    70000|        true|           70000.0|           36000.0|
|Electronics|PrintFast|  I005|     Printer|WarehouseB|       3|           5|   2024-03-30|     8000|        true|            8000.0|           36000.0|
| Appliances| FreezeIt|  I004|Refrigerator|WarehouseC|       5|  

In [0]:
print("Total stock value by warehouse:")
df = df.withColumn("TotalStockValue", col("StockQty") * col("UnitPrice"))
df.groupBy("Warehouse").agg(sum("TotalStockValue").alias("TotalStockValue")).show()

print("Top 3 warehouses by stock value:")
df.orderBy(col("TotalStockValue").desc()).limit(3).show()

df.write.mode("overwrite").parquet("file:/Workspace/Shared/stock_by_warehouse", partitionBy="Warehouse")

Total stock value by warehouse:
+----------+---------------+
| Warehouse|TotalStockValue|
+----------+---------------+
|WarehouseA|        1740000|
|WarehouseC|         125000|
|WarehouseB|         724000|
+----------+---------------+

Top 3 warehouses by stock value:
+------+------------+-----------+----------+--------+------------+-------------+---------+---------+------------+---------------+
|ItemID|    ItemName|   Category| Warehouse|StockQty|ReorderLevel|LastRestocked|UnitPrice| Supplier|NeedsReorder|TotalStockValue|
+------+------------+-----------+----------+--------+------------+-------------+---------+---------+------------+---------------+
|  I001|      LED TV|Electronics|WarehouseA|      50|          20|   2024-03-15|    30000|   AVTech|       false|        1500000|
|  I002|      Laptop|Electronics|WarehouseB|      10|          15|   2024-04-01|    70000|TechWorld|        true|         700000|
|  I003|Office Chair|  Furniture|WarehouseA|      40|          10|   2024-03-25| 

In [0]:
stock_counts=df.groupBy("Warehouse").count().withColumnRenamed("count", "ItemCount")
print("Stock counts:")
stock_counts.show()

avg_stock=df.groupBy("Warehouse","Category").agg(avg("StockQty").alias("AvgStock"))
print("Average stock:")
avg_stock.show()

underutilized=stock_counts.join(df.groupBy("Warehouse").agg(expr("sum(StockQty)").alias("TotalStock")), "Warehouse") \
    .filter(col("TotalStock") < 100)
print("Underutilized warehouses:")
underutilized.show()

Stock counts:
+----------+---------+
| Warehouse|ItemCount|
+----------+---------+
|WarehouseA|        2|
|WarehouseC|        1|
|WarehouseB|        2|
+----------+---------+

Average stock:
+----------+-----------+--------+
| Warehouse|   Category|AvgStock|
+----------+-----------+--------+
|WarehouseB|Electronics|     6.5|
|WarehouseA|  Furniture|    40.0|
|WarehouseC| Appliances|     5.0|
|WarehouseA|Electronics|    50.0|
+----------+-----------+--------+

Underutilized warehouses:
+----------+---------+----------+
| Warehouse|ItemCount|TotalStock|
+----------+---------+----------+
|WarehouseA|        2|        90|
|WarehouseC|        1|         5|
|WarehouseB|        2|        13|
+----------+---------+----------+



In [0]:
from delta.tables import DeltaTable

df.write.format("delta").mode("overwrite").save("file:/Workspace/Shared/retail_inventory")
delta = DeltaTable.forPath(spark, "file:/Workspace/Shared/retail_inventory")

delta.update(condition="ItemName='Laptop'", set={"StockQty": "20"})

delta.delete("StockQty = 0")

spark.sql("DESCRIBE HISTORY delta.`file:/Workspace/Shared/retail_inventory`").show()
spark.read.format("delta").option("versionAsOf", 0).load("file:/Workspace/Shared/retail_inventory").show()

+-------+--------------------+----------------+--------------------+---------+--------------------+----+------------------+--------------------+-----------+-----------------+-------------+--------------------+------------+--------------------+
|version|           timestamp|          userId|            userName|operation| operationParameters| job|          notebook|           clusterId|readVersion|   isolationLevel|isBlindAppend|    operationMetrics|userMetadata|          engineInfo|
+-------+--------------------+----------------+--------------------+---------+--------------------+----+------------------+--------------------+-----------+-----------------+-------------+--------------------+------------+--------------------+
|     19|2025-06-19 06:20:...|4833629471493945|azuser3545_mml.lo...| OPTIMIZE|{predicate -> [],...|NULL|{1093877947262588}|0611-043339-3vb7b9iv|         17|SnapshotIsolation|        false|{numRemovedFiles ...|        NULL|Databricks-Runtim...|
|     18|2025-06-19 06:2

In [0]:
from delta.tables import DeltaTable

logs = spark.read.option("header", True).csv("file:/Workspace/Shared/restock_logs.csv") \
    .withColumnRenamed("QuantityAdded ", "QuantityAdded") \
    .withColumn("RestockDate", to_date("RestockDate", "yyyy-MM-dd"))

df = spark.read.format("delta").load("file:/Workspace/Shared/retail_inventory")

if 'RestockedRecently' not in df.columns:
    df = df.withColumn("RestockedRecently", col("StockQty") * 0 == 1)  
    df.write.format("delta").mode("overwrite").option("overwriteSchema", "true").save("file:/Workspace/Shared/retail_inventory")

delta = DeltaTable.forPath(spark, "file:/Workspace/Shared/retail_inventory")

updated = df.alias("i").join(logs.alias("r"), "ItemID", "left") \
    .withColumn("NewStockQty", col("StockQty") + col("QuantityAdded")) \
    .withColumn("RestockedRecently", col("QuantityAdded").isNotNull())

delta.alias("t").merge(
    updated.select("ItemID", "NewStockQty", "RestockedRecently").alias("s"),
    "t.ItemID = s.ItemID"
).whenMatchedUpdate(set={
    "StockQty": "s.NewStockQty",
    "RestockedRecently": "s.RestockedRecently"
}).execute()

In [0]:

spark.sql("""
CREATE OR REPLACE TEMP VIEW inventory_summary AS
SELECT ItemName, Category, StockQty, NeedsReorder, StockQty*UnitPrice AS TotalStockValue
FROM delta.`file:/Workspace/Shared/retail_inventory`
""")

spark.sql("""
CREATE OR REPLACE TEMP VIEW supplier_leaderboard AS
SELECT Supplier, AVG(UnitPrice) AS AvgPrice
FROM delta.`file:/Workspace/Shared/retail_inventory`
GROUP BY Supplier
ORDER BY AvgPrice
""")


DataFrame[]

In [0]:
df=df.select(
    "ItemName", "Category", "StockQty", "ReorderLevel", "TotalStockValue","LastRestocked"
).withColumn(
    "NeedsReorder", col("StockQty") < col("ReorderLevel")
)
df.createOrReplaceTempView("inventory_summary")

df = spark.table("inventory_summary").withColumn("StockStatus",
    when(col("StockQty") > 2 * col("ReorderLevel"), "Overstocked")
    .when(col("StockQty") < col("ReorderLevel"), "LowStock")
    .otherwise("OK")
)

df.filter(col("StockQty") < col("ReorderLevel")).show()
df.where("StockQty < ReorderLevel").show()

+--------+--------+--------+------------+---------------+-------------+------------+-----------+
|ItemName|Category|StockQty|ReorderLevel|TotalStockValue|LastRestocked|NeedsReorder|StockStatus|
+--------+--------+--------+------------+---------------+-------------+------------+-----------+
+--------+--------+--------+------------+---------------+-------------+------------+-----------+

+--------+--------+--------+------------+---------------+-------------+------------+-----------+
|ItemName|Category|StockQty|ReorderLevel|TotalStockValue|LastRestocked|NeedsReorder|StockStatus|
+--------+--------+--------+------------+---------------+-------------+------------+-----------+
+--------+--------+--------+------------+---------------+-------------+------------+-----------+



In [0]:
df = spark.table("inventory_summary")
df = df.withColumn("RestockMonth", month("LastRestocked")) \
       .withColumn("StockAge", datediff(current_date(), col("LastRestocked"))) \
       .withColumn("StockAgeBucket",
           when(col("StockAge") < 30, "New")
           .when(col("StockAge") < 90, "Moderate")
           .otherwise("Stale"))
df.select("ItemName", "RestockMonth", "StockAge", "StockAgeBucket").show()

+------------+------------+--------+--------------+
|    ItemName|RestockMonth|StockAge|StockAgeBucket|
+------------+------------+--------+--------------+
|      Laptop|           4|     444|         Stale|
|      LED TV|           3|     461|         Stale|
|Office Chair|           3|     451|         Stale|
|Refrigerator|           2|     485|         Stale|
|     Printer|           3|     446|         Stale|
+------------+------------+--------+--------------+



In [0]:
df.write.mode("overwrite").option("header",True) \
   .csv("file:/Workspace/Shared/export/inventory/all_items_csv")
df.write.mode("overwrite").json("file:/Workspace/Shared/export/inventory/all_items_json")
df.write.mode("overwrite").format("delta") \
   .save("file:/Workspace/Shared/export/inventory/all_items_delta")