In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import when, col

# Obtain the Spark session for this notebook (reuses one if it already exists).
spark = (
    SparkSession.builder
    .appName("Inventory_Management")
    .getOrCreate()
)

# Product master data: first line is the header, column types are inferred.
products_df = spark.read.csv(
    "file:/Workspace/Shared/products.csv",
    header=True,
    inferSchema=True,
)

# Per-warehouse stock report, read with the same header/type-inference settings.
stock_df = spark.read.csv(
    "file:/Workspace/Shared/warehouse_stock_report.csv",
    header=True,
    inferSchema=True,
)



In [0]:
# Sanity-check both freshly loaded DataFrames before joining them.
# show() prints the product rows as a plain-text table in the cell output.
products_df.show()
# display() is not defined anywhere in this file; presumably it is the
# Databricks notebook helper that renders a rich table — confirm before
# running this notebook on plain PySpark.
stock_df.display()

+----------+------------+-----------+-----------------+-------------+-----+
|product_id|        name|   category|quantity_in_stock|reorder_level|price|
+----------+------------+-----------+-----------------+-------------+-----+
|         1|    Notebook| Stationery|              100|           20| 45.5|
|         2|         Pen| Stationery|              300|           50| 10.0|
|         3|       Mouse|Electronics|               50|           15|350.0|
|         4|    Keyboard|Electronics|               30|           10|550.0|
|         5|   USB Drive|Electronics|               70|           25|250.0|
|         6|Office Chair|  Furniture|               60|           10|120.0|
|         7|       Table|  Furniture|               20|            5|750.0|
|         8| File Folder| Stationery|               80|           20| 15.0|
|         9|     Charger|Electronics|               25|           10|500.0|
|        10|Sticky Notes| Stationery|              200|           50|  5.0|
+----------+------------+-----------+-----------------+-------------+-----+

warehouse_id,location,product_id,name,net_stock,status
1,Chennai,1,Notebook,70,OK
1,Chennai,3,Mouse,96,OK
1,Chennai,4,Keyboard,-79,Understocked
1,Chennai,6,Office Chair,52,OK
1,Chennai,8,File Folder,48,OK
1,Chennai,10,Sticky Notes,27,Understocked
2,Bangalore,1,Notebook,97,OK
2,Bangalore,2,Pen,-133,Understocked
2,Bangalore,3,Mouse,102,OK
2,Bangalore,4,Keyboard,-82,Understocked


In [0]:
# Enrich every warehouse stock row with its product attributes.
# The left join keeps all stock rows; a row whose (product_id, name) pair has
# no match in products_df gets nulls in the product columns.
# NOTE(review): matching on `name` as well as `product_id` means a spelling
# difference between the two files drops the product attributes — confirm
# this is intended.
joined_df = stock_df.join(
    other=products_df,
    on=["product_id", "name"],
    how="left",
)




In [0]:
from pyspark.sql.functions import when, col, abs, count, max
from pyspark.sql.window import Window

# Severity band limits: how many units below reorder_level a product may be
# before it moves up to the next band.
LOW_SEVERITY_MAX_SHORTFALL = 10
MEDIUM_SEVERITY_MAX_SHORTFALL = 30

# Add three derived columns to the joined stock/product rows:
#   reorder_flag - "REORDER" when net_stock < reorder_level, otherwise "OK"
#   stock_gap    - net_stock - reorder_level (negative means a shortfall)
#   severity     - "None" (no shortfall), "Low", "Medium" or "High"
#
# joined_df is the result of a LEFT join, so reorder_level is null for stock
# rows that found no product. A comparison against null yields null, and a
# trailing .otherwise(...) would turn that into "OK" / "High" — a contradictory
# pair on a row whose stock_gap is null. Every label therefore gets an explicit
# condition, and rows without a reorder_level stay null in all three columns.
final_df = joined_df.withColumn(
    "reorder_flag",
    when(col("net_stock") < col("reorder_level"), "REORDER")
    .when(col("net_stock") >= col("reorder_level"), "OK")
).withColumn(
    "stock_gap",
    col("net_stock") - col("reorder_level")
).withColumn(
    "severity",
    when(col("stock_gap") >= 0, "None")
    # Past this point stock_gap is negative, so "shortfall < N" is written as
    # "stock_gap > -N"; this avoids relying on the functions.abs import that
    # shadows Python's builtin abs.
    .when(col("stock_gap") > -LOW_SEVERITY_MAX_SHORTFALL, "Low")
    .when(col("stock_gap") > -MEDIUM_SEVERITY_MAX_SHORTFALL, "Medium")
    # isNotNull() keeps rows with an unknown gap out of the "High" band.
    .when(col("stock_gap").isNotNull(), "High")
)
final_df.display()

product_id,name,warehouse_id,location,net_stock,status,category,quantity_in_stock,reorder_level,price,reorder_flag,stock_gap,severity
1,Notebook,1,Chennai,70,OK,Stationery,100,20,45.5,OK,50,
3,Mouse,1,Chennai,96,OK,Electronics,50,15,350.0,OK,81,
4,Keyboard,1,Chennai,-79,Understocked,Electronics,30,10,550.0,REORDER,-89,High
6,Office Chair,1,Chennai,52,OK,Furniture,60,10,120.0,OK,42,
8,File Folder,1,Chennai,48,OK,Stationery,80,20,15.0,OK,28,
10,Sticky Notes,1,Chennai,27,Understocked,Stationery,200,50,5.0,REORDER,-23,Medium
1,Notebook,2,Bangalore,97,OK,Stationery,100,20,45.5,OK,77,
2,Pen,2,Bangalore,-133,Understocked,Stationery,300,50,10.0,REORDER,-183,High
3,Mouse,2,Bangalore,102,OK,Electronics,50,15,350.0,OK,87,
4,Keyboard,2,Bangalore,-82,Understocked,Electronics,30,10,550.0,REORDER,-92,High


In [0]:
# Read the stock-movement log (JSON records may span several lines).
movements_df = (
    spark.read
    .option("multiline", True)
    .json("file:/Workspace/Shared/stock_movements.json")
)

# Most recent movement per product. `max` here is the pyspark.sql.functions
# aggregate imported in an earlier cell, not Python's builtin.
latest_movement = max("movement_date").alias("last_movement_date")
movement_stats = movements_df.groupBy("product_id").agg(latest_movement)

# Attach the last movement date to every inventory row; the left join keeps
# rows for products that have no recorded movement.
final_inventory_df = final_df.join(
    other=movement_stats,
    on="product_id",
    how="left",
)
final_inventory_df.display()

product_id,name,warehouse_id,location,net_stock,status,category,quantity_in_stock,reorder_level,price,reorder_flag,stock_gap,severity,last_movement_date
1,Notebook,1,Chennai,70,OK,Stationery,100,20,45.5,OK,50,,2025-06-12T00:00:00
3,Mouse,1,Chennai,96,OK,Electronics,50,15,350.0,OK,81,,2025-06-11T00:00:00
4,Keyboard,1,Chennai,-79,Understocked,Electronics,30,10,550.0,REORDER,-89,High,2025-06-07T00:00:00
6,Office Chair,1,Chennai,52,OK,Furniture,60,10,120.0,OK,42,,2025-06-16T00:00:00
8,File Folder,1,Chennai,48,OK,Stationery,80,20,15.0,OK,28,,2025-06-15T00:00:00
10,Sticky Notes,1,Chennai,27,Understocked,Stationery,200,50,5.0,REORDER,-23,Medium,2025-06-16T00:00:00
1,Notebook,2,Bangalore,97,OK,Stationery,100,20,45.5,OK,77,,2025-06-12T00:00:00
2,Pen,2,Bangalore,-133,Understocked,Stationery,300,50,10.0,REORDER,-183,High,2025-06-13T00:00:00
3,Mouse,2,Bangalore,102,OK,Electronics,50,15,350.0,OK,87,,2025-06-11T00:00:00
4,Keyboard,2,Bangalore,-82,Understocked,Electronics,30,10,550.0,REORDER,-92,High,2025-06-07T00:00:00


In [0]:
# Persist the dashboard dataset twice, replacing any previous output:
# once in Delta format ...
(
    final_inventory_df.write
    .mode("overwrite")
    .format("delta")
    .save("/mnt/final_inventory_dashboard_delta")
)

# ... and once as CSV with a header row.
(
    final_inventory_df.write
    .mode("overwrite")
    .option("header", True)
    .csv("/mnt/final_inventory_dashboard_csv")
)
