In [6]:
# Mount Google Drive at /content/drive so later cells can read the input
# files stored under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [20]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, sum as _sum

# Create the Spark session for this notebook, or reuse one that already exists.
spark = (
    SparkSession.builder
    .appName("InventoryManagement")
    .getOrCreate()
)

# Read the stock-movement records from Drive. The multiLine option allows a
# single JSON document to span several lines of the file.
stock_df = (
    spark.read
    .option("multiLine", True)
    .json("/content/drive/MyDrive/stock_movements.json")
)
stock_df.show()



+-------------------+-----------+-------------+----------+--------+---------------+------------------+------------+
|      movement_date|movement_id|movement_type|product_id|quantity|         reason|          supplier|warehouse_id|
+-------------------+-----------+-------------+----------+--------+---------------+------------------+------------+
|2025-06-07T00:00:00|          1|          OUT|         5|      68|      New stock|       GadgetWorld|           4|
|2025-06-16T00:00:00|          2|           IN|        10|      17| Damaged return|     TechGear Ltd.|           1|
|2025-06-12T00:00:00|          3|           IN|         5|      30|    Replacement|OfficeSupplies Co.|           3|
|2025-06-06T00:00:00|          4|           IN|         8|      48|    New product|     DailyUse Inc.|           1|
|2025-06-07T00:00:00|          5|           IN|         2|     100|    New product|     DailyUse Inc.|           4|
|2025-06-15T00:00:00|          6|           IN|         5|     104|     

In [21]:
# Total quantity of "IN" movements per (warehouse, product) pair.
in_stock = (
    stock_df
    .where(col("movement_type") == "IN")
    .groupBy("warehouse_id", "product_id")
    .agg(_sum("quantity").alias("in_quantity"))
)

# Total quantity of "OUT" movements per (warehouse, product) pair.
out_stock = (
    stock_df
    .where(col("movement_type") == "OUT")
    .groupBy("warehouse_id", "product_id")
    .agg(_sum("quantity").alias("out_quantity"))
)

# Outer join keeps pairs that appear on only one side; the missing side comes
# back as null and is replaced with 0 before the subtraction.
stock_summary = (
    in_stock
    .join(out_stock, on=["warehouse_id", "product_id"], how="outer")
    .fillna(0)
    .withColumn("net_stock", col("in_quantity") - col("out_quantity"))
)

stock_summary.show()


+------------+----------+-----------+------------+---------+
|warehouse_id|product_id|in_quantity|out_quantity|net_stock|
+------------+----------+-----------+------------+---------+
|           1|         1|         70|           0|       70|
|           1|         3|          0|          96|      -96|
|           1|         4|          0|          79|      -79|
|           1|         6|          0|          52|      -52|
|           1|         8|         48|           0|       48|
|           1|        10|         61|          34|       27|
|           2|         1|          0|         123|     -123|
|           2|         2|          0|         133|     -133|
|           2|         3|        102|           0|      102|
|           2|         4|          0|          82|      -82|
|           2|         5|          0|         106|     -106|
|           2|         9|        140|          99|       41|
|           2|        10|         33|           0|       33|
|           3|         3

In [22]:
# Product reference table (first row is the header; column types are inferred).
products_df = (
    spark.read
    .options(header=True, inferSchema=True)
    .csv("/content/drive/MyDrive/products.csv")
)

# Warehouse reference table, read with the same settings.
warehouses_df = (
    spark.read
    .options(header=True, inferSchema=True)
    .csv("/content/drive/MyDrive/warehouses.csv")
)


In [23]:
from pyspark.sql.functions import when, col

# Attach product details (name, reorder_level) and warehouse details
# (location, capacity) to each stock row, then label the row:
#   - "Understocked" when net_stock is below the product's reorder_level,
#   - "Overstocked"  when net_stock is above the warehouse's capacity,
#   - "OK"           otherwise.
# The understocked check is evaluated first, so it wins if both would apply.
enriched_df = (
    stock_summary
    .join(products_df.select("product_id", "name", "reorder_level"), on="product_id")
    .join(warehouses_df.select("warehouse_id", "location", "capacity"), on="warehouse_id")
    .withColumn(
        "status",
        when(col("reorder_level") > col("net_stock"), "Understocked")
        .when(col("capacity") < col("net_stock"), "Overstocked")
        .otherwise("OK"),
    )
)

(
    enriched_df
    .select("warehouse_id", "location", "product_id", "name", "net_stock", "status")
    .show()
)



+------------+---------+----------+------------+---------+------------+
|warehouse_id| location|product_id|        name|net_stock|      status|
+------------+---------+----------+------------+---------+------------+
|           1|  Chennai|         1|    Notebook|       70|          OK|
|           1|  Chennai|         3|       Mouse|      -96|Understocked|
|           1|  Chennai|         4|    Keyboard|      -79|Understocked|
|           1|  Chennai|         6|Office Chair|      -52|Understocked|
|           1|  Chennai|         8| File Folder|       48|          OK|
|           1|  Chennai|        10|Sticky Notes|       27|Understocked|
|           2|Bangalore|         1|    Notebook|     -123|Understocked|
|           2|Bangalore|         2|         Pen|     -133|Understocked|
|           2|Bangalore|         3|       Mouse|      102|          OK|
|           2|Bangalore|         4|    Keyboard|      -82|Understocked|
|           2|Bangalore|         5|   USB Drive|     -106|Unders

In [24]:
# Write the report columns to the "warehouse_stock_report" directory as CSV
# with a header row, replacing any output left by a previous run.
(
    enriched_df
    .select("warehouse_id", "location", "product_id", "name", "net_stock", "status")
    .write
    .mode("overwrite")
    .option("header", True)
    .csv("warehouse_stock_report")
)


In [25]:
# Read the saved report back (first line of each file is the header) to check
# what was written.
df = spark.read.csv("warehouse_stock_report", header=True)
df.show()


+------------+---------+----------+------------+---------+------------+
|warehouse_id| location|product_id|        name|net_stock|      status|
+------------+---------+----------+------------+---------+------------+
|           1|  Chennai|         1|    Notebook|       70|          OK|
|           1|  Chennai|         3|       Mouse|      -96|Understocked|
|           1|  Chennai|         4|    Keyboard|      -79|Understocked|
|           1|  Chennai|         6|Office Chair|      -52|Understocked|
|           1|  Chennai|         8| File Folder|       48|          OK|
|           1|  Chennai|        10|Sticky Notes|       27|Understocked|
|           2|Bangalore|         1|    Notebook|     -123|Understocked|
|           2|Bangalore|         2|         Pen|     -133|Understocked|
|           2|Bangalore|         3|       Mouse|      102|          OK|
|           2|Bangalore|         4|    Keyboard|      -82|Understocked|
|           2|Bangalore|         5|   USB Drive|     -106|Unders

In [26]:
# Merge into single CSV file
import glob
import shutil


def _merge_csv_parts(parts_pattern, merged_path):
    """Concatenate CSV part files into one file, keeping a single header row.

    The report is written with header=True, and Spark puts the header line at
    the top of every part file it produces. Concatenating the parts verbatim
    (e.g. with `cat`) therefore repeats the header in the middle of the data
    whenever there is more than one part. Here the header is copied from the
    first non-empty part only and skipped in all the others.

    Args:
        parts_pattern: glob pattern matching the part files to merge.
        merged_path: path of the single CSV file to create (overwritten).

    Returns:
        The number of part files that contributed content.
    """
    merged_parts = 0
    with open(merged_path, "w", encoding="utf-8", newline="") as merged_file:
        # Sorted order matches what the shell glob in `cat part-*.csv` produces.
        for part_path in sorted(glob.glob(parts_pattern)):
            with open(part_path, encoding="utf-8", newline="") as part_file:
                header_line = part_file.readline()
                if not header_line:
                    continue  # zero-byte part file: nothing to copy
                if merged_parts == 0:
                    merged_file.write(header_line)
                # Copy the remaining (data) lines unchanged.
                shutil.copyfileobj(part_file, merged_file)
                merged_parts += 1
    return merged_parts


_merge_csv_parts("warehouse_stock_report/part-*.csv", "warehouse_stock_report.csv")

# Download from Google Colab
from google.colab import files
files.download("warehouse_stock_report.csv")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>