In [63]:
from pyspark.sql import SparkSession

# Start a Spark session for this analysis, or reuse one that is already
# running in the kernel.
spark = (
    SparkSession.builder
    .appName("Warehouse_Level_Analysis")
    .getOrCreate()
)

# Trailing expression so the notebook renders the session summary.
spark

In [64]:
# Mount Google Drive; the CSV paths used below all live under this mount point.
from google.colab import drive

drive_mount_point = '/content/mydrive'
drive.mount(drive_mount_point)

Drive already mounted at /content/mydrive; to attempt to forcibly remount, call drive.mount("/content/mydrive", force_remount=True).


In [65]:
from pyspark.sql.functions import *

# Input files on the mounted Drive: stock movements and the warehouse list.
stockPath = (
    "/content/mydrive/MyDrive/Hexware_Training_DataEngineering/Project/"
    "Inventory_Management_System/Week-03/cleaned_stock_data.csv"
)
warehousePath = (
    "/content/mydrive/MyDrive/Hexware_Training_DataEngineering/Project/"
    "Inventory_Management_System/Week-03/warehouses.csv"
)

# Both files carry a header row; column types are inferred by Spark.
csv_read_options = {"header": True, "inferSchema": True}
stockDF = spark.read.csv(stockPath, **csv_read_options)
warehouseDF = spark.read.csv(warehousePath, **csv_read_options)

# Print both schemas first ...
for heading, frame in (
    ("Stock Dataframe Schema:\n", stockDF),
    ("\nWarehouse Dataframe Schema:\n", warehouseDF),
):
    print(heading)
    frame.printSchema()

# ... then a five-row preview of each DataFrame.
for heading, frame in (
    ("\n Stock Dataframe:\n", stockDF),
    ("\n Warehouse Dataframe:\n", warehouseDF),
):
    print(heading)
    frame.show(5)

Stock Dataframe Schema:

root
 |-- movement_id: integer (nullable = true)
 |-- product_id: integer (nullable = true)
 |-- warehouse_id: integer (nullable = true)
 |-- quantity: integer (nullable = true)
 |-- movement_type: string (nullable = true)
 |-- movement_date: timestamp (nullable = true)
 |-- reference_number: string (nullable = true)
 |-- reason: string (nullable = true)
 |-- net_quantity: integer (nullable = true)


Warehouse Dataframe Schema:

root
 |-- warehouse_id: integer (nullable = true)
 |-- name: string (nullable = true)
 |-- location: string (nullable = true)
 |-- capacity: integer (nullable = true)


 Stock Dataframe:

+-----------+----------+------------+--------+-------------+-------------------+----------------+-----------------+------------+
|movement_id|product_id|warehouse_id|quantity|movement_type|      movement_date|reference_number|           reason|net_quantity|
+-----------+----------+------------+--------+-------------+-------------------+----------------

In [75]:
# Current stock per warehouse = total of net_quantity over its stock movements.
# `sum` and `col` are the Spark column functions star-imported from
# pyspark.sql.functions, not the Python builtins.
total_net_quantity = sum(col("net_quantity")).alias("current_stock")
warehouse_stock = stockDF.groupBy("warehouse_id").agg(total_net_quantity)
warehouse_stock.show()

+------------+-------------+
|warehouse_id|current_stock|
+------------+-------------+
|         205|         4200|
|         202|         3900|
|         203|         8700|
|         204|        40100|
|         201|         7500|
+------------+-------------+



In [67]:
# Attach warehouse name, location and capacity to the per-warehouse totals.
# NOTE: this is an inner join on warehouse_id, so a warehouse that has no row
# in warehouse_stock does not appear in the result.
report_columns = ["warehouse_id", "name", "location", "current_stock", "capacity"]
wh_analysis = warehouse_stock.join(
    warehouseDF, on="warehouse_id", how="inner"
).select(*report_columns)

print("\nCurrent Stock per Warehouse:")
wh_analysis.show()



Current Stock per Warehouse:
+------------+--------------------+---------------+-------------+--------+
|warehouse_id|                name|       location|current_stock|capacity|
+------------+--------------------+---------------+-------------+--------+
|         201|North Distributio...|    Chicago, IL|         7500|   10000|
|         202|      West Coast Hub|Los Angeles, CA|         3900|   15000|
|         203|       East Regional|   New York, NY|         8700|    8000|
|         204|       South Central|     Dallas, TX|        40100|   12000|
|         205|     Midwest Storage|Kansas City, MO|         4200|    9000|
+------------+--------------------+---------------+-------------+--------+



In [68]:
# Usage percentage: current stock as a share of capacity, rounded to two
# decimals. `when`, `round` and `col` are the Spark functions star-imported
# from pyspark.sql.functions.
#
# The division only happens when capacity is a positive number. A missing or
# zero capacity produces a null percentage instead of a divide-by-zero error
# (raised when Spark runs in ANSI mode) or a meaningless value.
#
# The column name "usuage_percent" is kept as-is because later cells and the
# exported CSV reference it by that name.
wh_analysis = wh_analysis.withColumn(
    "usuage_percent",
    when(
        col("capacity") > 0,
        round((col("current_stock") / col("capacity")) * 100, 2),
    ),
)

In [69]:
# Classify each warehouse by how full it is (percent of capacity in use).
OVERSTOCK_THRESHOLD_PCT = 70   # strictly above this -> "OVERSTOCKED"
UNDERSTOCK_THRESHOLD_PCT = 30  # strictly below this -> "UNDERSTOCKED"

usage_pct = col("usuage_percent")
wh_analysis = wh_analysis.withColumn(
    "status",
    # A null percentage (e.g. capacity is null) fails both comparisons below
    # and would otherwise fall through to "NORMAL"; label it explicitly.
    when(usage_pct.isNull(), "UNKNOWN")
    .when(usage_pct > OVERSTOCK_THRESHOLD_PCT, "OVERSTOCKED")
    .when(usage_pct < UNDERSTOCK_THRESHOLD_PCT, "UNDERSTOCKED")
    .otherwise("NORMAL")
)

In [70]:
# Show the classified warehouses, highest usage first.
print("\nWarehouse Stock Status:")
wh_analysis.orderBy(col("usuage_percent").desc()).show()


Warehouse Stock Status:
+------------+--------------------+---------------+-------------+--------+--------------+------------+
|warehouse_id|                name|       location|current_stock|capacity|usuage_percent|      status|
+------------+--------------------+---------------+-------------+--------+--------------+------------+
|         204|       South Central|     Dallas, TX|        40100|   12000|        334.17| OVERSTOCKED|
|         203|       East Regional|   New York, NY|         8700|    8000|        108.75| OVERSTOCKED|
|         201|North Distributio...|    Chicago, IL|         7500|   10000|          75.0| OVERSTOCKED|
|         205|     Midwest Storage|Kansas City, MO|         4200|    9000|         46.67|      NORMAL|
|         202|      West Coast Hub|Los Angeles, CA|         3900|   15000|          26.0|UNDERSTOCKED|
+------------+--------------------+---------------+-------------+--------+--------------+------------+



In [71]:
# Final report: fixed column order, sorted with the highest usage first.
warehouse_report = (
    wh_analysis
    .select(
        "warehouse_id", "name", "location",
        "current_stock", "capacity",
        "usuage_percent", "status",
    )
    .orderBy(col("usuage_percent").desc())
)


In [72]:
# Display the final report; n=20 and truncate=True are show()'s defaults,
# spelled out here (long names are cut off with "...").
warehouse_report.show(n=20, truncate=True)


+------------+--------------------+---------------+-------------+--------+--------------+------------+
|warehouse_id|                name|       location|current_stock|capacity|usuage_percent|      status|
+------------+--------------------+---------------+-------------+--------+--------------+------------+
|         204|       South Central|     Dallas, TX|        40100|   12000|        334.17| OVERSTOCKED|
|         203|       East Regional|   New York, NY|         8700|    8000|        108.75| OVERSTOCKED|
|         201|North Distributio...|    Chicago, IL|         7500|   10000|          75.0| OVERSTOCKED|
|         205|     Midwest Storage|Kansas City, MO|         4200|    9000|         46.67|      NORMAL|
|         202|      West Coast Hub|Los Angeles, CA|         3900|   15000|          26.0|UNDERSTOCKED|
+------------+--------------------+---------------+-------------+--------+--------------+------------+



In [74]:
# Destination CSV for the warehouse stock-status report on the mounted Drive.
output_path = (
    "/content/mydrive/MyDrive/Hexware_Training_DataEngineering/Project/"
    "Inventory_Management_System/Week-03/warehouse_stock_status.csv"
)

# Export the ordered report (warehouse_report) rather than the intermediate
# wh_analysis, so the CSV rows are sorted by usage like the displayed report.
# toPandas() collects every row to the driver, which is fine for a
# one-row-per-warehouse summary.
report = warehouse_report.toPandas()

# index=False keeps the pandas row index out of the file.
report.to_csv(output_path, index=False)
print(" Report successfully saved ")

 Report successfully saved 
