In [2]:
pip install faker

Note: you may need to restart the kernel to use updated packages.


In [7]:
import os
import pandas as pd
import numpy as np
from faker import Faker

# Initializing Faker
fake = Faker()

# Setting random seeds for reproducibility.
# NumPy and Faker draw from separate random generators, so seeding only NumPy
# would leave every Faker-generated value (names, companies, cities, dates)
# changing from one run to the next.
np.random.seed(42)
Faker.seed(42)

# ---------------------- Generating Inventory Logs Data ---------------------- #
num_products = 100
product_categories = ["Electronics", "Furniture", "Clothing", "Food", "Pharmaceuticals"]

# The columns are listed in a fixed order on purpose: each np.random call
# advances the global NumPy generator, so reordering them would change the data.
inventory_columns = {
    # Sequential IDs P001 ... P100
    "Product_ID": [f"P{i:03d}" for i in range(1, num_products + 1)],
    # One capitalized random word per product
    "Product_Name": [fake.word().capitalize() for _ in range(num_products)],
    "Category": np.random.choice(product_categories, size=num_products),
    # randint excludes the upper bound: stock is 10-499, reorder point is 5-99
    "Stock_Level": np.random.randint(10, 500, size=num_products),
    "Reorder_Point": np.random.randint(5, 100, size=num_products),
    "Inventory_Turnover": np.random.uniform(1, 10, size=num_products).round(2),
    # Random dates within the current calendar year (date_this_year), so the
    # range depends on when the notebook is run
    "Date": [fake.date_this_year() for _ in range(num_products)],
    # Random unit cost between 10 and 100, rounded to 2 decimals
    "Cost_Per_Unit": np.random.uniform(10, 100, size=num_products).round(2),
}
inventory_logs = pd.DataFrame(inventory_columns)

# ---------------------- Generating Supplier Data ---------------------- #
num_suppliers = 20

# Column order is kept fixed so the sequence of NumPy random draws is stable.
supplier_columns = {
    # Sequential IDs S001 ... S020
    "Supplier_ID": [f"S{i:03d}" for i in range(1, num_suppliers + 1)],
    "Supplier_Name": [fake.company() for _ in range(num_suppliers)],
    # Whole days from 3 to 29 (randint excludes the upper bound)
    "Lead_Time_Days": np.random.randint(3, 30, size=num_suppliers),
    # Drawn uniformly from 85-100, rounded to 2 decimals
    "Delivery_Accuracy": np.random.uniform(85, 100, size=num_suppliers).round(2),
    # Drawn uniformly from 2.5-5, rounded to 2 decimals
    "Supplier_Rating": np.random.uniform(2.5, 5, size=num_suppliers).round(2),
}
supplier_data = pd.DataFrame(supplier_columns)

# ---------------------- Generating Shipping & Logistics Data ---------------------- #
num_shipments = 200

# Column order is kept fixed so the sequence of random draws is stable.
shipping_columns = {
    # Sequential IDs SH0001 ... SH0200
    "Shipment_ID": [f"SH{i:04d}" for i in range(1, num_shipments + 1)],
    # Origin and destination are drawn independently of each other
    "Origin": [fake.city() for _ in range(num_shipments)],
    "Destination": [fake.city() for _ in range(num_shipments)],
    # Drawn uniformly from 50-500, rounded to 2 decimals
    "Shipping_Cost": np.random.uniform(50, 500, size=num_shipments).round(2),
    # Whole days from 1 to 14 (randint excludes the upper bound)
    "Delivery_Time_Days": np.random.randint(1, 15, size=num_shipments),
    # "Yes" with probability 0.2, "No" with probability 0.8
    "Delay_Flag": np.random.choice(["Yes", "No"], size=num_shipments, p=[0.2, 0.8]),
}
shipping_data = pd.DataFrame(shipping_columns)

# ---------------------- Generating Warehouse Data ---------------------- #
num_warehouses = 10
warehouse_data = pd.DataFrame({
    # Sequential IDs W01 ... W10
    "Warehouse_ID": [f"W{str(i).zfill(2)}" for i in range(1, num_warehouses + 1)],
    "Warehouse_Location": [fake.city() for _ in range(num_warehouses)],
    # Coordinates are drawn uniformly and independently of the city name above
    "Latitude": np.round(np.random.uniform(-90, 90, num_warehouses), 6),
    "Longitude": np.round(np.random.uniform(-180, 180, num_warehouses), 6),
    # Capacity from 1000 to 9999 (randint excludes the upper bound)
    "Storage_Capacity": np.random.randint(1000, 10000, num_warehouses),
})

# Utilization is drawn per warehouse from 500 up to (and including) that
# warehouse's own capacity. Drawing it independently of capacity could produce
# a warehouse that holds more than it is able to store.
warehouse_data["Current_Utilization"] = np.random.randint(
    500, warehouse_data["Storage_Capacity"].to_numpy() + 1
)

# ---------------------- Saving Data to CSV Files ---------------------- #
# Output directory, relative to the current working directory
save_dir = "data"

# Make sure the directory is there; exist_ok avoids an error on re-runs
os.makedirs(save_dir, exist_ok=True)

# Write each dataset to "<save_dir>/<name>.csv" without the DataFrame index
datasets = {
    "inventory_logs": inventory_logs,
    "supplier_data": supplier_data,
    "shipping_data": shipping_data,
    "warehouse_data": warehouse_data,
}
for dataset_name, frame in datasets.items():
    frame.to_csv(f"{save_dir}/{dataset_name}.csv", index=False)

print("✅ Files saved successfully in the 'data' directory.")


✅ Files saved successfully in the 'data' directory.
