In [None]:
# Datasets to Generate
# Inventory_Transactions.csv : Tracks inventory movement (inbound, outbound, adjustments) over time.
# Supplier_Performance.csv : Monitors reliability, lead times, and defect rates of suppliers.
# Purchase_Orders.csv : Contains data about procurement transactions (quantities, costs, suppliers).
# IoT_Stock_Updates.csv : Real-time stock level updates from IoT sensors in warehouses.
# Sales_History.csv : Historical sales data for demand forecasting.
# Weather_Data.csv : Weather conditions by location for demand-impact analysis.

In [1]:
import pandas as pd
import numpy as np
import random
from faker import Faker

faker = Faker("en_GB")

# Seed both generators so the random draws are repeatable on a fresh kernel.
# Date fields are drawn relative to the current day, so they can still vary
# between runs made on different days.
SEED = 42
random.seed(SEED)
Faker.seed(SEED)

# Inventory transactions: one row per stock movement.
# Quantity is signed so that it agrees with Transaction_Type:
#   Inbound    -> positive (stock added)
#   Outbound   -> negative (stock removed)
#   Adjustment -> either sign (correction up or down)
# A zero quantity is never generated, since it would not be a movement.
TRANSACTION_TYPES = ["Inbound", "Outbound", "Adjustment"]
NOTE_CHOICES = [
    "Routine stock adjustment",
    "Damaged goods removed",
    "Shipment received",
    "Outbound delivery processed",
    None, None  # Allow for some missing values
]

data = []
for _ in range(1000):
    transaction_type = random.choice(TRANSACTION_TYPES)
    magnitude = random.randint(1, 500)
    if transaction_type == "Inbound":
        quantity = magnitude
    elif transaction_type == "Outbound":
        quantity = -magnitude
    else:
        quantity = random.choice([-1, 1]) * magnitude

    data.append({
        "Transaction_ID": faker.uuid4(),
        "Product_ID": random.randint(1001, 1050),
        "Warehouse_Location": faker.city(),
        "Transaction_Type": transaction_type,
        "Quantity": quantity,
        "Transaction_Date": faker.date_this_year(),
        # Controlled notes in English
        "Notes": random.choice(NOTE_CHOICES),
    })

df_inventory = pd.DataFrame(data)
df_inventory.to_csv("Inventory_Transactions.csv", index=False)
print("Inventory_Transactions.csv generated with English notes!")


Inventory_Transactions.csv generated with English notes!


In [2]:
# Supplier performance: one row per supplier per month (1-12).
# Supplier_ID is the supplier's 1-based position in the list below.
suppliers = ["AlphaSupply", "BetaLogistics", "GammaDeliveries", "DeltaWarehousing"]
data = [
    {
        "Supplier_ID": supplier_id,
        "Supplier_Name": supplier_name,
        "Month": month_number,
        # Percentages are deliberately left unrounded ("slightly messy").
        "On_Time_Percentage": random.uniform(85, 100),
        "Defect_Rate_Percentage": random.uniform(0, 5),
        "Average_Lead_Time_Days": random.randint(1, 10),
    }
    for supplier_id, supplier_name in enumerate(suppliers, start=1)
    for month_number in range(1, 13)
]

df_suppliers = pd.DataFrame(data)
df_suppliers.to_csv("Supplier_Performance.csv", index=False)
print("Supplier_Performance.csv generated!")


Supplier_Performance.csv generated!


In [3]:
# Purchase orders: one row per procurement transaction.
data = []
for _ in range(500):
    # Draw two dates from the current year and sort them so that the
    # delivery can never precede the order (same-day delivery is possible).
    order_date, delivery_date = sorted(
        [faker.date_this_year(), faker.date_this_year()]
    )
    data.append({
        "PO_ID": faker.uuid4(),
        "Supplier_ID": random.randint(1, 4),
        "Product_ID": random.randint(1001, 1050),
        "Order_Date": order_date,
        "Delivery_Date": delivery_date,
        "Order_Quantity": random.randint(50, 1000),
        # Drawn independently of Order_Quantity on purpose:
        # simulates errors in delivery quantities.
        "Received_Quantity": random.randint(40, 1000),
        "Cost_Per_Unit": round(random.uniform(5, 100), 2),
        "Status": random.choice(["Completed", "Pending", "Canceled"])
    })

df_purchase_orders = pd.DataFrame(data)
df_purchase_orders.to_csv("Purchase_Orders.csv", index=False)
print("Purchase_Orders.csv generated!")


Purchase_Orders.csv generated!


In [4]:
# IoT stock updates: 2000 sensor readings, each with a fresh sensor UUID,
# a random city, a product ID, a stock level and a timestamp from this year.
data = [
    {
        "Sensor_ID": faker.uuid4(),
        "Warehouse_Location": faker.city(),
        "Product_ID": random.randint(1001, 1050),
        "Stock_Level": random.randint(0, 5000),
        "Update_Timestamp": faker.date_time_this_year(),
    }
    for _ in range(2000)
]

df_iot_updates = pd.DataFrame(data)
df_iot_updates.to_csv("IoT_Stock_Updates.csv", index=False)
print("IoT_Stock_Updates.csv generated!")


IoT_Stock_Updates.csv generated!


In [5]:
# Sales history: one row per sale.
data = []
for _ in range(2000):
    data.append({
        "Transaction_ID": faker.uuid4(),
        # Same 1001-1050 product range as the inventory, purchase-order and
        # IoT datasets, so every sale refers to a product that can appear in
        # the other files.
        "Product_ID": random.randint(1001, 1050),
        "Store_Location": faker.city(),
        "Quantity_Sold": random.randint(1, 50),
        "Sale_Date": faker.date_this_year(),
        # Revenue is drawn independently of Quantity_Sold.
        "Revenue": round(random.uniform(10, 500), 2)
    })

df_sales = pd.DataFrame(data)
df_sales.to_csv("Sales_History.csv", index=False)
print("Sales_History.csv generated!")


Sales_History.csv generated!


In [6]:
# Weather: exactly one observation per city per calendar day.
cities = ["London", "Cardiff", "Manchester", "Birmingham", "Edinburgh", "Glasgow"]

# 1 year of daily data: 365 consecutive days ending today. A date range is
# used instead of random dates so that no (city, day) pair is duplicated or
# missing.
dates = pd.date_range(end=pd.Timestamp.today().normalize(), periods=365, freq="D")

data = []
for day in dates:
    for city in cities:
        data.append({
            "City": city,
            # Plain date (no time component), written to CSV as YYYY-MM-DD.
            "Date": day.date(),
            "Temperature_C": round(random.uniform(-5, 30), 1),
            "Rainfall_mm": round(random.uniform(0, 20), 1),
            "Wind_Speed_kph": round(random.uniform(0, 50), 1),
        })

df_weather = pd.DataFrame(data)
df_weather.to_csv("Weather_Data.csv", index=False)
print("Weather_Data.csv generated!")


Weather_Data.csv generated!
