<a href="https://colab.research.google.com/github/MithunSrinivas28/Fluxo_AI_Supplychain/blob/main/dataset_generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Step 1: Product Metadata Definition

In [1]:
import random
import numpy as np
import pandas as pd

random.seed(42)
np.random.seed(42)

# -------------------------
# Product Configuration
# -------------------------

products = [
    # Agriculture
    {"name": "Fertilizer", "category": "agriculture", "tier": "medium"},
    {"name": "Pesticide", "category": "agriculture", "tier": "low"},
    {"name": "Seeds", "category": "agriculture", "tier": "low"},

    # Dairy
    {"name": "Milk", "category": "dairy", "tier": "high"},
    {"name": "Curd", "category": "dairy", "tier": "medium"},
    {"name": "Butter", "category": "dairy", "tier": "medium"},

    # Poultry
    {"name": "Eggs", "category": "poultry", "tier": "high"},
    {"name": "Chicken", "category": "poultry", "tier": "medium"},

    # Grains
    {"name": "Rice", "category": "grains", "tier": "high"},
    {"name": "Wheat Flour", "category": "grains", "tier": "high"},
    {"name": "Corn", "category": "grains", "tier": "medium"},

    # Vegetables
    {"name": "Onion", "category": "vegetables", "tier": "high"},
    {"name": "Tomato", "category": "vegetables", "tier": "medium"},
    {"name": "Potato", "category": "vegetables", "tier": "high"},

    # Fruits
    {"name": "Apple", "category": "fruits", "tier": "medium"},
    {"name": "Banana", "category": "fruits", "tier": "high"},

    # Electronics
    {"name": "Mobile Phone", "category": "electronics", "tier": "low"},
    {"name": "LED Bulb", "category": "electronics", "tier": "high"},
    {"name": "Extension Cord", "category": "electronics", "tier": "low"},

    # Raw Materials
    {"name": "Steel Rod", "category": "raw_materials", "tier": "medium"},
    {"name": "Cement Bag", "category": "raw_materials", "tier": "medium"},
    {"name": "Plastic Granules", "category": "raw_materials", "tier": "low"},

    # Furniture
    {"name": "Office Chair", "category": "furniture", "tier": "low"},
    {"name": "Study Table", "category": "furniture", "tier": "low"},
]

# STEP 2 — Assign Base Demand, Price & Sensitivity

In [2]:
# Demand ranges by tier
demand_ranges = {
    "high": (800, 1200),
    "medium": (300, 700),
    "low": (50, 250)
}

# Price sensitivity by category
price_sensitivity_map = {
    "staple": 0.001,
    "dairy": 0.002,
    "poultry": 0.002,
    "vegetables": 0.002,
    "fruits": 0.002,
    "electronics": 0.005,
    "raw_materials": 0.003,
    "furniture": 0.004,
    "agriculture": 0.003,
    "grains": 0.0015
}

for idx, product in enumerate(products):
    product["product_id"] = idx + 1

    # Assign base demand
    low, high = demand_ranges[product["tier"]]
    product["base_demand"] = random.randint(low, high)

    # Assign base price (simple heuristic)
    product["base_price"] = random.randint(50, 2000)

    # Assign price sensitivity
    product["price_sensitivity"] = price_sensitivity_map.get(
        product["category"], 0.002
    )

products_df = pd.DataFrame(products)
products_df.head()

Unnamed: 0,name,category,tier,product_id,base_demand,base_price,price_sensitivity
0,Fertilizer,agriculture,medium,1,627,278,0.003
1,Pesticide,agriculture,low,2,56,1568,0.003
2,Seeds,agriculture,low,3,120,551,0.003
3,Milk,dairy,high,4,914,335,0.002
4,Curd,dairy,medium,5,677,259,0.002


# STEP 2 — Define Zones & Warehouses (Geographic Layer)

In [3]:
# -------------------------
# Zone Configuration
# -------------------------

zones = {
    "North": 1.05,
    "South": 0.95,
    "East": 1.10,
    "West": 0.90
}

# -------------------------
# Warehouse Configuration
# -------------------------

warehouses = {}

for zone in zones:
    warehouses[zone] = {
        "A": round(random.uniform(0.95, 1.05), 3),
        "B": round(random.uniform(0.95, 1.05), 3),
        "C": round(random.uniform(0.95, 1.05), 3),
    }

print("Zones:")
print(zones)

print("\nWarehouses:")
print(warehouses)

Zones:
{'North': 1.05, 'South': 0.95, 'East': 1.1, 'West': 0.9}

Warehouses:
{'North': {'A': 0.959, 'B': 0.96, 'C': 1.035}, 'South': {'A': 1.01, 'B': 1.031, 'C': 1.023}, 'East': {'A': 1.004, 'B': 1.047, 'C': 0.988}, 'West': {'A': 1.005, 'B': 1.033, 'C': 1.012}}


# STEP 3 — Build Time Structure (3 Years)

In [4]:
# -------------------------
# Time Configuration
# -------------------------

total_years = 3
weeks_per_year = 52
total_weeks = total_years * weeks_per_year

time_data = []

for week in range(1, total_weeks + 1):
    year = (week - 1) // 52 + 1
    week_in_year = (week - 1) % 52 + 1

    # Approximate month
    month = ((week_in_year - 1) // 4) + 1
    if month > 12:
        month = 12

    # Festival weeks (fixed example weeks)
    festival_weeks = [10, 25, 40, 52]
    is_festival = 1 if week_in_year in festival_weeks else 0

    # Yearly growth
    if year == 1:
        growth = 1.00
    elif year == 2:
        growth = 1.05
    else:
        growth = 1.08

    time_data.append({
        "global_week": week,
        "year": year,
        "week_in_year": week_in_year,
        "month": month,
        "is_festival": is_festival,
        "year_growth": growth
    })

time_df = pd.DataFrame(time_data)
time_df.head()

Unnamed: 0,global_week,year,week_in_year,month,is_festival,year_growth
0,1,1,1,1,0,1.0
1,2,1,2,1,0,1.0
2,3,1,3,1,0,1.0
3,4,1,4,1,0,1.0
4,5,1,5,2,0,1.0


# STEP 4 — Build the Demand Calculation Engine

In [5]:
# -------------------------
# Demand Calculation Engine
# -------------------------

def calculate_demand(product, zone, warehouse, time_row):

    base = product["base_demand"]

    zone_multiplier = zones[zone]
    warehouse_multiplier = warehouses[zone][warehouse]

    # Seasonal multiplier based on category
    seasonal_multiplier = 1.0

    # Winter boost example (weeks 1–8)
    if time_row["week_in_year"] <= 8:
        if product["category"] in ["dairy", "poultry"]:
            seasonal_multiplier *= 1.08

    # Monsoon boost example (weeks 25–35)
    if 25 <= time_row["week_in_year"] <= 35:
        if product["category"] in ["vegetables"]:
            seasonal_multiplier *= 1.10

    # Festival effect
    festival_multiplier = 1.15 if time_row["is_festival"] == 1 else 1.0

    # Year growth
    growth_multiplier = time_row["year_growth"]

    # Simulate price fluctuation
    price_variation = np.random.uniform(-0.1, 0.1)  # ±10%
    current_price = product["base_price"] * (1 + price_variation)

    # Price effect
    price_change = current_price - product["base_price"]
    price_effect = 1 - (price_change * product["price_sensitivity"])

    # Random discount (30% probability)
    discount_percent = np.random.choice([0, 5, 10, 15], p=[0.7, 0.1, 0.1, 0.1])
    discount_effect = 1 + (discount_percent * 0.02)

    structured_demand = (
        base
        * zone_multiplier
        * warehouse_multiplier
        * seasonal_multiplier
        * festival_multiplier
        * growth_multiplier
        * price_effect
        * discount_effect
    )

    # Add Gaussian noise (7% std deviation)
    noise = np.random.normal(0, 0.07 * structured_demand)

    final_demand = structured_demand + noise

    return max(0, int(final_demand)), current_price, discount_percent

# STEP 5 — Generate Full Dataset (Product × Zone × Warehouse × Time)

In [6]:
# -------------------------
# Generate Full Dataset
# -------------------------

data_rows = []

for product in products:
    for zone in zones:
        for warehouse in warehouses[zone]:
            for _, time_row in time_df.iterrows():

                demand, current_price, discount = calculate_demand(
                    product, zone, warehouse, time_row
                )

                data_rows.append({
                    "product_id": product["product_id"],
                    "product_name": product["name"],
                    "category": product["category"],
                    "zone": zone,
                    "warehouse": warehouse,
                    "year": time_row["year"],
                    "week": time_row["week_in_year"],
                    "month": time_row["month"],
                    "is_festival": time_row["is_festival"],
                    "base_demand": product["base_demand"],
                    "base_price": product["base_price"],
                    "current_price": current_price,
                    "discount_percent": discount,
                    "zone_multiplier": zones[zone],
                    "warehouse_multiplier": warehouses[zone][warehouse],
                    "year_growth": time_row["year_growth"],
                    "units_sold_next_week": demand
                })

dataset = pd.DataFrame(data_rows)

print("Dataset Shape:", dataset.shape)
dataset.head()

Dataset Shape: (44928, 17)


Unnamed: 0,product_id,product_name,category,zone,warehouse,year,week,month,is_festival,base_demand,base_price,current_price,discount_percent,zone_multiplier,warehouse_multiplier,year_growth,units_sold_next_week
0,1,Fertilizer,agriculture,North,A,1.0,1.0,1.0,0.0,627,278,271.024431,15,1.05,0.959,1.0,875
1,1,Fertilizer,agriculture,North,A,1.0,2.0,1.0,0.0,627,278,258.874636,0,1.05,0.959,1.0,738
2,1,Fertilizer,agriculture,North,A,1.0,3.0,1.0,0.0,627,278,253.429449,10,1.05,0.959,1.0,903
3,1,Fertilizer,agriculture,North,A,1.0,4.0,1.0,0.0,627,278,251.344498,15,1.05,0.959,1.0,934
4,1,Fertilizer,agriculture,North,A,1.0,5.0,2.0,0.0,627,278,296.483811,0,1.05,0.959,1.0,577


In [7]:
dataset.to_csv("synthetic_supplychain_data.csv", index=False)

In [8]:
from google.colab import files
files.download("synthetic_supplychain_data.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>