# AutoGen for supply chain management without UserProxy

In [1]:
import os
import re
import sys
import time
import numpy as np
from typing import List
from tqdm.notebook import tqdm

from autogen import ConversableAgent
from config import env_configs
# importing the packages

# Seed NumPy's global random number generator for reproducibility.
np.random.seed(42)

Variable demand for t=0: 4


In [2]:
# Sanity check: print every scenario's demand at t=0 so the generated series
# can be compared across different frameworks.
for name in env_configs:
    config = env_configs[name]
    # Evaluate the scenario's demand function at the first period only.
    demand_value = config['demand_fn'](0)
    print(f"{name} demand for t=0: {demand_value}")

constant_demand demand for t=0: 4
variable_demand demand for t=0: 3
larger_demand demand for t=0: 7
seasonal_demand demand for t=0: 4
normal_demand demand for t=0: 1
increasing_demand demand for t=0: 5
cyclical_demand demand for t=0: 5
demand_shock demand for t=0: 5
stochastic_demand demand for t=0: 0


## Initializing the environment

In [3]:
# Use a DeepSeek key that is already exported in the environment if there is one;
# only fall back to the placeholder when the variable is unset, so a real key is
# never overwritten by the placeholder when this cell is re-run.
os.environ.setdefault("DEEPSEEK_API_KEY", "YOUR_DEEPSEEK_KEY")
llm_config = {
    "model": "deepseek-chat",  # Replace with the correct DeepSeek model name
    "api_key": os.getenv("DEEPSEEK_API_KEY"),
    "base_url": "https://api.deepseek.com",
}

In [4]:
# Keep an OpenAI key that is already exported; use the placeholder only when unset.
os.environ.setdefault("OPENAI_API_KEY", "YOUR_OPENAI_KEY")

In [5]:
# LLM settings handed to every agent; the key is read from the environment.
llm_config = dict(
    model="gpt-4.5-preview",  # Replace with the necessary OpenAI model
    api_key=os.environ.get("OPENAI_API_KEY"),
    temperature=0.7,
)

In [6]:
# Select the demand scenario and build the environment from its configuration.
from env import  DecentralizedInventoryEnvWithComm
from config import env_configs
# Name of the scenario to simulate; create_agents reads this global as well.
env_config_name = "cyclical_demand"
env_config = env_configs[env_config_name]
# Print the chosen configuration to double-check it.
print(env_config)  

# Instantiate the environment with the configuration entries as keyword arguments.
im_env =  DecentralizedInventoryEnvWithComm(**env_config)
print(im_env)  

{'num_stages': 4, 'num_periods': 12, 'init_inventories': [17, 17, 17, 17], 'lead_times': [2, 2, 2, 2], 'demand_fn': <function <lambda> at 0x0000022DD90A2840>, 'prod_capacities': [20, 25, 30, 35], 'sale_prices': [5, 5, 5, 5], 'order_costs': [5, 5, 5, 5], 'backlog_costs': [1, 1, 1, 1], 'holding_costs': [1, 1, 1, 1], 'stage_names': ['retailer', 'wholesaler', 'distributor', 'manufacturer'], 'comm_size': 3}
<DecentralizedInventoryEnvWithComm instance>


# Function definitions

In [7]:
def parse_observation(obs, stage_index, max_lead_time, num_stages, comm_size):
    """
    Render one stage's observation row as a single human-readable summary line.

    Expected layout of a stage row (as described for the environment):
      - Base state, the first 9 + 2 * max_lead_time elements:
          [0] production capacity
          [1] sale price
          [2] order cost
          [3] backlog cost
          [4] holding cost
          [5] stage-specific lead time
          [6] inventory
          [7] backlog
          [8] upstream backlog (backlog of the next stage)
          [9 : 9+max_lead_time]                 sales
          [9+max_lead_time : 9+2*max_lead_time] deliveries
      - Communication messages: everything after the base state. They are not
        included in the returned text.

    Parameters:
      obs (np.array): Either a single stage row (1D) or a matrix of rows (2D).
      stage_index (int): Row to pick when obs is not 1D; ignored for 1D input.
      max_lead_time (int): Maximum lead time, which fixes the base-state width.
      num_stages (int): Accepted for interface compatibility; not used here.
      comm_size (int): Accepted for interface compatibility; not used here.

    Returns:
      str: Comma-separated description of the stage's base state.
    """
    # A 1D array already is the stage's row; otherwise select the requested row.
    stage_row = obs if obs.ndim == 1 else obs[stage_index]

    # Keep only the base state; the trailing communication slots are not rendered.
    base = stage_row[: 9 + 2 * max_lead_time]

    lead_time = int(base[5])

    # The history windows are allocated with max_lead_time slots each; only the
    # last `lead_time` entries of each window are reported for this stage.
    sales, deliveries = [], []
    if lead_time > 0:
        sales_window = base[-2 * max_lead_time : -max_lead_time]
        delivery_window = base[-max_lead_time:]
        sales = sales_window[-lead_time:].tolist()
        deliveries = delivery_window[-lead_time:].tolist()

    fields = [
        f"Production Capacity = {base[0]}",
        f"Sale Price = {base[1]}",
        f"Order Cost = {base[2]}",
        f"Backlog Cost = {base[3]}",
        f"Holding Cost = {base[4]}",
        f"Stage Lead Time = {lead_time}",
        f"Inventory = {base[6]}",
        f"Current Backlog (you owing to the downstream) = {base[7]}",
        f"Upstream Backlog (your upstream owing to you) = {base[8]}",
        f"Previous Sales (in the recent period(s), from old to new)={sales}",
        f"Arriving Deliveries (in this and the next period(s), from near to far) = {deliveries}",
    ]
    return ", ".join(fields)

In [8]:
def get_demand_description(env_config_name):
    """
    Return the textual description of the demand scenario named by the
    environment configuration, or a fallback sentence for unknown names.
    """
    # (scenario name, description shown to the agents) pairs.
    scenario_texts = dict([
        ("constant_demand",
         "The expected demand at the retailer(stage 1) is a constant value of 4 units for all 12 periods."),
        ("variable_demand",
         "The expected demand at the retailer (stage 1) is a discrete uniform distribution U{0, 4} for all 12 periods."),
        ("larger_demand",
         "The expected demand at the retailer (stage 1) is a discrete uniform distribution U{0, 9} for all 12 periods."),
        ("seasonal_demand",
         "For the first four periods, demand follows a discrete uniform distribution over {0, 1, 2, 3, 4}, and for the following eight periods, it follows a discrete uniform distribution over {5, 6, 7, 8}."),
        ("normal_demand",
         "The expected demand at the retailer (stage 1) is a normal distribution N(4, 2^2), truncated at 0, for all 12 rounds."),
        ("increasing_demand",
         "The expected demand at the retailer (stage 1) is a linearly increasing demand by starting with an initial value 6 and growing by 1 unit every period"),
        ("cyclical_demand",
         " The expected demand at the retailer (stage 1) is computed as a seasonal sine wave—with a 12-round period, a 5-unit amplitude, and a 6-unit upward shift—whose value is rounded to yield an integer."),
        ("demand_shock",
         "The expected demand at the retailer (stage 1) is normally 5, but it jumps by 8 units to 13 during periods 8 through 10, capturing a temporary demand shock. "),
        ("stochastic_demand",
         "The expected demand at the retailer (stage 1) is an Integer-Valued Autoregressive INAR(1) process with a thinning probability of 0.5, meaning 50% of the previous period's demand carries over. New demand is added as Poisson arrivals with a mean of 2, ensuring the overall demand remains an integer count."),
    ])

    if env_config_name in scenario_texts:
        return scenario_texts[env_config_name]
    return "Unknown demand configuration."

In [9]:
def parse_order_and_comm(response_str: str, comm_size: int):
    """
    Extract the order quantity and communication vector from an agent reply.

    The reply is expected to contain the bracketed fields
    ``[Order quantity: X]`` and ``[Comm vector: v1, v2, ...]`` (case-insensitive).
    Because the agents are instructed to end their reply with these fields, the
    LAST occurrence of each field is used, so values quoted earlier in the
    reasoning do not override the final decision.

    Parameters:
      response_str (str): Raw text returned by the agent. A non-string value
        (for example None) is treated like an empty reply.
      comm_size (int): Length of the returned communication vector.

    Returns:
      tuple: (order_qty, comm_array) where order_qty is an int (0 when no valid
      field is found) and comm_array is a float32 NumPy array of length
      comm_size. Missing entries are zero-filled, extra entries are dropped,
      and an unparsable vector yields all zeros.
    """
    import re
    import logging
    logger = logging.getLogger(__name__)

    # A missing or non-text reply falls back to the defaults instead of raising.
    if not isinstance(response_str, str):
        logger.warning(
            "Response is not a string (got %s). Treating it as an empty response.",
            type(response_str).__name__,
        )
        response_str = ""

    # 1) Parse order quantity strictly, in case of inadequate response from the LLM.
    order_pattern = r"\[Order\s*quantity:\s*(\d+)\]"
    order_matches = re.findall(order_pattern, response_str, re.IGNORECASE)
    if order_matches:
        order_qty = int(order_matches[-1])
    else:
        logger.warning("Order quantity not found. Defaulting to 0.")
        order_qty = 0

    # 2) Parse the communication vector.
    comm_array = np.zeros(comm_size, dtype=np.float32)
    comm_pattern = r"\[Comm\s*vector:\s*([^\]]+)\]"
    comm_matches = re.findall(comm_pattern, response_str, re.IGNORECASE)
    if comm_matches:
        try:
            values = [float(token.strip()) for token in comm_matches[-1].split(",")]
        except ValueError:
            logger.warning("Error parsing communication vector. Defaulting to zeros.")
            values = []
        # Copy at most comm_size values; any remaining slots stay zero.
        for i, val in enumerate(values[:comm_size]):
            comm_array[i] = val
    else:
        logger.warning("Communication vector not found. Defaulting to zeros.")

    return order_qty, comm_array

## Agent creation and simulation definition

In [10]:
def create_agents(stage_names: List[str], llm_config, scenario_name=None) -> List[ConversableAgent]:
    """
    Create one ConversableAgent per supply-chain stage.

    Each agent's system message states its stage number and name, the goal
    statement for the scenario, the demand description for the scenario, and
    the required bracketed answer format.

    Parameters:
      stage_names (List[str]): Stage names, ordered from stage 1 upwards.
      llm_config: LLM configuration passed unchanged to every agent.
      scenario_name (str, optional): Demand scenario used for the goal and
        demand description. When omitted, the module-level ``env_config_name``
        is used, which is the original behaviour.

    Returns:
      List[ConversableAgent]: The agents, in the same order as ``stage_names``.
    """
    if scenario_name is None:
        # Backward-compatible default: fall back to the notebook-level scenario.
        scenario_name = env_config_name

    num_stages = len(stage_names)
    demand_description = get_demand_description(scenario_name)

    # The goal statement depends on the scenario.
    profit_scenarios = {"constant_demand", "seasonal_demand", "normal_demand", "increasing_demand"}
    if scenario_name in profit_scenarios:
        goal_statement = "Your goal is to maximize the total profit (revenue - total cost) by placing orders wisely AND providing a helpful communication message."
    else:
        goal_statement = "Your goal is to minimize the total cost (order, holding, backlog) by placing orders wisely AND providing a helpful communication message."

    agents = []
    for stage, stage_name in enumerate(stage_names):
        agent = ConversableAgent(
            name=f"{stage_name.capitalize()}_Agent",
            system_message=(
                f"You play a crucial role in a {num_stages}-stage supply chain as stage {stage + 1}, called '{stage_name}'. "
                f"{goal_statement}\n"
                f"{demand_description}\n"
                "In your final response each round:\n"
                "1. Provide a non-negative integer order quantity in the format: [Order quantity: X]\n"
                "2. Provide a communication vector in the format: [Comm vector: v1, v2, v3]\n\n"
                "Use the shortest text necessary and end with the bracketed fields.\n"
            ),
            llm_config=llm_config,
            code_execution_config=False,
            human_input_mode="NEVER",
        )
        agents.append(agent)

    return agents

In [11]:
def run_simulation_decentralized(env_config_name, env, stage_agents):
    """
    Run one episode of the decentralized inventory simulation with LLM agents.

    In every period each stage's agent receives a text rendering of its local
    observation (plus the message relayed from the stage directly below it, if
    any), replies with an order quantity and a communication vector, and the
    collected actions are applied to the environment in a single step.

    Parameters:
      env_config_name (str): Scenario name; used for logging and to trigger the
        regeneration of the demand series for 'stochastic_demand'.
      env: Environment exposing reset(), step(), num_periods, num_stages,
        stage_names, max_lead_time and comm_size.
      stage_agents (list): One agent per stage, indexed like the stages.

    Returns:
      float: Sum of all stages' rewards over all simulated periods.
    """
    # Reset the environment and get the initial observation dictionary.
    obs, _ = env.reset()  # obs is a dict: {"stage_0": state, "stage_1": state, ...}

    # Stochastic demand is defined as a pre-generated series, so a fresh series
    # is drawn for each episode.
    # NOTE(review): this reads the module-level `env_config`, not a value reachable
    # from `env`; confirm the environment shares that demand function object.
    if env_config_name == 'stochastic_demand':
        print("Generating a new stochastic demand series...")
        env_config["demand_fn"].generate_new_series()  # Reset demand for a new episode

    done = False
    total_reward = 0.0
    period = 0

    print(f"Simulation started: {env_config_name}\n{'=' * 80}")

    while not done and period < env.num_periods:
        print(f"--- PERIOD {period + 1} ---")

        # downstream_messages[k] holds the message written by stage k-1 for stage k
        # during this period (None when nothing was relayed).
        downstream_messages = [None] * env.num_stages

        actions = []  # List of tuples: (order_quantity, comm_vector)

        for stage_idx in range(env.num_stages):
            stage_name = env.stage_names[stage_idx].capitalize()
            # Getting the local observation for this stage.
            local_obs = obs[f"stage_{stage_idx}"]
            downstream_msg = downstream_messages[stage_idx]

            # Building the prompt message for the agent.
            input_message = (
                f"--- {stage_name} (Stage {stage_idx + 1}) ---\n"
                f"Current period: {period + 1}\n"
                "Local state:\n"
                f"{parse_observation(local_obs, stage_idx, env.max_lead_time, env.num_stages, env.comm_size)}\n"
            )
            if downstream_msg:
                # The sender is the previous stage (0-based index stage_idx - 1),
                # whose 1-based stage number equals stage_idx.
                input_message += f" [From Stage {stage_idx}]: {downstream_msg}\n"

            input_message += (
                "Provide your decision in the following bracketed format:\n"
                "[Order quantity: X]\n"
                "[Comm vector: v1, v2, v3]\n\n"
                "Where:\n"
                "- X is an integer number of units you wish to order.\n"
                "- v1, v2, v3 are integers representing current inventory, order quantity placed, and urgency level respectively.\n"
                "Explain your reasoning briefly, then end with the bracketed fields."
            )

            # Ask the agent for its decision. The agent is both sender and recipient
            # (there is no separate user proxy); a single turn is requested.
            chat_result = stage_agents[stage_idx].initiate_chat(
                recipient=stage_agents[stage_idx],
                message=input_message,
                summary_method="last_msg",
                max_turns=1,
                clear_history=False,
            )
            assistant_message = chat_result.summary
            print(f"{stage_name} decided: {assistant_message}")

            # Parse the order quantity and communication vector from the agent's response.
            try:
                order_qty, comm_vec = parse_order_and_comm(assistant_message, env.comm_size)
            except Exception as e:
                # Best effort: keep the simulation running with a neutral action,
                # but report why parsing failed.
                print(f"Warning: Parsing failed for stage {stage_idx} ({e}), defaulting action.")
                order_qty, comm_vec = 0, np.zeros(env.comm_size, dtype=np.float32)

            actions.append((order_qty, comm_vec))

            # Relay this stage's decision to the next stage in the chain.
            if stage_idx < env.num_stages - 1:
                downstream_messages[stage_idx + 1] = (
                    f"Ordered {order_qty} units. Communication : {comm_vec.tolist()}"
                )

        # The loop above appends exactly one action per stage, so `actions`
        # always has env.num_stages entries here.
        print(f"Final actions before step: {actions} (expected {env.num_stages})")

        # Creating an action dictionary from the list of actions.
        action_dict = {f"stage_{m}": actions[m] for m in range(env.num_stages)}

        # Take a step in the environment with the collected actions.
        next_obs, reward, terminations, truncations, info = env.step(action_dict)

        # Sum rewards over all stages.
        step_reward = sum(reward.values())
        total_reward += step_reward
        done = terminations["__all__"]

        print(f"Reward: {reward} (Step sum: {step_reward}), Total Reward: {total_reward}")
        print("=" * 80)

        obs = next_obs  # Updating the observation dictionary.
        period += 1

    print("Simulation finished.")
    return total_reward

In [12]:
# Create one agent per stage of the selected configuration, using the LLM settings defined above.
stage_agents = create_agents(env_config["stage_names"], llm_config)
print(f"Stage Agents Created: {stage_agents}")

Stage Agents Created: [<autogen.agentchat.conversable_agent.ConversableAgent object at 0x0000022DD864CCD0>, <autogen.agentchat.conversable_agent.ConversableAgent object at 0x0000022DD92572D0>, <autogen.agentchat.conversable_agent.ConversableAgent object at 0x0000022DD902B2D0>, <autogen.agentchat.conversable_agent.ConversableAgent object at 0x0000022DD92A4F50>]


In [13]:
# Print each agent's system message to verify the goal statement, demand description and answer format.
for stage_agent in stage_agents:
    print(stage_agent.system_message)

You play a crucial role in a 4-stage supply chain as stage 1, called 'retailer'. Your goal is to minimize the total cost (order, holding, backlog) by placing orders wisely AND providing a helpful communication message.
 The expected demand at the retailer (stage 1) is computed as a seasonal sine wave—with a 12-round period, a 5-unit amplitude, and a 6-unit upward shift—whose value is rounded to yield an integer.
In your final response each round:
1. Provide a non-negative integer order quantity in the format: [Order quantity: X]
2. Provide a communication vector in the format: [Comm vector: v1, v2, v3]

Use the shortest text necessary and end with the bracketed fields.

You play a crucial role in a 4-stage supply chain as stage 2, called 'wholesaler'. Your goal is to minimize the total cost (order, holding, backlog) by placing orders wisely AND providing a helpful communication message.
 The expected demand at the retailer (stage 1) is computed as a seasonal sine wave—with a 12-round p

## Running the simulation

In [14]:
# NOTE: this rebinds `tqdm` to the plain-text progress bar, replacing the
# `tqdm.notebook` variant imported in the first cell.
from tqdm import tqdm

num_iterations = 1  # setting the number of iterations
all_rewards = []  # total reward of each completed iteration

for i in tqdm(range(num_iterations)):
    
    # Reset the environment and create new agents for each iteration.
    # (run_simulation_decentralized resets the environment again on entry.)
    im_env.reset()
    stage_agents = create_agents(env_config["stage_names"], llm_config)
    
    # Running the simulation.
    total_reward = run_simulation_decentralized(env_config_name, im_env, stage_agents)
    
    # Collecting the reward from this iteration.
    all_rewards.append(total_reward)
    print(f"Iteration {i+1} finished, total reward: {total_reward}")

# Print out summary statistics over all iterations.
print("Simulation finished.")
print("All rewards:", all_rewards)
print("Average reward:", np.mean(all_rewards))
print("Standard deviation reward:", np.std(all_rewards))


  0%|          | 0/1 [00:00<?, ?it/s]

Simulation started: cyclical_demand
--- PERIOD 1 ---
[33mRetailer_Agent[0m (to Retailer_Agent):

--- Retailer (Stage 1) ---
Current period: 1
Local state:
Production Capacity = 20.0, Sale Price = 5.0, Order Cost = 5.0, Backlog Cost = 1.0, Holding Cost = 1.0, Stage Lead Time = 2, Inventory = 17.0, Current Backlog (you owing to the downstream) = 0.0, Upstream Backlog (your upstream owing to you) = 0.0, Previous Sales (in the recent period(s), from old to new)=[0.0, 0.0], Arriving Deliveries (in this and the next period(s), from near to far) = [0.0, 0.0]
Provide your decision in the following bracketed format:
[Order quantity: X]
[Comm vector: v1, v2, v3]

Where:
- X is an integer number of units you wish to order.
- v1, v2, v3 are integers representing current inventory, order quantity placed, and urgency level respectively.
Explain your reasoning briefly, then end with the bracketed fields.

--------------------------------------------------------------------------------


  0%|          | 0/1 [00:30<?, ?it/s]


KeyboardInterrupt: 


--------------------------------------------------------------------------------
Manufacturer decided: Given the current inventory level of 8 units and the recent sales indicating a demand of 4 units, I will place an order of 4 units. This decision is made to ensure that I maintain a buffer in case demand increases in the future, as the expected demand distribution of U{0, 4} suggests potential variability.

By ordering 4 units, I can prepare for any unexpected demand while managing holding costs effectively.

[Order quantity: 4]
[Comm vector: 8.0, 4.0, 2.0]
Final actions before step: [(4, array([5., 4., 2.], dtype=float32)), (4, array([12.,  4.,  2.], dtype=float32)), (4, array([8., 4., 2.], dtype=float32)), (4, array([8., 4., 2.], dtype=float32))] (expected 4)
Reward: {'stage_0': -7, 'stage_1': -8, 'stage_2': -8, 'stage_3': -8} (Step sum: -31), Total Reward: -373.0
--- PERIOD 11 ---
[33mRetailer_Agent[0m (to Retailer_Agent):

--- Retailer (Stage 1) ---
Local state:
Production Capa


--------------------------------------------------------------------------------
[33mManufacturer_Agent[0m (to Manufacturer_Agent):

With the current inventory level at 8 units and the recent sales indicating a demand of 4 units, I will not place an order this round. The expected demand distribution of U{0, 4} suggests that I can adequately cover potential demand, especially with the incoming delivery of 4 units in the next period.

This decision minimizes holding costs while ensuring I have sufficient inventory to meet potential demand.

[Order quantity: 0]
[Comm vector: 8.0, 0.0, 1.0]

--------------------------------------------------------------------------------
Manufacturer decided: With the current inventory level at 8 units and the recent sales indicating a demand of 4 units, I will not place an order this round. The expected demand distribution of U{0, 4} suggests that I can adequately cover potential demand, especially with the incoming delivery of 4 units in the next peri

--- Manufacturer (Stage 4) ---
Local state:
Production Capacity = 20.0, Sale Price = 0.0, Order Cost = 0.0, Backlog Cost = 1.0, Holding Cost = 1.0, Stage Lead Time = 2, Inventory = 8.0, Current Backlog (you owing to the downstream) = 0.0, Upstream Backlog (your upstream owing to you) = 0.0, Previous Sales (in the recent period(s), from old to new)=[4.0, 0.0], Arriving Deliveries (in this and the next period(s), from near to far) = [4.0, 0.0]
 [From Stage 4]: Ordered 4 units. Communication : [8.0, 4.0, 2.0]
Provide your decision in the following bracketed format:
[Order quantity: X]
[Comm vector: v1, v2, v3]

Where:
- X is an integer number of units you wish to order.
- v1, v2, v3 are floats representing current inventory, order quantity placed, and urgency level respectively.
Explain your reasoning briefly, then end with the bracketed fields.

--------------------------------------------------------------------------------
[33mManufacturer_Agent[0m (to Manufacturer_Agent):

Given the

 80%|████████  | 4/5 [08:51<02:08, 128.35s/it]

Manufacturer decided: Given the current inventory level of 8 units and the recent sales indicating a demand of 4 units, I will place an order of 4 units. This decision is made to ensure that I maintain a buffer in case demand increases in the future, as the expected demand distribution of U{0, 4} suggests potential variability.

By ordering 4 units, I can prepare for any unexpected demand while managing holding costs effectively.

[Order quantity: 4]
[Comm vector: 8.0, 4.0, 2.0]
Final actions before step: [(4, array([4., 4., 2.], dtype=float32)), (4, array([12.,  4.,  2.], dtype=float32)), (4, array([8., 4., 2.], dtype=float32)), (4, array([8., 4., 2.], dtype=float32))] (expected 4)
Reward: {'stage_0': -6, 'stage_1': -8, 'stage_2': -8, 'stage_3': -8} (Step sum: -30), Total Reward: -431.0
Simulation finished.
Iteration 4 finished, total reward: -431.0
Simulation started: variable_demand
--- PERIOD 1 ---
[33mRetailer_Agent[0m (to Retailer_Agent):

--- Retailer (Stage 1) ---
Local state


--------------------------------------------------------------------------------
[33mManufacturer_Agent[0m (to Manufacturer_Agent):

Given the current inventory level of 12 units and the expected demand distribution of U{0, 4}, I anticipate that the demand will be low in the upcoming periods. Since I have sufficient inventory to cover potential demand, I will not place an order this round to minimize holding costs.

Thus, I will maintain my current inventory without ordering additional units.

[Order quantity: 0]
[Comm vector: 12.0, 0.0, 1.0]

--------------------------------------------------------------------------------
Manufacturer decided: Given the current inventory level of 12 units and the expected demand distribution of U{0, 4}, I anticipate that the demand will be low in the upcoming periods. Since I have sufficient inventory to cover potential demand, I will not place an order this round to minimize holding costs.

Thus, I will maintain my current inventory without orderi


--------------------------------------------------------------------------------
Distributor decided: Given the current inventory level of 12.0 and the expected demand distribution of U{0, 4}, I have sufficient stock to meet potential demand in the upcoming periods. However, to ensure that I maintain a buffer against any unexpected increases in demand, I will place a small order of 2 units. 

The urgency level is set to 2.0, indicating a moderate risk, as I want to be prepared for any fluctuations in demand while still managing costs effectively.

Thus, my decision is as follows:

[Order quantity: 2]
[Comm vector: 12.0, 2.0, 2.0]
[33mManufacturer_Agent[0m (to Manufacturer_Agent):

--- Manufacturer (Stage 4) ---
Local state:
Production Capacity = 20.0, Sale Price = 0.0, Order Cost = 0.0, Backlog Cost = 1.0, Holding Cost = 1.0, Stage Lead Time = 2, Inventory = 12.0, Current Backlog (you owing to the downstream) = 0.0, Upstream Backlog (your upstream owing to you) = 0.0, Previous Sales

--- Distributor (Stage 3) ---
Local state:
Production Capacity = 20.0, Sale Price = 0.0, Order Cost = 0.0, Backlog Cost = 1.0, Holding Cost = 1.0, Stage Lead Time = 2, Inventory = 10.0, Current Backlog (you owing to the downstream) = 0.0, Upstream Backlog (your upstream owing to you) = 0.0, Previous Sales (in the recent period(s), from old to new)=[0.0, 2.0], Arriving Deliveries (in this and the next period(s), from near to far) = [0.0, 2.0]
 [From Stage 3]: Ordered 2 units. Communication : [10.0, 2.0, 1.0]
Provide your decision in the following bracketed format:
[Order quantity: X]
[Comm vector: v1, v2, v3]

Where:
- X is an integer number of units you wish to order.
- v1, v2, v3 are floats representing current inventory, order quantity placed, and urgency level respectively.
Explain your reasoning briefly, then end with the bracketed fields.

--------------------------------------------------------------------------------
[33mDistributor_Agent[0m (to Distributor_Agent):

With the c


--------------------------------------------------------------------------------
Wholesaler decided: Given the current inventory level of 8 units and the expected demand distribution of U{0, 4}, I will place an order for 2 units again. Here’s my reasoning:

1. **Demand Coverage**: The maximum expected demand remains at 4 units. With 8 units in inventory, I can cover the demand for the next two periods. However, ordering 2 units will help maintain a buffer in case demand increases unexpectedly.

2. **Minimizing Costs**: By ordering 2 units, I can continue to avoid potential backlog costs if demand exceeds my current inventory in the future. This strategy also helps in managing holding costs effectively.

3. **Urgency Level**: The urgency level is still low (1.0) as I have sufficient inventory to meet the expected demand, but I want to ensure I am prepared for any fluctuations.

Thus, my response is:
[Order quantity: 2]
[Comm vector: 8.0, 2.0, 1.0]
[33mDistributor_Agent[0m (to Distrib

--- Wholesaler (Stage 2) ---
Local state:
Production Capacity = 20.0, Sale Price = 0.0, Order Cost = 0.0, Backlog Cost = 1.0, Holding Cost = 1.0, Stage Lead Time = 2, Inventory = 8.0, Current Backlog (you owing to the downstream) = 0.0, Upstream Backlog (your upstream owing to you) = 0.0, Previous Sales (in the recent period(s), from old to new)=[2.0, 2.0], Arriving Deliveries (in this and the next period(s), from near to far) = [2.0, 2.0]
 [From Stage 2]: Ordered 4 units. Communication : [6.0, 4.0, 2.0]
Provide your decision in the following bracketed format:
[Order quantity: X]
[Comm vector: v1, v2, v3]

Where:
- X is an integer number of units you wish to order.
- v1, v2, v3 are floats representing current inventory, order quantity placed, and urgency level respectively.
Explain your reasoning briefly, then end with the bracketed fields.

--------------------------------------------------------------------------------
[33mWholesaler_Agent[0m (to Wholesaler_Agent):

Given the curre


--------------------------------------------------------------------------------
[33mRetailer_Agent[0m (to Retailer_Agent):

With a current inventory of 6 units and recent sales of 4 and 2 units, I need to consider the expected demand distribution of U{0, 4}. The maximum demand I could face in the next period is still 4 units. 

Given that I have 6 units, I can cover the maximum expected demand for the next period. However, considering the lead time of 2 periods for any future orders, I should maintain a buffer to avoid stockouts. 

To ensure I have enough inventory for the upcoming periods, I will place an order for 4 units. This will help replenish my stock and prepare for potential demand in the next periods.

The urgency level will be set to medium since I need to ensure I have enough inventory to meet future demand.

Thus, my decision is as follows:

[Order quantity: 4]
[Comm vector: 6, 4, 2]

--------------------------------------------------------------------------------
Reta


--------------------------------------------------------------------------------
Manufacturer decided: With the current inventory level at 4 units and the expected demand distribution of U{0, 4}, I need to be cautious as demand could reach up to 4 units in the next period. Since I have an incoming delivery of 4 units, my total inventory will be sufficient to meet potential demand. Therefore, I will not place any additional orders this round to avoid unnecessary holding costs.

[Order quantity: 0]
[Comm vector: 4.0, 0.0, 1.0]
Final actions before step: [(4, array([6., 4., 2.], dtype=float32)), (4, array([6., 4., 2.], dtype=float32)), (4, array([6., 4., 2.], dtype=float32)), (0, array([4., 0., 1.], dtype=float32))] (expected 4)
Reward: {'stage_0': -8, 'stage_1': -4, 'stage_2': -4, 'stage_3': 0} (Step sum: -16), Total Reward: -186.0
--- PERIOD 7 ---
[33mRetailer_Agent[0m (to Retailer_Agent):

--- Retailer (Stage 1) ---
Local state:
Production Capacity = 20.0, Sale Price = 0.0, Order Co

Distributor decided: With the current inventory level at 4.0 and recent sales of 4 units in the last two periods, I need to be cautious about future demand. Given the expected demand distribution of U{0, 4}, I will place an order of 4 units to ensure I have enough stock to cover potential increases in demand and to account for the lead time of 2 periods.

The urgency level remains at 2.0, indicating a moderate risk, as I want to maintain a buffer to avoid stockouts.

Thus, my decision is as follows:

[Order quantity: 4]
[Comm vector: 4.0, 4.0, 2.0]
[33mManufacturer_Agent[0m (to Manufacturer_Agent):

--- Manufacturer (Stage 4) ---
Local state:
Production Capacity = 20.0, Sale Price = 0.0, Order Cost = 0.0, Backlog Cost = 1.0, Holding Cost = 1.0, Stage Lead Time = 2, Inventory = 0.0, Current Backlog (you owing to the downstream) = 0.0, Upstream Backlog (your upstream owing to you) = 0.0, Previous Sales (in the recent period(s), from old to new)=[4.0, 4.0], Arriving Deliveries (in this 

--- Distributor (Stage 3) ---
Local state:
Production Capacity = 20.0, Sale Price = 0.0, Order Cost = 0.0, Backlog Cost = 1.0, Holding Cost = 1.0, Stage Lead Time = 2, Inventory = 4.0, Current Backlog (you owing to the downstream) = 0.0, Upstream Backlog (your upstream owing to you) = 0.0, Previous Sales (in the recent period(s), from old to new)=[4.0, 4.0], Arriving Deliveries (in this and the next period(s), from near to far) = [4.0, 4.0]
 [From Stage 3]: Ordered 2 units. Communication : [6.0, 2.0, 1.0]
Provide your decision in the following bracketed format:
[Order quantity: X]
[Comm vector: v1, v2, v3]

Where:
- X is an integer number of units you wish to order.
- v1, v2, v3 are floats representing current inventory, order quantity placed, and urgency level respectively.
Explain your reasoning briefly, then end with the bracketed fields.

--------------------------------------------------------------------------------
[33mDistributor_Agent[0m (to Distributor_Agent):

With the cur

[33mWholesaler_Agent[0m (to Wholesaler_Agent):

Given the current inventory level of 8 units and the expected demand distribution of U{0, 4}, I will place an order for 0 units this round. Here’s my reasoning:

1. **Demand Coverage**: The maximum expected demand is 4 units. With 8 units in inventory, I can comfortably meet the demand for the next two periods without needing to place an order.

2. **Minimizing Costs**: By not placing an order, I can avoid unnecessary holding costs associated with excess inventory. This decision helps maintain a lean inventory while still being able to meet expected demand.

3. **Urgency Level**: The urgency level is low (1.0) since I have sufficient inventory to cover the expected demand, and there is no immediate risk of stockout.

Thus, my response is:
[Order quantity: 0]
[Comm vector: 8.0, 0.0, 1.0]

--------------------------------------------------------------------------------
Wholesaler decided: Given the current inventory level of 8 units and t

Retailer decided: With a current inventory of 11 units and recent sales of 1 and 2 units, I need to consider the expected demand distribution of U{0, 4}. The maximum demand I could face in the next period is still 4 units. 

Given that I have 11 units, I can comfortably cover the maximum expected demand for the next period. Additionally, I have incoming deliveries of 2 units in the next period, which will increase my inventory to 13 units.

Since I have sufficient inventory to meet the expected demand and a buffer, I will not place an order this round to minimize holding costs. The urgency level will be set to low since I currently have enough stock to meet demand.

Thus, my decision is as follows:

[Order quantity: 0]
[Comm vector: 11, 0, 1]
[33mWholesaler_Agent[0m (to Wholesaler_Agent):

--- Wholesaler (Stage 2) ---
Local state:
Production Capacity = 20.0, Sale Price = 0.0, Order Cost = 0.0, Backlog Cost = 1.0, Holding Cost = 1.0, Stage Lead Time = 2, Inventory = 12.0, Current Back


--------------------------------------------------------------------------------
[33mRetailer_Agent[0m (to Retailer_Agent):

With a current inventory of 13 units and no incoming deliveries, I need to consider the expected demand distribution of U{0, 4}. The maximum demand I could face in the next period is still 4 units. 

Given that I have 13 units, I can comfortably cover the maximum expected demand for the next period. Since there are no incoming deliveries and I have sufficient inventory to meet the expected demand, I will not place an order this round to minimize holding costs.

The urgency level will be set to low since I currently have enough stock to meet demand.

Thus, my decision is as follows:

[Order quantity: 0]
[Comm vector: 13, 0, 1]

--------------------------------------------------------------------------------
Retailer decided: With a current inventory of 13 units and no incoming deliveries, I need to consider the expected demand distribution of U{0, 4}. The maxim

--- Retailer (Stage 1) ---
Local state:
Production Capacity = 20.0, Sale Price = 0.0, Order Cost = 0.0, Backlog Cost = 1.0, Holding Cost = 1.0, Stage Lead Time = 2, Inventory = 12.0, Current Backlog (you owing to the downstream) = 0.0, Upstream Backlog (your upstream owing to you) = 0.0, Previous Sales (in the recent period(s), from old to new)=[0.0, 1.0], Arriving Deliveries (in this and the next period(s), from near to far) = [0.0, 0.0]
Provide your decision in the following bracketed format:
[Order quantity: X]
[Comm vector: v1, v2, v3]

Where:
- X is an integer number of units you wish to order.
- v1, v2, v3 are floats representing current inventory, order quantity placed, and urgency level respectively.
Explain your reasoning briefly, then end with the bracketed fields.

--------------------------------------------------------------------------------
[33mRetailer_Agent[0m (to Retailer_Agent):

With a current inventory of 12 units and no incoming deliveries, I need to consider th


--------------------------------------------------------------------------------


100%|██████████| 5/5 [10:57<00:00, 131.51s/it]

Manufacturer decided: With the current inventory level at 4 units and no backlog, I have enough stock to meet potential demand in the upcoming period. Additionally, I have an incoming delivery of 2 units, which will further increase my inventory. Given the expected demand distribution of U{0, 4}, I will not place any additional orders this round to minimize holding costs.

[Order quantity: 0]
[Comm vector: 4.0, 0.0, 1.0]
Final actions before step: [(0, array([12.,  0.,  1.], dtype=float32)), (0, array([14.,  0.,  1.], dtype=float32)), (0, array([12.,  0.,  1.], dtype=float32)), (0, array([4., 0., 1.], dtype=float32))] (expected 4)
Reward: {'stage_0': -11, 'stage_1': -14, 'stage_2': -12, 'stage_3': -6} (Step sum: -43), Total Reward: -392.0
Simulation finished.
Iteration 5 finished, total reward: -392.0
Simulation finished.
All rewards: [-353.0, -342.0, -459.0, -431.0, -392.0]
Average reward: -395.4
Standard deviation reward: 44.66139272346979



