# AutoGen for Supply Chain Management

In [1]:
import os
import re
import sys
import time
import numpy as np
from typing import List
from tqdm.notebook import tqdm
from autogen import ConversableAgent
from env import env_creator
from config import env_configs

# Seed NumPy's legacy global random generator so NumPy-based draws are repeatable.
np.random.seed(seed=42)

Variable demand for t=0: 4


In [2]:
# Quick sanity check: call every configured demand function once at t=0.
for scenario, scenario_cfg in env_configs.items():
    print(f"{scenario} demand for t=0: {scenario_cfg['demand_fn'](0)}")

two_agent demand for t=0: 4
constant_demand demand for t=0: 4
variable_demand demand for t=0: 3
larger_demand demand for t=0: 7
seasonal_demand demand for t=0: 4
normal_demand demand for t=0: 1
increasing_demand demand for t=0: 5
cyclical_demand demand for t=0: 5
demand_shock demand for t=0: 5
stochastic_demand demand for t=0: 0


In [3]:
# Keep a key that is already exported in the environment; the placeholder is
# only a fallback so the cell still runs when nothing is configured.
os.environ.setdefault("DEEPSEEK_API_KEY", "YOUR_DEEPSEEK_KEY")

# Connection settings for the DeepSeek endpoint.
# NOTE: a later cell reassigns `llm_config` to an OpenAI model, so this dict is
# only in effect if that cell is skipped.
llm_config = {
    "model": "deepseek-chat",  # Replace with the correct DeepSeek model name
    "api_key": os.getenv("DEEPSEEK_API_KEY"),  # Fetch the API key from environment variables
    "base_url": "https://api.deepseek.com"   # DeepSeek's API endpoint
}

In [4]:
# Do not overwrite a real key exported in the environment; the placeholder is
# only used when OPENAI_API_KEY is not set at all.
os.environ.setdefault("OPENAI_API_KEY", "YOUR_OPENAI_KEY")

In [6]:
# LLM settings used by the agents: model "o3-mini", key read from OPENAI_API_KEY.
llm_config = dict(
    model="o3-mini",
    api_key=os.environ.get("OPENAI_API_KEY"),
)

## Creating the Environment

In [7]:
# Name of the demand scenario to simulate; used as a key into `env_configs`
# and later passed to `get_demand_description` / `run_simulation`.
env_config_name = "cyclical_demand"
# Configuration entry for the chosen scenario.
env_config = env_configs[env_config_name]
# Build the simulation environment from that configuration.
im_env = env_creator(env_config)

## Getting Descriptions

In [8]:
def get_state_description(state):
    """Render one stage's state as the bullet list embedded in the agent prompt.

    Args:
        state: Mapping with the keys ``lead_time``, ``inventory``, ``backlog``,
            ``upstream_backlog``, ``sales`` and ``deliveries``.

    Returns:
        A multi-line string with one `` - <label>: <value>`` line per field.
        Only the last ``lead_time`` entries of ``deliveries`` are listed as
        arriving deliveries.
    """
    lead_time = state['lead_time']
    deliveries = state['deliveries']
    # A negative-index slice ``[-lead_time:]`` returns the WHOLE sequence when
    # lead_time is 0, so compute the start offset explicitly instead.
    window_start = max(len(deliveries) - lead_time, 0)
    arriving = deliveries[window_start:]
    return (
        f" - Lead Time: {lead_time} round(s)\n"
        f" - Inventory Level: {state['inventory']} unit(s)\n"
        f" - Current Backlog (you owing to the downstream): {state['backlog']} unit(s)\n"
        f" - Upstream Backlog (your upstream owing to you): {state['upstream_backlog']} unit(s)\n"
        f" - Previous Sales (in the recent round(s), from old to new): {state['sales']}\n"
        f" - Arriving Deliveries (in this and the next round(s), from near to far): {arriving}"
    )

In [9]:
def get_demand_description(env_config_name):
    """Return the natural-language description of a scenario's retailer demand.

    The returned sentence is inserted verbatim into the prompt sent to every
    stage agent, so it must read as a complete, well-punctuated sentence.

    Args:
        env_config_name: Name of the demand scenario (e.g. ``"cyclical_demand"``).

    Returns:
        A single-sentence (or two-sentence) description string.

    Raises:
        KeyError: If no description exists for ``env_config_name``.
    """
    descriptions = {
        "constant_demand":
            "The expected demand at the retailer (stage 1) is a constant 4 units for all 12 rounds.",
        "variable_demand":
            "The expected demand at the retailer (stage 1) is a discrete uniform distribution U{0, 4} for all 12 rounds.",
        "larger_demand":
            "The expected demand at the retailer (stage 1) is a discrete uniform distribution U{0, 8} for all 12 rounds.",
        "seasonal_demand":
            "The expected demand at the retailer (stage 1) is a discrete uniform distribution U{0, 4} for the first 4 rounds, "
            "and a discrete uniform distribution U{5, 8} for the last 8 rounds.",
        "normal_demand":
            "The expected demand at the retailer (stage 1) is a normal distribution N(4, 2^2), "
            "truncated at 0, for all 12 rounds.",
        # Terminated with a period so the next prompt sentence does not run on.
        "increasing_demand":
            "The expected demand at the retailer (stage 1) is a linearly increasing demand by starting with an initial value 6, "
            "and growing by 1 unit every round.",
        "stochastic_demand":
            "The expected demand at the retailer (stage 1) is an Integer-Valued Autoregressive INAR(1) process with a thinning probability of 0.5, meaning 50% of the previous period's "
            "demand carries over. New demand is added as Poisson arrivals with a mean of 2, ensuring the overall demand remains an integer count.",
        "cyclical_demand":
            "The expected demand at the retailer (stage 1) is computed as a seasonal sine wave—with a 12-round period, a 5-unit amplitude, "
            "and a 5-unit upward shift—whose value is rounded to yield an integer.",
        # Fixed the duplicated "is" and the missing space after "10,".
        "demand_shock":
            "The expected demand at the retailer (stage 1) is normally 5, but it jumps by 8 units to 13 during rounds 8 through 10, "
            "capturing a temporary demand shock.",
    }
    if env_config_name not in descriptions:
        raise KeyError(f"Error: {env_config_name} not implemented.")
    return descriptions[env_config_name]

# Show the demand description that will be embedded in the agents' prompts.
print(get_demand_description(env_config_name=env_config_name))

The expected demand at the retailer (stage 1) is computed as a seasonal sine wave—with a 12-round period, a 5-unit amplitude, and a 5-unit upward shift—whose value is rounded to yield an integer.


## Creating Agents

In [10]:
# Driver agent that sends the prompts: it has no LLM configured
# (llm_config=False) and never requests human input.
user_proxy = ConversableAgent(
    human_input_mode="NEVER",
    llm_config=False,
    name="UserProxy",
)

In [11]:
def create_agents(stage_names: List[str], llm_config) -> List[ConversableAgent]:
    """Build one conversable agent per supply-chain stage, in the given order.

    Args:
        stage_names: Stage labels; position ``i`` (0-based) becomes stage ``i + 1``.
        llm_config: LLM configuration forwarded unchanged to every agent.

    Returns:
        A list of agents named ``"<Stage>_Agent"``, one per entry of ``stage_names``.
    """
    total = len(stage_names)
    goal = "Your goal is to minimize the total cost by managing inventory and orders effectively."

    def build(position: int, label: str) -> ConversableAgent:
        # `position` is the 1-based stage number shown to the model.
        role = f"You play a crucial role in a {total}-stage supply chain as the stage {position} ({label}). "
        return ConversableAgent(
            name=f"{label.capitalize()}_Agent",
            system_message=role + goal,
            llm_config=llm_config,
            code_execution_config=False,
            human_input_mode="NEVER",
        )

    return [build(position, label) for position, label in enumerate(stage_names, start=1)]

# Instantiate the agents for the stages listed in the selected scenario's config.
stage_agents = create_agents(stage_names=env_config["stage_names"], llm_config=llm_config)



## Running Simulations

In [12]:
def run_simulation(env_config_name, im_env, user_proxy, stage_agents):
    """Play one full episode, asking each stage agent for its order every round.

    For every period, stages are queried in order (stage 0 first). Each stage
    after the first is told the order just placed by its downstream stage in
    the same round. The collected orders are then applied with ``im_env.step``.

    Args:
        env_config_name: Scenario name; selects the demand description used in
            the prompt.
        im_env: Environment exposing ``reset``, ``num_periods``, ``num_stages``,
            ``state_dict``, ``parse_state`` and ``step``.
        user_proxy: Agent whose ``initiate_chat`` sends the prompt to a stage agent.
        stage_agents: One agent per stage, indexed by stage number.

    Returns:
        The sum, over all periods and stages, of the rewards returned by
        ``im_env.step``.
    """
    demand_description = get_demand_description(env_config_name)
    # Per-period logs; collected during the run but not part of the return value.
    all_state_dicts = {}
    all_action_dicts = {}
    all_reward_dicts = {}
    episode_reward = 0
    api_cost = 0

    im_env.reset()

    if env_config_name == 'stochastic_demand':
        print("Generating a new stochastic demand series...")
        # NOTE: relies on the notebook-level `env_config`, not on a parameter.
        env_config["demand_fn"].generate_new_series()  # Reset demand for a new episode

    for period in range(im_env.num_periods):
        state_dict = im_env.parse_state(im_env.state_dict)
        all_state_dicts[period] = state_dict
        action_dict = {}

        for stage in range(im_env.num_stages):
            stage_state = state_dict[f'stage_{stage}']

            # Stages above the retailer see the order their downstream stage
            # has just placed in this same round.
            if stage != 0:
                downstream_order = f"Your downstream order from the stage {stage} for this round is {action_dict[f'stage_{stage - 1}']}. "
            else:
                downstream_order = ""

            message = (
                f"Now this is the round {period + 1}, "
                f"and you are at the stage {stage + 1} of {im_env.num_stages} in the supply chain. "
                f"Given your current state:\n{get_state_description(stage_state)}\n\n"
                f"{demand_description} {downstream_order}"
                "What is your action (order quantity) for this round?\n\n"
                "Golden rule of this game: Open orders should always equal to \"expected downstream orders + backlog\". "
                "If open orders are larger than this, the inventory will rise (once the open orders arrive). "
                "If open orders are smaller than this, the backlog will not go down and it may even rise. "
                "Please consider the lead time and place your order in advance. "
                "Remember that your upstream has its own lead time, so do not wait until your inventory runs out. "
                "Also, avoid ordering too many units at once. "
                "Try to spread your orders over multiple rounds to prevent the bullwhip effect. "
                "Anticipate future demand changes and adjust your orders accordingly to maintain a stable inventory level.\n\n"
                "Please state your reason in 1-2 sentences first "
                "and then provide your action as a non-negative integer within brackets (e.g. [0])."
            )

            chat_result = user_proxy.initiate_chat(
                stage_agents[stage],
                message=message,
                summary_method="last_msg",
                max_turns=1,
                clear_history=False,
            )
            chat_summary = chat_result.summary
            api_cost += chat_result.cost['usage_including_cached_inference']['total_cost']

            stage_action = _parse_order_quantity(chat_summary)
            if stage_action is None:
                # Keep the original best-effort default, but make it visible.
                print(
                    f"Warning: no bracketed order found in the reply of stage {stage + 1} "
                    f"(round {period + 1}); defaulting to 0."
                )
                stage_action = 0
            action_dict[f'stage_{stage}'] = stage_action

        next_states, rewards, _terminations, _truncations, _infos = im_env.step(action_dict)
        next_state_dict = im_env.parse_state(next_states)
        all_state_dicts[period + 1] = next_state_dict
        all_action_dicts[period + 1] = action_dict
        all_reward_dicts[period + 1] = rewards
        episode_reward += sum(rewards.values())
        print(
            f"period = {period}, action_dict = {action_dict}, rewards = {rewards}, episode_reward = {episode_reward}, " \
            f"api_cost = {api_cost}")
        print('=' * 80)

    return episode_reward


def _parse_order_quantity(reply):
    """Extract the order quantity from an agent reply, or None if absent.

    The prompt asks for the reasoning first and the action last, and it contains
    the example "[0]" itself, so a reply may mention other bracketed numbers
    before the actual decision. The LAST bracketed non-negative integer is
    therefore taken as the action. A ``None`` or empty reply yields ``None``.
    """
    bracketed = re.findall(r'\[(\d+)\]', reply or "")
    return int(bracketed[-1]) if bracketed else None

In [13]:
import numpy as np
import copy

# Run the episodes one after another, each with freshly created agents,
# and collect the total reward of every episode.
rewards = []

for i in range(1):
    stage_agents = create_agents(env_config["stage_names"], llm_config)
    reward = run_simulation(env_config_name, im_env, user_proxy, stage_agents)
    rewards += [reward]

# Summary statistics over the collected episode rewards.
mean_reward = np.asarray(rewards).mean()
std_reward = np.asarray(rewards).std()

print(
    f"Rewards: {rewards}\n"
    f"Mean Episode Reward: {mean_reward}\n"
    f"Standard Deviation of Episode Reward: {std_reward}"
)

[33mUserProxy[0m (to Retailer_Agent):

Now this is the round 1, and you are at the stage 1 of 4 in the supply chain. Given your current state:
 - Lead Time: 2 round(s)
 - Inventory Level: 17 unit(s)
 - Current Backlog (you owing to the downstream): 0 unit(s)
 - Upstream Backlog (your upstream owing to you): 0 unit(s)
 - Previous Sales (in the recent round(s), from old to new): [0, 0]
 - Arriving Deliveries (in this and the next round(s), from near to far): [0, 0]

The expected demand at the retailer (stage 1) is computed as a seasonal sine wave—with a 12-round period, a 5-unit amplitude, and a 5-unit upward shift—whose value is rounded to yield an integer. What is your action (order quantity) for this round?

Golden rule of this game: Open orders should always equal to "expected downstream orders + backlog". If open orders are larger than this, the inventory will rise (once the open orders arrive). If open orders are smaller than this, the backlog will not go down and it may even ris

KeyboardInterrupt: 


--------------------------------------------------------------------------------
[33mManufacturer_Agent[0m (to UserProxy):

Currently, I have 8 units in inventory, and I will receive an additional 6 units from my previous order next round, bringing my total to 14 units. The expected demand will be 7 units for this round, and I should also anticipate the increase in demand in the next round, which will be 8 units. Therefore, I should balance my order to maintain inventory stability and prepare for future demand, I will place an order of 1 unit, ensuring that future anticipated orders can still be met adequately.

Action: [1]

--------------------------------------------------------------------------------
period = 1, action_dict = {'stage_0': 6, 'stage_1': 7, 'stage_2': 7, 'stage_3': 1}, rewards = {'stage_0': 13, 'stage_1': -4, 'stage_2': 6, 'stage_3': 36}, episode_reward = 41, api_cost = 0
[33mUserProxy[0m (to Retailer_Agent):

Now this is the round 3, and you are at the stage 1 o


--------------------------------------------------------------------------------
[33mManufacturer_Agent[0m (to UserProxy):

Currently, I have only 1 unit of inventory and am expecting a demand of 8 units for this round. Given that I will receive 6 units from a previous order in this round and also expect to get 1 unit in the next round, my total supplies will shift to 8 units. To meet the anticipated upcoming demand, I will place an order for 8 units this round, ensuring that I can cover this round's demand and prepare for continuous increasing demand in the future.

Action: [8]

--------------------------------------------------------------------------------
period = 2, action_dict = {'stage_0': 8, 'stage_1': 8, 'stage_2': 8, 'stage_3': 8}, rewards = {'stage_0': 8, 'stage_1': 6, 'stage_2': 14, 'stage_3': 1}, episode_reward = 70, api_cost = 0
[33mUserProxy[0m (to Retailer_Agent):

Now this is the round 4, and you are at the stage 1 of 4 in the supply chain. Given your current stat


--------------------------------------------------------------------------------
[33mManufacturer_Agent[0m (to UserProxy):

I currently have no inventory and a backlog of 1 unit from the previous round, while facing a demand of 10 units for this round. Additionally, I anticipate receiving 1 unit this round and 8 units in the next round, which won’t be enough to cover the current demand. To meet both the current demand and eliminate the backlog efficiently, I will place an order for 11 units in this round, ensuring I can fulfill the demand and prevent increasing backlog in the future.

Action: [11]

--------------------------------------------------------------------------------
period = 3, action_dict = {'stage_0': 9, 'stage_1': 10, 'stage_2': 10, 'stage_3': 11}, rewards = {'stage_0': -21, 'stage_1': 23, 'stage_2': 40, 'stage_3': -59}, episode_reward = 53, api_cost = 0
[33mUserProxy[0m (to Retailer_Agent):

Now this is the round 5, and you are at the stage 1 of 4 in the supply cha


--------------------------------------------------------------------------------
[33mManufacturer_Agent[0m (to UserProxy):

I currently have no units in inventory and a backlog of 10 units, with an expected demand of 10 units for this round due to the linearly increasing demand. I anticipate receiving 8 units from the previous order this round and 11 units in the next round, but that will not be enough to cover the backlog and current demand. Therefore, to meet both the existing backlog and fulfill the orders, I will place an order for 12 units this round to ensure that I can handle my backlog and expected future demand.

Action: [12]

--------------------------------------------------------------------------------
period = 4, action_dict = {'stage_0': 10, 'stage_1': 10, 'stage_2': 10, 'stage_3': 12}, rewards = {'stage_0': 3, 'stage_1': 13, 'stage_2': -5, 'stage_3': -24}, episode_reward = 40, api_cost = 0
[33mUserProxy[0m (to Retailer_Agent):

Now this is the round 6, and you are 


--------------------------------------------------------------------------------
[33mManufacturer_Agent[0m (to UserProxy):

I currently have no inventory and a backlog of 12 units, while facing an expected downstream order of 13 units this round. With the expected deliveries of 11 units this round and 12 units in the next round, I will not be able to fully meet the current demand and backlog. Therefore, I will place an order for 14 units this round, which will help cover my backlog, anticipated demand, and ensure stabilizing inventory levels as the demand continues to grow in future rounds.

Action: [14]

--------------------------------------------------------------------------------
period = 5, action_dict = {'stage_0': 11, 'stage_1': 13, 'stage_2': 13, 'stage_3': 14}, rewards = {'stage_0': 18, 'stage_1': 43, 'stage_2': -77, 'stage_3': -18}, episode_reward = 6, api_cost = 0
[33mUserProxy[0m (to Retailer_Agent):

Now this is the round 7, and you are at the stage 1 of 4 in the sup


--------------------------------------------------------------------------------
[33mManufacturer_Agent[0m (to UserProxy):

I currently have no inventory and a backlog of 14 units, facing an expected downstream order of 29 units this round. With the upcoming deliveries of 12 units this round and 14 units in the next round, I need to place an order that will cover both the backlog and the expected future demand increase. Therefore, I will place an order for 31 units this round to ensure that I can meet the current backlog and anticipated demand efficiently.

Action: [31]

--------------------------------------------------------------------------------
period = 6, action_dict = {'stage_0': 12, 'stage_1': 11, 'stage_2': 29, 'stage_3': 31}, rewards = {'stage_0': 5, 'stage_1': -11, 'stage_2': -37, 'stage_3': -114}, episode_reward = -151, api_cost = 0
[33mUserProxy[0m (to Retailer_Agent):

Now this is the round 8, and you are at the stage 1 of 4 in the supply chain. Given your current s


--------------------------------------------------------------------------------
[33mManufacturer_Agent[0m (to UserProxy):

I have no inventory and a backlog of 31 units while facing a downstream order of 44 units this round. Considering that I will receive 14 units from the previous order this round and 31 units in the next round, I need to cover both the backlog and the anticipated future demand. To ensure that I can meet the current and future needs effectively, I will place an order for 61 units this round, which includes the backlog and the expected demand increase for the next round.

Action: [61]

--------------------------------------------------------------------------------
period = 7, action_dict = {'stage_0': 13, 'stage_1': 23, 'stage_2': 44, 'stage_3': 61}, rewards = {'stage_0': 38, 'stage_1': -92, 'stage_2': -40, 'stage_3': -282}, episode_reward = -527, api_cost = 0
[33mUserProxy[0m (to Retailer_Agent):

Now this is the round 9, and you are at the stage 1 of 4 in the


--------------------------------------------------------------------------------
[33mManufacturer_Agent[0m (to UserProxy):

I currently have no inventory and a backlog of 61 units, with a downstream order of 62 units this round. I will receive 31 units from the previous order this round and 61 units in the next round. To adequately cover my backlog and meet the current demand, I will place an order for 92 units this round, which will help satisfy the backlog, current demand, and prepare for future demand increases.

Action: [92]

--------------------------------------------------------------------------------
period = 8, action_dict = {'stage_0': 14, 'stage_1': 29, 'stage_2': 62, 'stage_3': 92}, rewards = {'stage_0': -25, 'stage_1': -49, 'stage_2': -86, 'stage_3': -454}, episode_reward = -1141, api_cost = 0
[33mUserProxy[0m (to Retailer_Agent):

Now this is the round 10, and you are at the stage 1 of 4 in the supply chain. Given your current state:
 - Lead Time: 2 round(s)
 - Inve


--------------------------------------------------------------------------------
[33mManufacturer_Agent[0m (to UserProxy):

I currently have 11 units in inventory and a backlog of 103 units, facing a downstream order of 92 units for this round. I will receive 61 units from a previous order this round and 92 units in the next round. To ensure that both the backlog is sufficiently addressed and the expected requirements are met, I will place an order for 124 units this round, which will cover the backlog, current demand, and anticipated future increases in demand.

Action: [124]

--------------------------------------------------------------------------------
period = 9, action_dict = {'stage_0': 24, 'stage_1': 42, 'stage_2': 92, 'stage_3': 124}, rewards = {'stage_0': -117, 'stage_1': -52, 'stage_2': -100, 'stage_3': -727}, episode_reward = -2137, api_cost = 0
[33mUserProxy[0m (to Retailer_Agent):

Now this is the round 11, and you are at the stage 1 of 4 in the supply chain. Given 


--------------------------------------------------------------------------------
[33mManufacturer_Agent[0m (to UserProxy):

I currently have 52 units in inventory and a backlog of 175 units, with a downstream order of 146 units for this round. With anticipated deliveries of 92 units this round and 124 units in the next round, my total inventory situation necessitates that I order enough to cover both the existing backlog and the expected demand. To properly fulfill this requirement, I will place an order for 269 units this round, which will address the backlog and anticipated increases in demand while ensuring I can maintain a positive inventory level.

Action: [269]

--------------------------------------------------------------------------------
period = 10, action_dict = {'stage_0': 38, 'stage_1': 68, 'stage_2': 146, 'stage_3': 269}, rewards = {'stage_0': -70, 'stage_1': -112, 'stage_2': -106, 'stage_3': -1650}, episode_reward = -4075, api_cost = 0
[33mUserProxy[0m (to Retailer


--------------------------------------------------------------------------------
[33mManufacturer_Agent[0m (to UserProxy):

I currently have an inventory of 124 units and a backlog of 301 units, with a downstream order of 226 units for this round. I will receive 124 units this round and 269 units in the next round. To meet the current backlog and fulfill the order while stabilizing my inventory, I will place an order for 403 units this round. This amount ensures that I can cover my backlog, fulfill the downstream order, and anticipate future demand increases.

Action: [403]

--------------------------------------------------------------------------------
period = 11, action_dict = {'stage_0': 46, 'stage_1': 100, 'stage_2': 226, 'stage_3': 403}, rewards = {'stage_0': -65, 'stage_1': -128, 'stage_2': -186, 'stage_3': -2630}, episode_reward = -7084, api_cost = 0
Rewards: [-7084]
Mean Episode Reward: -7084.0
Standard Deviation of Episode Reward: 0.0
