# AutoGen for Supply Chain Management

In [1]:
import os
import re
import sys
import time
import numpy as np
from typing import List
from tqdm.notebook import tqdm
from autogen import ConversableAgent
from env import env_creator
from config import env_configs

np.random.seed(42)

Variable demand for t=0: 4


In [2]:
for name, config in env_configs.items():
    # Evaluate demand function at a specific time, say t=0.
    demand_value = config['demand_fn'](0)
    print(f"{name} demand for t=0: {demand_value}")

two_agent demand for t=0: 4
constant_demand demand for t=0: 4
variable_demand demand for t=0: 3
larger_demand demand for t=0: 7
seasonal_demand demand for t=0: 4
normal_demand demand for t=0: 1
increasing_demand demand for t=0: 5
cyclical_demand demand for t=0: 5
demand_shock demand for t=0: 5
stochastic_demand demand for t=0: 0


In [3]:
os.environ["DEEPSEEK_API_KEY"] = "YOUR_DEEPSEEK_KEY"
llm_config = {
    "model": "deepseek-chat",  # Replace with the correct DeepSeek model name
    "api_key": os.getenv("DEEPSEEK_API_KEY"),  # Fetch the API key from environment variables
    "base_url": "https://api.deepseek.com"   # DeepSeek's API endpoint
}

In [4]:
os.environ["OPENAI_API_KEY"] = "YOUR_OPENAI_KEY"

In [5]:
llm_config = {"model": "gpt-4o-mini", "api_key": os.getenv('OPENAI_API_KEY')}

## Creating the Environment

In [6]:
env_config_name ="demand_shock"
env_config = env_configs[env_config_name]
im_env = env_creator(env_config)

## Getting Descriptions

In [7]:
def get_state_description(state):
    return (
        f" - Lead Time: {state['lead_time']} round(s)\n"
        f" - Inventory Level: {state['inventory']} unit(s)\n"
        f" - Current Backlog (you owing to the downstream): {state['backlog']} unit(s)\n"
        f" - Upstream Backlog (your upstream owing to you): {state['upstream_backlog']} unit(s)\n"
        f" - Previous Sales (in the recent round(s), from old to new): {state['sales']}\n"
        f" - Arriving Deliveries (in this and the next round(s), from near to far): {state['deliveries'][-state['lead_time']:]}"
    )

In [8]:
def get_demand_description(env_config_name):
    if env_config_name == "constant_demand":
        return "The expected demand at the retailer (stage 1) is a constant 4 units for all 12 rounds."
    elif env_config_name == "variable_demand":
        return "The expected demand at the retailer (stage 1) is a discrete uniform distribution U{0, 4} for all 12 rounds."
    elif env_config_name == "larger_demand":
        return "The expected demand at the retailer (stage 1) is a discrete uniform distribution U{0, 8} for all 12 rounds."
    elif env_config_name == "seasonal_demand":
        return "The expected demand at the retailer (stage 1) is a discrete uniform distribution U{0, 4} for the first 4 rounds, " \
            "and a discrete uniform distribution U{5, 8} for the last 8 rounds."
    elif env_config_name == "normal_demand":
        return "The expected demand at the retailer (stage 1) is a normal distribution N(4, 2^2), " \
            "truncated at 0, for all 12 rounds."
    elif env_config_name == "increasing_demand":
        return "The expected demand at the retailer (stage 1) is a linearly increasing demand by starting with an initial value 6, " \
            "and growing by 1 unit every period"
    elif env_config_name == "stochastic_demand":
        return "The expected demand at the retailer (stage 1) is an Integer-Valued Autoregressive INAR(1) process with a thinning probability of 0.5, meaning 50% of the previous period's " \
            "demand carries over. New demand is added as Poisson arrivals with a mean of 2, ensuring the overall demand remains an integer count."
    elif env_config_name == "cyclical_demand":
        return "The expected demand at the retailer (stage 1) is computed as a seasonal sine wave—with a 12-round period, a 5-unit amplitude, " \
            "and a 6-unit upward shift—whose value is rounded to yield an integer."
    elif env_config_name == "demand_shock":
        return "The expected demand at the retailer (stage 1) is is normally 5, but it jumps by 8 units to 13 during periods 8 through 10," \
            "capturing a temporary demand shock."
    else:
        raise KeyError(f"Error: {env_config_name} not implemented.")

print(get_demand_description(env_config_name))

The expected demand at the retailer (stage 1) is is normally 5, but it jumps by 8 units to 13 during periods 8 through 10,capturing a temporary demand shock.


## Creating Agents

In [9]:
user_proxy = ConversableAgent(
    name="UserProxy",
    llm_config=False,
    human_input_mode="NEVER",
)

In [10]:
def create_agents(stage_names: List[str], llm_config) -> List[ConversableAgent]:
    agents = []
    num_stages = len(stage_names)
    
    for stage, stage_name in enumerate(stage_names):
        agent = ConversableAgent(
            name=f"{stage_name.capitalize()}_Agent",
            system_message=f"You play a crucial role in a {num_stages}-stage supply chain as the stage {stage + 1} ({stage_name}). "
                "Your goal is to minimize the total cost by managing inventory and orders effectively.",
            llm_config=llm_config,
            code_execution_config=False,
            human_input_mode="NEVER",
        )
        agents.append(agent)
        
    return agents

stage_agents = create_agents(env_config["stage_names"], llm_config)



## Running Simulations

In [11]:
def run_simulation(env_config_name, im_env, user_proxy, stage_agents):
    demand_description = get_demand_description(env_config_name) 
    all_state_dicts = {}
    all_action_dicts = {}
    all_reward_dicts = {}
    episode_reward = 0
    api_cost = 0
    im_env.reset()
    
    if env_config_name == 'stochastic_demand':
        print("Generating a new stochastic demand series...")  
        env_config["demand_fn"].generate_new_series()  # Reset demand for a new episode
    
    
    for period in range(im_env.num_periods):
        state_dict = im_env.parse_state(im_env.state_dict)
        all_state_dicts[period] = state_dict
        action_dict = {}
        
        for stage in range(im_env.num_stages):
            stage_state = state_dict[f'stage_{stage}']
            
            if stage != 0:
                downstream_order = f"Your downstream order from the stage {stage} for this round is {action_dict[f'stage_{stage - 1}']}. "
            else:
                downstream_order = ""

            message = (
                f"Now this is the round {period + 1}, "
                f"and you are at the stage {stage + 1} of {im_env.num_stages} in the supply chain. "
                f"Given your current state:\n{get_state_description(stage_state)}\n\n"
                f"{demand_description} {downstream_order}"
                "What is your action (order quantity) for this round?\n\n"
                "Please state your reason in 1-2 sentences first "
                "and then provide your action as a non-negative integer within brackets (e.g. [0])."
            )

            chat_result = user_proxy.initiate_chat(
                stage_agents[stage],
                message=message,
                summary_method="last_msg",
                max_turns=1,
                clear_history=False,
            )
            chat_summary = chat_result.summary
            api_cost += chat_result.cost['usage_including_cached_inference']['total_cost']

            match = re.search(r'\[(\d+)\]', chat_summary)
            if match:
                stage_action = int(match.group(1))
            else:
                stage_action = 0
            action_dict[f'stage_{stage}'] = stage_action
            
        next_states, rewards, terminations, truncations, infos = im_env.step(action_dict)
        next_state_dict = im_env.parse_state(next_states)
        all_state_dicts[period + 1] = next_state_dict
        all_action_dicts[period + 1] = action_dict
        all_reward_dicts[period + 1] = rewards
        episode_reward += sum(rewards.values())
        print(
            f"period = {period}, action_dict = {action_dict}, rewards = {rewards}, episode_reward = {episode_reward}, " \
            f"api_cost = {api_cost}")
        print('=' * 80)

    return episode_reward

In [12]:
from tqdm import tqdm
import numpy as np

rewards = []

for _ in tqdm(range(1)):


    stage_agents = create_agents(env_config["stage_names"], llm_config)
    reward = run_simulation(env_config_name, im_env, user_proxy, stage_agents)
    rewards.append(reward)
    print(f"rewards = {rewards}")

mean_reward = np.mean(rewards)
std_reward = np.std(rewards)

print(f"Rewards: {rewards}")
print(f"Mean Episode Reward: {mean_reward}")
print(f"Standard Deviation of Episode Reward: {std_reward}")



  0%|          | 0/1 [00:00<?, ?it/s]

[33mUserProxy[0m (to Retailer_Agent):

Now this is the round 1, and you are at the stage 1 of 4 in the supply chain. Given your current state:
 - Lead Time: 2 round(s)
 - Inventory Level: 12 unit(s)
 - Current Backlog (you owing to the downstream): 0 unit(s)
 - Upstream Backlog (your upstream owing to you): 0 unit(s)
 - Previous Sales (in the recent round(s), from old to new): [0, 0]
 - Arriving Deliveries (in this and the next round(s), from near to far): [0, 0]

The expected demand at the retailer (stage 1) is is normally 5, but it jumps by 8 units to 13 during periods 8 through 10,capturing a temporary demand shock. What is your action (order quantity) for this round?

Please state your reason in 1-2 sentences first and then provide your action as a non-negative integer within brackets (e.g. [0]).

--------------------------------------------------------------------------------
[33mRetailer_Agent[0m (to UserProxy):

Given the expected demand of 5 units plus a temporary jump to 1

  0%|          | 0/1 [00:08<?, ?it/s]


KeyboardInterrupt: 


--------------------------------------------------------------------------------
[33mRetailer_Agent[0m (to UserProxy):

To prepare for the demand shock starting in round 8, I will place a larger order to ensure sufficient inventory is available. The arriving delivery of 12 units in this round will help replenish stock, but I need to account for the upstream backlog and the expected spike in demand.

[15]

--------------------------------------------------------------------------------
[33mUserProxy[0m (to Wholesaler_Agent):

Now this is the round 7, and you are at the stage 2 of 4 in the supply chain. Given your current state:
 - Lead Time: 2 round(s)
 - Inventory Level: 0 unit(s)
 - Current Backlog (you owing to the downstream): 24 unit(s)
 - Upstream Backlog (your upstream owing to you): 29 unit(s)
 - Previous Sales (in the recent round(s), from old to new): [12, 0]
 - Arriving Deliveries (in this and the next round(s), from near to far): [13, 14]

The expected demand at the ret


--------------------------------------------------------------------------------
[33mDistributor_Agent[0m (to UserProxy):

To address the backlog of 20 units and meet the downstream order of 13 units, I need to place an order that covers the current demand and replenishes inventory. Considering the lead time of 2 rounds and the temporary demand shock, I will order enough to fulfill the downstream order and maintain a buffer.

[33]

--------------------------------------------------------------------------------
[33mUserProxy[0m (to Manufacturer_Agent):

Now this is the round 8, and you are at the stage 4 of 4 in the supply chain. Given your current state:
 - Lead Time: 2 round(s)
 - Inventory Level: 20 unit(s)
 - Current Backlog (you owing to the downstream): 95 unit(s)
 - Upstream Backlog (your upstream owing to you): 0 unit(s)
 - Previous Sales (in the recent round(s), from old to new): [26, 26]
 - Arriving Deliveries (in this and the next round(s), from near to far): [43, 44]




--------------------------------------------------------------------------------
[33mRetailer_Agent[0m (to UserProxy):

To meet the increased demand of 13 units in this final round of the demand shock and prepare for the return to normal demand, I will place a moderate order to replenish inventory. The arriving deliveries of 14 and 22 units in the next two rounds will help rebuild stock levels.

[10]

--------------------------------------------------------------------------------
[33mUserProxy[0m (to Wholesaler_Agent):

Now this is the round 10, and you are at the stage 2 of 4 in the supply chain. Given your current state:
 - Lead Time: 2 round(s)
 - Inventory Level: 2 unit(s)
 - Current Backlog (you owing to the downstream): 16 unit(s)
 - Upstream Backlog (your upstream owing to you): 0 unit(s)
 - Previous Sales (in the recent round(s), from old to new): [14, 22]
 - Arriving Deliveries (in this and the next round(s), from near to far): [24, 22]

The expected demand at the retail


--------------------------------------------------------------------------------
[33mDistributor_Agent[0m (to UserProxy):

To maintain sufficient inventory and meet the downstream order of 5 units, I will place an order that covers the current demand and replenishes inventory. Considering the lead time of 2 rounds and the temporary demand shock, I will order enough to fulfill the downstream order and maintain a buffer.

[5]

--------------------------------------------------------------------------------
[33mUserProxy[0m (to Manufacturer_Agent):

Now this is the round 11, and you are at the stage 4 of 4 in the supply chain. Given your current state:
 - Lead Time: 2 round(s)
 - Inventory Level: 62 unit(s)
 - Current Backlog (you owing to the downstream): 82 unit(s)
 - Upstream Backlog (your upstream owing to you): 0 unit(s)
 - Previous Sales (in the recent round(s), from old to new): [26, 26]
 - Arriving Deliveries (in this and the next round(s), from near to far): [22, 10]

The ex

100%|██████████| 1/1 [00:01<00:00,  1.41s/it]

period = 11, action_dict = {'stage_0': 5, 'stage_1': 5, 'stage_2': 5, 'stage_3': 5}, rewards = {'stage_0': -35, 'stage_1': -22, 'stage_2': -171, 'stage_3': 23}, episode_reward = -2564, api_cost = 0
rewards = [-2564]
Rewards: [-2564]
Mean Episode Reward: -2564.0
Standard Deviation of Episode Reward: 0.0



