# AutoGen for Supply Chain Management

In [1]:
import os
import re
import sys
import time
import numpy as np
from typing import List
from tqdm.notebook import tqdm
from autogen import ConversableAgent
sys.path.append('../src')
from env import env_creator
from config import env_configs
from openai import AzureOpenAI

np.random.seed(42)



In [2]:
# llm_config = {"model": "gpt-4", "api_key": os.environ["OPENAI_API_KEY"]}

# Azure OpenAI connection settings (non-secret values only).
# The endpoint is defined once, over HTTPS, and reused for `base_url` below so
# the environment variable and the client configuration cannot drift apart.
os.environ["OPENAI_API_BASE"] = "https://gavis-openai.openai.azure.com/"
os.environ["OPENAI_API_TYPE"] = "azure"
os.environ["OPENAI_API_VERSION"] = "2024-08-01-preview"  # Adjust version as needed

# SECURITY: the API key must never be written into the notebook. It is read
# from the OPENAI_API_KEY environment variable, which has to be set before the
# kernel starts (or loaded from a secrets manager). Any key that was ever
# committed in this cell must be treated as compromised and rotated.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in the environment that launches "
        "Jupyter (or load it from a secrets manager) before running this cell."
    )

# Define the Azure OpenAI model deployment details
AZURE_MODEL_NAME = "gpt-4o"  # Replace with your deployed model's name


# Single-entry config list passed to the agents as llm_config={"config_list": ...}.
config_list = [
  {
    "model": AZURE_MODEL_NAME,
    "api_type": os.environ["OPENAI_API_TYPE"],
    "api_key": os.environ["OPENAI_API_KEY"],
    "base_url": os.environ["OPENAI_API_BASE"],
    "api_version": os.environ["OPENAI_API_VERSION"]
  }
]



## Creating the Environment

In [3]:
# Pick one of the predefined configurations from `env_configs` by name and
# build the environment from it with `env_creator`. `env_config_name` is also
# used later to choose the demand description shown to the agents.
env_config_name = "constant_demand"
env_config = env_configs[env_config_name]
im_env = env_creator(env_config)
# Show the selected configuration so the run is self-describing.
print(env_config)

{'num_stages': 4, 'num_periods': 12, 'num_agents_per_stage': 4, 'init_inventories': [12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12], 'lead_times': [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], 'demand_fn': <function <lambda> at 0x17ee8c1f0>, 'prod_capacities': [20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20], 'sale_prices': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'order_costs': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'backlog_costs': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'holding_costs': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'stage_names': ['retailer', 'wholesaler', 'distributor', 'manufacturer']}


## Getting Descriptions

In [4]:
def get_state_description(state):
    """Render one agent's state as the bullet list embedded in its prompt.

    Args:
        state: Mapping with the keys 'lead_time', 'inventory', 'backlog',
            'upstream_backlog', 'sales' and 'deliveries'. 'deliveries' must be
            sliceable; only its last `lead_time` entries are shown.

    Returns:
        A multi-line string with one " - ..." line per state field and no
        trailing newline.
    """
    lead_time = state['lead_time']
    deliveries = state['deliveries']
    # `deliveries[-lead_time:]` would return the WHOLE sequence when lead_time
    # is 0 (because -0 == 0), so the zero case is handled explicitly and yields
    # an empty window of the same sequence type.
    arriving = deliveries[-lead_time:] if lead_time > 0 else deliveries[:0]
    return (
        f" - Lead Time: {lead_time} round(s)\n"
        f" - Inventory Level: {state['inventory']} unit(s)\n"
        f" - Current Backlog (you owing to the downstream): {state['backlog']} unit(s)\n"
        f" - Upstream Backlog (your upstream owing to you): {state['upstream_backlog']} unit(s)\n"
        f" - Previous Sales (in the recent round(s), from old to new): {state['sales']}\n"
        f" - Arriving Deliveries (in this and the next round(s), from near to far): {arriving}"
    )

In [5]:
def get_demand_description(env_config_name):
    """Return the sentence describing retailer demand for a named configuration.

    Args:
        env_config_name: Name of the environment configuration.

    Returns:
        The demand description string for that configuration.

    Raises:
        KeyError: If no description exists for `env_config_name`.
    """
    known_descriptions = (
        (
            "constant_demand",
            "The expected demand at the retailer (stage 1) is a constant 4 units for all 12 rounds.",
        ),
        (
            "variable_demand",
            "The expected demand at the retailer (stage 1) is a discrete uniform distribution U{0, 4} for all 12 rounds.",
        ),
        (
            "larger_demand",
            "The expected demand at the retailer (stage 1) is a discrete uniform distribution U{0, 8} for all 12 rounds.",
        ),
        (
            "seasonal_demand",
            "The expected demand at the retailer (stage 1) is a discrete uniform distribution U{0, 4} for the first 4 rounds, "
            "and a discrete uniform distribution U{5, 8} for the last 8 rounds.",
        ),
        (
            "normal_demand",
            "The expected demand at the retailer (stage 1) is a normal distribution N(4, 2^2), "
            "truncated at 0, for all 12 rounds.",
        ),
    )

    # Scan in declaration order and compare with ==, so any value that is not
    # one of the known names ends up at the KeyError below.
    for config_name, description in known_descriptions:
        if env_config_name == config_name:
            return description

    raise KeyError(f"Error: {env_config_name} not implemented.")

# Sanity check: show the demand sentence for the currently selected configuration.
print(get_demand_description(env_config_name))

The expected demand at the retailer (stage 1) is a constant 4 units for all 12 rounds.


## Creating Agents

In [6]:
# Proxy agent that starts each chat with a stage agent. It is configured
# without an LLM (llm_config=False) and never asks for human input.
user_proxy = ConversableAgent(
    name="UserProxy",
    llm_config=False,
    human_input_mode="NEVER",
)

In [7]:
def create_agents(stage_names: List[str], num_agents_per_stage: int, llm_config) -> List[ConversableAgent]:
    """Build the LLM agents for every stage of the supply chain.

    Args:
        stage_names: Stage names in order; stage numbers in the system message
            are 1-based positions in this list.
        num_agents_per_stage: How many agents to create for each stage.
        llm_config: LLM configuration passed unchanged to every agent.

    Returns:
        A flat list ordered stage by stage, so the agent for (stage, index)
        sits at position ``stage * num_agents_per_stage + index``.
    """
    total_stages = len(stage_names)

    def build_agent(stage_index, stage_label, replica):
        # The system message fixes the agent's role; the per-round state is
        # supplied separately in each chat message.
        role_prompt = (
            f"You play a crucial role in a {total_stages}-stage supply chain as the stage {stage_index + 1} ({stage_label}). "
            "Your goal is to minimize the total cost by managing inventory and orders effectively."
        )
        return ConversableAgent(
            name=f"{stage_label.capitalize()}Agent_{replica}",
            system_message=role_prompt,
            llm_config=llm_config,
            code_execution_config=False,
            human_input_mode="NEVER",
        )

    return [
        build_agent(stage_index, stage_label, replica)
        for stage_index, stage_label in enumerate(stage_names)
        for replica in range(num_agents_per_stage)
    ]

# Create num_agents_per_stage agents for each configured stage, all sharing the same LLM config list.
stage_agents = create_agents(env_config["stage_names"], env_config["num_agents_per_stage"], llm_config={"config_list": config_list})

In [8]:
# Print each agent's system message to verify the stage/role wording.
for stage_agent in stage_agents:
    print(stage_agent.system_message)

You play a crucial role in a 4-stage supply chain as the stage 1 (retailer). Your goal is to minimize the total cost by managing inventory and orders effectively.
You play a crucial role in a 4-stage supply chain as the stage 1 (retailer). Your goal is to minimize the total cost by managing inventory and orders effectively.
You play a crucial role in a 4-stage supply chain as the stage 1 (retailer). Your goal is to minimize the total cost by managing inventory and orders effectively.
You play a crucial role in a 4-stage supply chain as the stage 1 (retailer). Your goal is to minimize the total cost by managing inventory and orders effectively.
You play a crucial role in a 4-stage supply chain as the stage 2 (wholesaler). Your goal is to minimize the total cost by managing inventory and orders effectively.
You play a crucial role in a 4-stage supply chain as the stage 2 (wholesaler). Your goal is to minimize the total cost by managing inventory and orders effectively.
You play a crucial

## Running Simulations

In [14]:
def run_simulation(env_config_name, im_env, user_proxy, stage_agents):
    """Play one full episode, asking an LLM agent for every order quantity.

    In each period, every (stage, agent) slot gets a prompt containing its
    current state, the demand description and, for stages after the first, the
    order just placed by the same-index agent one stage downstream. The reply
    is parsed for an integer in square brackets, and the collected actions are
    passed to ``im_env.step``.

    Args:
        env_config_name: Configuration name, used to look up the demand
            description.
        im_env: Environment exposing ``num_periods``, ``num_stages``,
            ``num_agents_per_stage``, ``stage_names``, ``state_dict``,
            ``reset()``, ``parse_state()`` and ``step()``.
        user_proxy: Agent that initiates each single-turn chat.
        stage_agents: Flat, stage-major list of agents (the layout produced by
            ``create_agents``): the agent for ``(stage, agent)`` is at index
            ``stage * num_agents_per_stage + agent``.

    Returns:
        The sum of all per-agent rewards over the episode.

    Raises:
        ValueError: If ``stage_agents`` has fewer entries than
            ``num_stages * num_agents_per_stage``.
        KeyError: If ``env_config_name`` has no demand description.
    """
    demand_description = get_demand_description(env_config_name)

    # Validate up front so a short agent list fails before any API call is made.
    num_agents_per_stage = im_env.num_agents_per_stage
    required_agents = im_env.num_stages * num_agents_per_stage
    if len(stage_agents) < required_agents:
        raise ValueError(
            f"Expected at least {required_agents} stage agents "
            f"({im_env.num_stages} stages x {num_agents_per_stage} agents), got {len(stage_agents)}."
        )

    # Per-period history, recorded for inspection while debugging; not returned.
    all_state_dicts = {}
    all_action_dicts = {}
    all_reward_dicts = {}
    episode_reward = 0
    api_cost = 0
    im_env.reset()

    for period in range(im_env.num_periods):
        state_dict = im_env.parse_state(im_env.state_dict)
        all_state_dicts[period] = state_dict
        action_dict = {}

        # Stages are visited from downstream to upstream so that each stage can
        # be told the order its downstream partner placed in this same round.
        for stage in range(im_env.num_stages):
            for agent in range(num_agents_per_stage):
                stage_state = state_dict[f'stage_{stage}_agent_{agent}']

                if stage != 0:
                    downstream_order = f"Your downstream order from the stage {stage} for this round is {action_dict[f'stage_{stage - 1}_agent_{agent}']}. "
                else:
                    downstream_order = ""

                message = (
                    f"Now this is the round {period + 1}, "
                    f"and you are at the stage {stage + 1}: {im_env.stage_names[stage]} of {im_env.num_stages} in the supply chain. "
                    f"Given your current state:\n{get_state_description(stage_state)}\n\n"
                    f"{demand_description} {downstream_order}"
                    "What is your action (order quantity) for this round?\n\n"
                    "Golden rule of this game: Open orders should always equal to \"expected downstream orders + backlog\". "
                    "If open orders are larger than this, the inventory will rise (once the open orders arrive). "
                    "If open orders are smaller than this, the backlog will not go down and it may even rise. "
                    "Please consider the lead time and place your order in advance. "
                    "Remember that your upstream has its own lead time, so do not wait until your inventory runs out. "
                    "Also, avoid ordering too many units at once. "
                    "Try to spread your orders over multiple rounds to prevent the bullwhip effect. "
                    "Anticipate future demand changes and adjust your orders accordingly to maintain a stable inventory level.\n\n"
                    "Please state your reason in 1-2 sentences first "
                    "and then provide your action as a non-negative integer within brackets (e.g. [0])."
                )

                # stage_agents is a flat stage-major list. Indexing it with
                # `stage` alone would send every prompt to one of the first
                # few agents, whose system message describes a different role.
                recipient = stage_agents[stage * num_agents_per_stage + agent]

                chat_result = user_proxy.initiate_chat(
                    recipient,
                    message={'content': message},
                    summary_method="last_msg",
                    max_turns=1,
                    clear_history=False,
                )
                chat_summary = chat_result.summary
                api_cost += chat_result.cost['usage_including_cached_inference']['total_cost']

                # The first "[<digits>]" in the reply is the order quantity;
                # a reply without one is treated as ordering nothing.
                match = re.search(r'\[(\d+)\]', chat_summary)
                if match:
                    stage_action = int(match.group(1))
                else:
                    stage_action = 0
                action_dict[f'stage_{stage}_agent_{agent}'] = stage_action

        next_states, rewards, terminations, truncations, infos = im_env.step(action_dict)
        next_state_dict = im_env.parse_state(next_states)
        all_state_dicts[period + 1] = next_state_dict
        all_action_dicts[period + 1] = action_dict
        all_reward_dicts[period + 1] = rewards
        episode_reward += sum(rewards.values())
        print(
            f"period = {period}, action_dict = {action_dict}, rewards = {rewards}, episode_reward = {episode_reward}, " \
            f"api_cost = {api_cost}")
        print('=' * 80)

    return episode_reward

In [15]:
# Episode rewards, one entry per simulated episode.
rewards = []

# Run the configured number of episodes (currently 1). Fresh agents are
# created for every episode before the simulation runs.
for _ in tqdm(range(1)):
    stage_agents = create_agents(stage_names=env_config["stage_names"], num_agents_per_stage=env_config['num_agents_per_stage'], llm_config={'config_list':config_list})
    reward = run_simulation(env_config_name, im_env, user_proxy, stage_agents)
    rewards.append(reward)
    print(f"rewards = {rewards}")

# Summary statistics across episodes.
mean_reward = np.mean(rewards)
std_reward = np.std(rewards)

print(f"Rewards: {rewards}")
print(f"Mean Episode Reward: {mean_reward}")
print(f"Standard Deviation of Episode Reward: {std_reward}")

  0%|          | 0/1 [00:00<?, ?it/s]

[33mUserProxy[0m (to RetailerAgent_0):

Now this is the round 1, and you are at the stage 1: retailer of 4 in the supply chain. Given your current state:
 - Lead Time: 2 round(s)
 - Inventory Level: 12 unit(s)
 - Current Backlog (you owing to the downstream): 0 unit(s)
 - Upstream Backlog (your upstream owing to you): 0 unit(s)
 - Previous Sales (in the recent round(s), from old to new): [0, 0]
 - Arriving Deliveries (in this and the next round(s), from near to far): [0, 0]

The expected demand at the retailer (stage 1) is a constant 4 units for all 12 rounds. What is your action (order quantity) for this round?

Golden rule of this game: Open orders should always equal to "expected downstream orders + backlog". If open orders are larger than this, the inventory will rise (once the open orders arrive). If open orders are smaller than this, the backlog will not go down and it may even rise. Please consider the lead time and place your order in advance. Remember that your upstream has 