# 初始化环境

# In [5]:
import functools
import random
from copy import copy

import numpy as np
from gymnasium.spaces import Discrete, MultiDiscrete

from pettingzoo import ParallelEnv

# In [None]:
class warehouse_env(ParallelEnv):
    """Two-agent warehouse grid world with a "manager" and a "picker".

    The picker walks on a ``width`` x ``height`` grid, one cell per step.
    The manager's task-assignment logic is not implemented yet: its action
    is read on every step but has no effect on the state.

    Reward and termination rules are also still undefined, so every step
    yields zero reward and an episode only ends by truncation once the step
    counter exceeds ``max_steps``.
    """

    # The metadata holds environment constants.
    # The "name" metadata allows the environment to be pretty printed.
    metadata = {
        "name": "warehouse_env_v0",
    }

    # Picker moves as (dx, dy). y grows downwards, so "up" decreases y.
    _PICKER_MOVES = {
        "up": (0, -1),
        "down": (0, 1),
        "left": (-1, 0),
        "right": (1, 0),
    }
    # Integer picker actions (e.g. sampled from Discrete(4)) index this tuple.
    _PICKER_ACTION_NAMES = ("up", "down", "left", "right")

    def __init__(self, width=10, height=10, max_steps=100):
        """Create the environment; call reset() before the first step().

        Defines the following attributes:
        - manager x and y coordinates
        - picker x and y coordinates
        - timestep
        - possible_agents

        Args:
            width: Number of grid columns.
            height: Number of grid rows.
            max_steps: Step-counter value above which the episode is truncated.

        The observation/action spaces are built once here so that
        observation_space() and action_space() return the same object on
        every call. These attributes should not be changed after
        initialization.
        """
        self.manager_x = None
        self.manager_y = None
        self.picker_x = None
        self.picker_y = None
        self.timestep = None
        self.possible_agents = ["manager", "picker"]
        self.agents = []
        self.height = height  # warehouse height
        self.width = width
        self.max_steps = max_steps

        # Every agent observes (picker_x, picker_y, manager_x, manager_y).
        position_space = MultiDiscrete([self.width, self.height] * 2)
        self.observation_spaces = {
            agent: position_space for agent in self.possible_agents
        }
        self.action_spaces = {
            # TODO: placeholder single no-op action until task assignment
            # (e.g. AssignTaskToPicker(task_id, picker_id)) is designed.
            "manager": Discrete(1),
            "picker": Discrete(len(self._PICKER_ACTION_NAMES)),
        }

    def _observe(self):
        """Return the shared position observation for each live agent."""
        state = np.array(
            [self.picker_x, self.picker_y, self.manager_x, self.manager_y],
            dtype=np.int64,
        )
        return {agent: state.copy() for agent in self.agents}

    def _picker_delta(self, action):
        """Translate a picker action into a (dx, dy) step.

        Accepts either a direction name ("up", "down", "left", "right") or
        an integer index into ``_PICKER_ACTION_NAMES``. Anything else maps
        to (0, 0), i.e. the picker stays where it is.
        """
        if not isinstance(action, str):
            try:
                index = int(action)
            except (TypeError, ValueError):
                return (0, 0)
            if not 0 <= index < len(self._PICKER_ACTION_NAMES):
                return (0, 0)
            action = self._PICKER_ACTION_NAMES[index]
        return self._PICKER_MOVES.get(action, (0, 0))

    def reset(self, seed=None, options=None):
        """Start a new episode.

        Re-activates all agents, zeroes the step counter and puts both
        agents on cell (0, 0).

        Args:
            seed: Accepted for API compatibility; unused because the start
                state is deterministic.
            options: Accepted for API compatibility; unused.

        Returns:
            ``(observations, infos)``, both dicts keyed by agent name.
        """
        self.agents = copy(self.possible_agents)
        self.timestep = 0

        # TODO: placeholder start cells; replace once the warehouse layout
        # is defined.
        self.picker_x = 0
        self.picker_y = 0
        self.manager_x = 0
        self.manager_y = 0

        observations = self._observe()
        infos = {agent: {} for agent in self.agents}
        return observations, infos

    def step(self, actions):
        """Advance the environment by one step.

        Updates:
        - picker x and y coordinates
        - terminations
        - truncations
        - rewards
        - timestep
        - infos

        Args:
            actions: Dict with an entry for "manager" and for "picker".
                The picker action is a direction name or an integer index
                (0=up, 1=down, 2=left, 3=right).

        Returns:
            ``(observations, rewards, terminations, truncations, infos)``,
            each a dict keyed by agent name.

        Raises:
            RuntimeError: If called before reset().
            KeyError: If ``actions`` lacks an entry for either agent.
        """
        if self.timestep is None:
            raise RuntimeError("reset() must be called before step()")

        # Execute actions
        manager_action = actions["manager"]  # noqa: F841 - not used yet, see below
        picker_action = actions["picker"]

        # The manager's role is macro-level control and task assignment; its
        # goal is to optimise the efficiency of the whole system.
        # Its action space should reflect the core of its decision: which
        # task to assign to whom (or which task to assign).
        # Combined with a nested logit (NL) policy, the core actions should be:
        # 1. Observe the current state and use the NL model to compute the
        #    selection probability of each pending task, deciding which task
        #    is assigned to whom:
        #    AssignTaskToPicker(task_id, picker_id)
        # 2. Observe the current state, ... (TODO: still to be specified)

        # Update picker position; moves that would leave the grid are ignored.
        dx, dy = self._picker_delta(picker_action)
        new_x = self.picker_x + dx
        new_y = self.picker_y + dy
        if 0 <= new_x < self.width and 0 <= new_y < self.height:
            self.picker_x, self.picker_y = new_x, new_y

        # Check termination conditions
        # TODO: no task-completion rule exists yet, so nothing terminates and
        # every reward is zero.
        terminations = {agent: False for agent in self.agents}
        rewards = {agent: 0 for agent in self.agents}

        # Check truncation conditions
        truncated = self.timestep > self.max_steps
        truncations = {agent: truncated for agent in self.agents}
        self.timestep += 1

        observations = self._observe()
        infos = {agent: {} for agent in self.agents}

        # Finished agents are removed from the live-agent list.
        if any(terminations.values()) or all(truncations.values()):
            self.agents = []

        return observations, rewards, terminations, truncations, infos

    def render(self):
        """Rendering is not implemented; does nothing."""
        pass

    def observation_space(self, agent):
        """Return the observation space built in __init__ for ``agent``."""
        return self.observation_spaces[agent]

    def action_space(self, agent):
        """Return the action space built in __init__ for ``agent``."""
        return self.action_spaces[agent]