In [39]:
import platform
print(platform.python_version())


3.8.5


In [18]:
try:
    import mlagents
    from mlagents_envs.environment import UnityEnvironment as UE
    from mlagents_envs.side_channel.engine_configuration_channel import EngineConfigurationChannel
    print("ml-agents already installed")
except ImportError:
    !pip install mlagents==0.26.0
    print("Installed ml-agents")

ml-agents already installed


In [19]:
# -----------------
# This code is used to close an env that might not have been closed before
try:
    env.close()
except:
    pass
# -----------------

In [22]:
# env = UE(file_name='run31', seed=1, side_channels=[])
channel = EngineConfigurationChannel()
env = UE("run31", base_port=5000, side_channels=[channel])
channel.set_configuration_parameters(time_scale = 20.0)

In [23]:
env.reset()

In [24]:
behavior_name = list(env.behavior_specs)[0]
print(f"Name of the behavior : {behavior_name}")
spec = env.behavior_specs[behavior_name]

Name of the behavior : Hummingbird?team=0


In [25]:
spec

BehaviorSpec(observation_specs=[ObservationSpec(shape=(44,), dimension_property=(<DimensionProperty.NONE: 1>,), observation_type=<ObservationType.DEFAULT: 0>, name='RayPerceptionSensor'), ObservationSpec(shape=(3,), dimension_property=(<DimensionProperty.NONE: 1>,), observation_type=<ObservationType.DEFAULT: 0>, name='StackingSensor_size3_VectorSensor_size1')], action_spec=ActionSpec(continuous_size=0, discrete_branches=(3, 3, 3)))

In [26]:
print("Number of observations : ", len(spec.observation_specs))

Number of observations :  2


In [27]:
# Is there a visual observation ?
# Visual observation have 3 dimensions: Height, Width and number of channels
vis_obs = any(len(spec.shape) == 3 for spec in spec.observation_specs)
print("Is there a visual observation ?", vis_obs)

Is there a visual observation ? False


In [28]:
# Is the Action continuous or multi-discrete ?
if spec.action_spec.continuous_size > 0:
    print(f"There are {spec.action_spec.continuous_size} continuous actions")
if spec.action_spec.is_discrete():
    print(f"There are {spec.action_spec.discrete_size} discrete actions")


# How many actions are possible ?
#print(f"There are {spec.action_size} action(s)")

# For discrete actions only : How many different options does each action has ?
if spec.action_spec.discrete_size > 0:
    for action, branch_size in enumerate(spec.action_spec.discrete_branches):
        print(f"Action number {action} has {branch_size} different options")
    


There are 3 discrete actions
Action number 0 has 3 different options
Action number 1 has 3 different options
Action number 2 has 3 different options


In [29]:
decision_steps, terminal_steps = env.get_steps(behavior_name)

In [30]:
env.set_actions(behavior_name, spec.action_spec.empty_action(len(decision_steps)))

In [31]:
env.step()

In [32]:
import matplotlib.pyplot as plt
%matplotlib inline

for index, obs_spec in enumerate(spec.observation_specs):
    if len(obs_spec.shape) == 3:
        print("Here is the first visual observation")
        plt.imshow(decision_steps.obs[index][0,:,:,:])
        plt.show()

for index, obs_spec in enumerate(spec.observation_specs):
    if len(obs_spec.shape) == 1:
        print("First vector observations : ", decision_steps.obs[index][0,:])

First vector observations :  [0.         1.         0.         0.23652978 0.         1.
 0.         0.2127816  0.         1.         0.         0.28532526
 0.         1.         0.         0.20440233 0.         1.
 0.         0.24007748 0.         1.         0.         0.20857991
 0.         1.         0.         0.21439953 0.         1.
 0.         0.22667955 0.         1.         0.         0.204668
 0.         1.         0.         0.26548997 0.         1.
 0.         0.20757797]
First vector observations :  [0. 0. 0.]


In [33]:
len(decision_steps.obs)

2

In [34]:
decision_steps.obs

[array([[0.        , 1.        , 0.        , 0.23652978, 0.        ,
         1.        , 0.        , 0.2127816 , 0.        , 1.        ,
         0.        , 0.28532526, 0.        , 1.        , 0.        ,
         0.20440233, 0.        , 1.        , 0.        , 0.24007748,
         0.        , 1.        , 0.        , 0.20857991, 0.        ,
         1.        , 0.        , 0.21439953, 0.        , 1.        ,
         0.        , 0.22667955, 0.        , 1.        , 0.        ,
         0.204668  , 0.        , 1.        , 0.        , 0.26548997,
         0.        , 1.        , 0.        , 0.20757797]], dtype=float32),
 array([[0., 0., 0.]], dtype=float32)]

In [35]:
for obs in decision_steps.obs:
    print(len(obs[0]))

44
3


In [36]:
action = spec.action_spec.random_action(len(decision_steps))
action.discrete

array([[0, 1, 2]], dtype=int32)

In [49]:
for episode in range(1):
    env.reset()
    decision_steps, terminal_steps = env.get_steps(behavior_name)
    tracked_agent = -1 # -1 indicates not yet tracking
    done = False # For the tracked_agent
    episode_rewards = 0 # For the tracked_agent
    while not done:
        # Track the first agent we see if not tracking 
        # Note : len(decision_steps) = [number of agents that requested a decision]
        if tracked_agent == -1 and len(decision_steps) >= 1:
            tracked_agent = decision_steps.agent_id[0] 

        # Generate an action for all agents
        print(decision_steps[0])
        action = spec.action_spec.random_action(len(decision_steps))
#         print(action.discrete)
        # Set the actions
        env.set_actions(behavior_name, action)

        # Move the simulation forward
        env.step()

        # Get the new simulation results
        decision_steps, terminal_steps = env.get_steps(behavior_name)
        if tracked_agent in decision_steps: # The agent requested a decision
            episode_rewards += decision_steps[tracked_agent].reward
        if tracked_agent in terminal_steps: # The agent terminated its episode
            episode_rewards += terminal_steps[tracked_agent].reward
            done = True
    print(f"Total rewards for episode {episode} is {episode_rewards}")

DecisionStep(obs=[array([0.        , 1.        , 0.        , 0.20258905, 0.        ,
       1.        , 0.        , 0.21149291, 0.        , 1.        ,
       0.        , 0.20615737, 0.        , 1.        , 0.        ,
       0.23587246, 0.        , 1.        , 0.        , 0.22336105,
       0.        , 1.        , 0.        , 0.28579575, 0.        ,
       1.        , 0.        , 0.26058814, 0.        , 1.        ,
       0.        , 0.2364544 , 0.        , 1.        , 0.        ,
       0.25763774, 1.        , 0.        , 0.        , 0.05811008,
       0.        , 1.        , 0.        , 0.22361378], dtype=float32), array([0., 0., 0.], dtype=float32)], reward=0.049999997, agent_id=0, action_mask=[array([False, False, False]), array([False, False, False]), array([False, False, False])], group_id=0, group_reward=0.0)
DecisionStep(obs=[array([1.        , 0.        , 0.        , 0.04938836, 1.        ,
       0.        , 0.        , 0.06931686, 1.        , 0.        ,
       0.        , 

DecisionStep(obs=[array([1.        , 0.        , 0.        , 0.02609956, 1.        ,
       0.        , 0.        , 0.02691203, 1.        , 0.        ,
       0.        , 0.02714112, 1.        , 0.        , 0.        ,
       0.03007886, 1.        , 0.        , 0.        , 0.03033603,
       1.        , 0.        , 0.        , 0.04427288, 1.        ,
       0.        , 0.        , 0.06151402, 0.        , 1.        ,
       0.        , 0.23254126, 0.        , 1.        , 0.        ,
       0.20717324, 0.        , 1.        , 0.        , 0.26724955,
       0.        , 1.        , 0.        , 0.19984712], dtype=float32), array([0., 0., 0.], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False, False, False]), array([False, False, False]), array([False, False, False])], group_id=0, group_reward=0.0)
DecisionStep(obs=[array([1.        , 0.        , 0.        , 0.02629238, 1.        ,
       0.        , 0.        , 0.02595958, 1.        , 0.        ,
       0.        , 0.028801

DecisionStep(obs=[array([0.        , 1.        , 0.        , 0.20407139, 0.        ,
       1.        , 0.        , 0.24796864, 1.        , 0.        ,
       0.        , 0.0387903 , 0.        , 1.        , 0.        ,
       0.22836521, 1.        , 0.        , 0.        , 0.02530293,
       0.        , 1.        , 0.        , 0.20504917, 1.        ,
       0.        , 0.        , 0.02095279, 0.        , 1.        ,
       0.        , 0.1966538 , 1.        , 0.        , 0.        ,
       0.02043993, 0.        , 1.        , 0.        , 0.20035344,
       1.        , 0.        , 0.        , 0.02126597], dtype=float32), array([0., 0., 0.], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False, False, False]), array([False, False, False]), array([False, False, False])], group_id=0, group_reward=0.0)
DecisionStep(obs=[array([0.        , 1.        , 0.        , 0.18427019, 0.        ,
       1.        , 0.        , 0.21040714, 1.        , 0.        ,
       0.        , 0.023421

       1.        , 0.        , 0.        , 0.01707965], dtype=float32), array([0., 0., 0.], dtype=float32)], reward=0.049999997, agent_id=0, action_mask=[array([False, False, False]), array([False, False, False]), array([False, False, False])], group_id=0, group_reward=0.0)
DecisionStep(obs=[array([1.        , 0.        , 0.        , 0.0246366 , 0.        ,
       1.        , 0.        , 0.16154242, 1.        , 0.        ,
       0.        , 0.02096637, 0.        , 1.        , 0.        ,
       0.1704603 , 1.        , 0.        , 0.        , 0.01925248,
       0.        , 1.        , 0.        , 0.19252636, 1.        ,
       0.        , 0.        , 0.01883517, 0.        , 1.        ,
       0.        , 0.23735234, 1.        , 0.        , 0.        ,
       0.01957591, 0.        , 1.        , 0.        , 0.20541367,
       1.        , 0.        , 0.        , 0.02548898], dtype=float32), array([0., 0., 0.], dtype=float32)], reward=0.049999997, agent_id=0, action_mask=[array([False, Fal

DecisionStep(obs=[array([1.        , 0.        , 0.        , 0.0306082 , 1.        ,
       0.        , 0.        , 0.03156983, 1.        , 0.        ,
       0.        , 0.03415142, 1.        , 0.        , 0.        ,
       0.03475907, 1.        , 0.        , 0.        , 0.04560624,
       1.        , 0.        , 0.        , 0.05133442, 0.        ,
       1.        , 0.        , 0.23500901, 0.        , 1.        ,
       0.        , 0.17230079, 0.        , 1.        , 0.        ,
       0.24290134, 0.        , 1.        , 0.        , 0.18346775,
       0.        , 1.        , 0.        , 0.26780495], dtype=float32), array([0., 0., 0.], dtype=float32)], reward=0.049999997, agent_id=0, action_mask=[array([False, False, False]), array([False, False, False]), array([False, False, False])], group_id=0, group_reward=0.0)
DecisionStep(obs=[array([1.        , 0.        , 0.        , 0.02903995, 1.        ,
       0.        , 0.        , 0.02846587, 1.        , 0.        ,
       0.        , 

DecisionStep(obs=[array([1.        , 0.        , 0.        , 0.04777866, 0.        ,
       1.        , 0.        , 0.17306851, 1.        , 0.        ,
       0.        , 0.0403995 , 0.        , 1.        , 0.        ,
       0.16744548, 1.        , 0.        , 0.        , 0.04055957,
       0.        , 1.        , 0.        , 0.1720775 , 1.        ,
       0.        , 0.        , 0.04353159, 0.        , 1.        ,
       0.        , 0.18848959, 1.        , 0.        , 0.        ,
       0.06375527, 0.        , 1.        , 0.        , 0.22300777,
       0.        , 1.        , 0.        , 0.25341853], dtype=float32), array([0., 0., 0.], dtype=float32)], reward=0.049999997, agent_id=0, action_mask=[array([False, False, False]), array([False, False, False]), array([False, False, False])], group_id=0, group_reward=0.0)
DecisionStep(obs=[array([1.        , 0.        , 0.        , 0.04768229, 0.        ,
       1.        , 0.        , 0.17587401, 1.        , 0.        ,
       0.        , 

DecisionStep(obs=[array([0.        , 1.        , 0.        , 0.28414416, 0.        ,
       1.        , 0.        , 0.2701007 , 0.        , 1.        ,
       0.        , 0.3197668 , 1.        , 0.        , 0.        ,
       0.06302106, 0.        , 1.        , 0.        , 0.3356416 ,
       1.        , 0.        , 0.        , 0.05520091, 0.        ,
       1.        , 0.        , 0.28003016, 1.        , 0.        ,
       0.        , 0.05447435, 0.        , 1.        , 0.        ,
       0.2531282 , 1.        , 0.        , 0.        , 0.06276799,
       0.        , 1.        , 0.        , 0.24417672], dtype=float32), array([0., 0., 0.], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False, False, False]), array([False, False, False]), array([False, False, False])], group_id=0, group_reward=0.0)
DecisionStep(obs=[array([0.        , 1.        , 0.        , 0.2821442 , 0.        ,
       1.        , 0.        , 0.26819956, 0.        , 1.        ,
       0.        , 0.317516

       0.        , 1.        , 0.        , 0.14248946], dtype=float32), array([0., 0., 0.], dtype=float32)], reward=0.049999997, agent_id=0, action_mask=[array([False, False, False]), array([False, False, False]), array([False, False, False])], group_id=0, group_reward=0.0)
DecisionStep(obs=[array([0.        , 1.        , 0.        , 0.2657317 , 0.        ,
       1.        , 0.        , 0.2410208 , 0.        , 1.        ,
       0.        , 0.21021271, 0.        , 1.        , 0.        ,
       0.23318963, 0.        , 1.        , 0.        , 0.17051241,
       0.        , 1.        , 0.        , 0.23963982, 0.        ,
       1.        , 0.        , 0.15096952, 0.        , 1.        ,
       0.        , 0.2624952 , 0.        , 1.        , 0.        ,
       0.1430714 , 0.        , 1.        , 0.        , 0.3105651 ,
       0.        , 1.        , 0.        , 0.14409335], dtype=float32), array([0., 0., 0.], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False, False, Fals

DecisionStep(obs=[array([0.        , 1.        , 0.        , 0.17737497, 0.        ,
       1.        , 0.        , 0.21552892, 0.        , 1.        ,
       0.        , 0.1587083 , 0.        , 1.        , 0.        ,
       0.25814062, 0.        , 1.        , 0.        , 0.15175349,
       0.        , 1.        , 0.        , 0.23178406, 0.        ,
       1.        , 0.        , 0.1541563 , 0.        , 1.        ,
       0.        , 0.22229366, 0.        , 1.        , 0.        ,
       0.16669907, 0.        , 1.        , 0.        , 0.22647516,
       0.        , 1.        , 0.        , 0.19401053], dtype=float32), array([0., 0., 0.], dtype=float32)], reward=0.049999997, agent_id=0, action_mask=[array([False, False, False]), array([False, False, False]), array([False, False, False])], group_id=0, group_reward=0.0)
DecisionStep(obs=[array([0.        , 1.        , 0.        , 0.18059297, 0.        ,
       1.        , 0.        , 0.21943912, 0.        , 1.        ,
       0.        , 

DecisionStep(obs=[array([0.        , 1.        , 0.        , 0.26253515, 0.        ,
       1.        , 0.        , 0.31900772, 0.        , 1.        ,
       0.        , 0.23490614, 0.        , 1.        , 0.        ,
       0.29852468, 0.        , 1.        , 0.        , 0.22461201,
       0.        , 1.        , 0.        , 0.26804504, 0.        ,
       1.        , 0.        , 0.2281683 , 0.        , 1.        ,
       0.        , 0.25707015, 0.        , 1.        , 0.        ,
       0.24673285, 0.        , 1.        , 0.        , 0.261906  ,
       0.        , 1.        , 0.        , 0.24281266], dtype=float32), array([0., 0., 0.], dtype=float32)], reward=0.049999997, agent_id=0, action_mask=[array([False, False, False]), array([False, False, False]), array([False, False, False])], group_id=0, group_reward=0.0)
DecisionStep(obs=[array([0.        , 1.        , 0.        , 0.30238283, 0.        ,
       1.        , 0.        , 0.31173745, 0.        , 1.        ,
       0.        , 

DecisionStep(obs=[array([0.        , 1.        , 0.        , 0.240836  , 0.        ,
       1.        , 0.        , 0.249665  , 0.        , 1.        ,
       0.        , 0.2467696 , 1.        , 0.        , 0.        ,
       0.06819221, 0.        , 1.        , 0.        , 0.26941267,
       1.        , 0.        , 0.        , 0.05698504, 0.        ,
       1.        , 0.        , 0.31739163, 1.        , 0.        ,
       0.        , 0.05443179, 0.        , 1.        , 0.        ,
       0.25988588, 1.        , 0.        , 0.        , 0.0606092 ,
       0.        , 1.        , 0.        , 0.22760262], dtype=float32), array([0., 0., 0.], dtype=float32)], reward=0.049999997, agent_id=0, action_mask=[array([False, False, False]), array([False, False, False]), array([False, False, False])], group_id=0, group_reward=0.0)
DecisionStep(obs=[array([0.        , 1.        , 0.        , 0.24153273, 0.        ,
       1.        , 0.        , 0.23981994, 0.        , 1.        ,
       0.        , 

DecisionStep(obs=[array([1.        , 0.        , 0.        , 0.06107131, 1.        ,
       0.        , 0.        , 0.05333437, 0.        , 1.        ,
       0.        , 0.25890085, 1.        , 0.        , 0.        ,
       0.05601126, 0.        , 1.        , 0.        , 0.24755494,
       1.        , 0.        , 0.        , 0.0689126 , 0.        ,
       1.        , 0.        , 0.2514742 , 0.        , 1.        ,
       0.        , 0.19080319, 0.        , 1.        , 0.        ,
       0.2719347 , 0.        , 1.        , 0.        , 0.19439273,
       0.        , 1.        , 0.        , 0.31648678], dtype=float32), array([0., 0., 0.], dtype=float32)], reward=0.049999997, agent_id=0, action_mask=[array([False, False, False]), array([False, False, False]), array([False, False, False])], group_id=0, group_reward=0.0)
DecisionStep(obs=[array([1.        , 0.        , 0.        , 0.05459903, 1.        ,
       0.        , 0.        , 0.05486688, 1.        , 0.        ,
       0.        , 

DecisionStep(obs=[array([1.        , 0.        , 0.        , 0.05447363, 1.        ,
       0.        , 0.        , 0.06198249, 1.        , 0.        ,
       0.        , 0.05495572, 0.        , 1.        , 0.        ,
       0.19690432, 1.        , 0.        , 0.        , 0.0637047 ,
       0.        , 1.        , 0.        , 0.20235129, 0.        ,
       1.        , 0.        , 0.25317878, 0.        , 1.        ,
       0.        , 0.22165072, 0.        , 1.        , 0.        ,
       0.239933  , 0.        , 1.        , 0.        , 0.25674352,
       0.        , 1.        , 0.        , 0.24164641], dtype=float32), array([0., 0., 0.], dtype=float32)], reward=0.049999997, agent_id=0, action_mask=[array([False, False, False]), array([False, False, False]), array([False, False, False])], group_id=0, group_reward=0.0)
DecisionStep(obs=[array([1.        , 0.        , 0.        , 0.06242618, 0.        ,
       1.        , 0.        , 0.19862162, 1.        , 0.        ,
       0.        , 

DecisionStep(obs=[array([0.        , 1.        , 0.        , 0.22185239, 0.        ,
       1.        , 0.        , 0.24227561, 0.        , 1.        ,
       0.        , 0.19850442, 0.        , 1.        , 0.        ,
       0.20033358, 0.        , 1.        , 0.        , 0.18980515,
       0.        , 1.        , 0.        , 0.17987974, 1.        ,
       0.        , 0.        , 0.0577983 , 0.        , 1.        ,
       0.        , 0.17251498, 1.        , 0.        , 0.        ,
       0.04827728, 0.        , 1.        , 0.        , 0.17576057,
       1.        , 0.        , 0.        , 0.04558873], dtype=float32), array([0., 0., 0.], dtype=float32)], reward=0.049999997, agent_id=0, action_mask=[array([False, False, False]), array([False, False, False]), array([False, False, False])], group_id=0, group_reward=0.0)
DecisionStep(obs=[array([0.        , 1.        , 0.        , 0.20149249, 0.        ,
       1.        , 0.        , 0.23007247, 0.        , 1.        ,
       0.        , 

DecisionStep(obs=[array([0.        , 1.        , 0.        , 0.21028677, 0.        ,
       1.        , 0.        , 0.19073226, 0.        , 1.        ,
       0.        , 0.25091854, 0.        , 1.        , 0.        ,
       0.18453534, 0.        , 1.        , 0.        , 0.22052352,
       0.        , 1.        , 0.        , 0.18964015, 0.        ,
       1.        , 0.        , 0.19524822, 0.        , 1.        ,
       0.        , 0.20772728, 1.        , 0.        , 0.        ,
       0.07277134, 0.        , 1.        , 0.        , 0.24576846,
       1.        , 0.        , 0.        , 0.04087997], dtype=float32), array([0., 0., 0.], dtype=float32)], reward=0.049999997, agent_id=0, action_mask=[array([False, False, False]), array([False, False, False]), array([False, False, False])], group_id=0, group_reward=0.0)
DecisionStep(obs=[array([0.        , 1.        , 0.        , 0.23986386, 0.        ,
       1.        , 0.        , 0.20609812, 0.        , 1.        ,
       0.        , 

DecisionStep(obs=[array([1.        , 0.        , 0.        , 0.05251805, 0.        ,
       1.        , 0.        , 0.20040624, 1.        , 0.        ,
       0.        , 0.04147992, 0.        , 1.        , 0.        ,
       0.22170506, 1.        , 0.        , 0.        , 0.04066066,
       0.        , 1.        , 0.        , 0.26573357, 1.        ,
       0.        , 0.        , 0.04336974, 0.        , 1.        ,
       0.        , 0.2251836 , 1.        , 0.        , 0.        ,
       0.05049053, 0.        , 1.        , 0.        , 0.20009767,
       0.        , 1.        , 0.        , 0.23501177], dtype=float32), array([0., 0., 0.], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False, False, False]), array([False, False, False]), array([False, False, False])], group_id=0, group_reward=0.0)
DecisionStep(obs=[array([1.        , 0.        , 0.        , 0.04191026, 1.        ,
       0.        , 0.        , 0.05518297, 1.        , 0.        ,
       0.        , 0.039955

DecisionStep(obs=[array([1.        , 0.        , 0.        , 0.05080375, 0.        ,
       1.        , 0.        , 0.21194972, 1.        , 0.        ,
       0.        , 0.04726599, 0.        , 1.        , 0.        ,
       0.23447542, 1.        , 0.        , 0.        , 0.04759557,
       0.        , 1.        , 0.        , 0.28103995, 1.        ,
       0.        , 0.        , 0.05511796, 0.        , 1.        ,
       0.        , 0.23919792, 0.        , 1.        , 0.        ,
       0.25388217, 0.        , 1.        , 0.        , 0.21255076,
       0.        , 1.        , 0.        , 0.22234483], dtype=float32), array([0., 0., 0.], dtype=float32)], reward=0.049999997, agent_id=0, action_mask=[array([False, False, False]), array([False, False, False]), array([False, False, False])], group_id=0, group_reward=0.0)
DecisionStep(obs=[array([1.        , 0.        , 0.        , 0.04773924, 1.        ,
       0.        , 0.        , 0.05492992, 1.        , 0.        ,
       0.        , 

DecisionStep(obs=[array([0.        , 1.        , 0.        , 0.23452885, 0.        ,
       1.        , 0.        , 0.26456982, 0.        , 1.        ,
       0.        , 0.20984696, 0.        , 1.        , 0.        ,
       0.21876802, 1.        , 0.        , 0.        , 0.0645474 ,
       0.        , 1.        , 0.        , 0.19643183, 1.        ,
       0.        , 0.        , 0.04996762, 0.        , 1.        ,
       0.        , 0.18838924, 1.        , 0.        , 0.        ,
       0.04763253, 0.        , 1.        , 0.        , 0.19193333,
       1.        , 0.        , 0.        , 0.05038977], dtype=float32), array([0., 0., 0.], dtype=float32)], reward=0.049999997, agent_id=0, action_mask=[array([False, False, False]), array([False, False, False]), array([False, False, False])], group_id=0, group_reward=0.0)
DecisionStep(obs=[array([0.        , 1.        , 0.        , 0.2150548 , 0.        ,
       1.        , 0.        , 0.24555819, 0.        , 1.        ,
       0.        , 

DecisionStep(obs=[array([0.        , 1.        , 0.        , 0.21964286, 0.        ,
       1.        , 0.        , 0.19921827, 0.        , 1.        ,
       0.        , 0.2620825 , 0.        , 1.        , 0.        ,
       0.19274563, 0.        , 1.        , 0.        , 0.2305144 ,
       0.        , 1.        , 0.        , 0.19807748, 0.        ,
       1.        , 0.        , 0.20409408, 0.        , 1.        ,
       0.        , 0.2169693 , 1.        , 0.        , 0.        ,
       0.05263651, 0.        , 1.        , 0.        , 0.25670293,
       1.        , 0.        , 0.        , 0.04013925], dtype=float32), array([0., 0., 0.], dtype=float32)], reward=0.049999997, agent_id=0, action_mask=[array([False, False, False]), array([False, False, False]), array([False, False, False])], group_id=0, group_reward=0.0)
DecisionStep(obs=[array([0.        , 1.        , 0.        , 0.25037768, 0.        ,
       1.        , 0.        , 0.21513182, 0.        , 1.        ,
       0.        , 

DecisionStep(obs=[array([0.        , 1.        , 0.        , 0.23705527, 0.        ,
       1.        , 0.        , 0.20368469, 0.        , 1.        ,
       0.        , 0.25067285, 0.        , 1.        , 0.        ,
       0.18835923, 0.        , 1.        , 0.        , 0.21705943,
       0.        , 1.        , 0.        , 0.18542352, 0.        ,
       1.        , 0.        , 0.20197509, 0.        , 1.        ,
       0.        , 0.19392177, 1.        , 0.        , 0.        ,
       0.05727837, 0.        , 1.        , 0.        , 0.21673049,
       1.        , 0.        , 0.        , 0.04808125], dtype=float32), array([0., 0., 0.], dtype=float32)], reward=0.049999997, agent_id=0, action_mask=[array([False, False, False]), array([False, False, False]), array([False, False, False])], group_id=0, group_reward=0.0)
DecisionStep(obs=[array([0.        , 1.        , 0.        , 0.2635755 , 0.        ,
       1.        , 0.        , 0.22331738, 0.        , 1.        ,
       0.        , 

DecisionStep(obs=[array([0.        , 1.        , 0.        , 0.19562835, 0.        ,
       1.        , 0.        , 0.18595962, 0.        , 1.        ,
       0.        , 0.22015408, 0.        , 1.        , 0.        ,
       0.18783504, 0.        , 1.        , 0.        , 0.26605132,
       0.        , 1.        , 0.        , 0.20186324, 0.        ,
       1.        , 0.        , 0.22197019, 0.        , 1.        ,
       0.        , 0.23312327, 0.        , 1.        , 0.        ,
       0.20064601, 0.        , 1.        , 0.        , 0.2753106 ,
       1.        , 0.        , 0.        , 0.05790929], dtype=float32), array([0., 0., 0.], dtype=float32)], reward=0.049999997, agent_id=0, action_mask=[array([False, False, False]), array([False, False, False]), array([False, False, False])], group_id=0, group_reward=0.0)
DecisionStep(obs=[array([0.        , 1.        , 0.        , 0.19830373, 0.        ,
       1.        , 0.        , 0.18850276, 0.        , 1.        ,
       0.        , 

       0.        , 1.        , 0.        , 0.18997051], dtype=float32), array([0., 0., 0.], dtype=float32)], reward=0.049999997, agent_id=0, action_mask=[array([False, False, False]), array([False, False, False]), array([False, False, False])], group_id=0, group_reward=0.0)
DecisionStep(obs=[array([0.        , 1.        , 0.        , 0.24321055, 0.        ,
       1.        , 0.        , 0.2311905 , 0.        , 1.        ,
       0.        , 0.27370107, 0.        , 1.        , 0.        ,
       0.23352247, 0.        , 1.        , 0.        , 0.25852644,
       0.        , 1.        , 0.        , 0.25096315, 0.        ,
       1.        , 0.        , 0.2156916 , 0.        , 1.        ,
       0.        , 0.28982735, 0.        , 1.        , 0.        ,
       0.1949702 , 0.        , 1.        , 0.        , 0.28231886,
       0.        , 1.        , 0.        , 0.18807514], dtype=float32), array([0., 0., 0.], dtype=float32)], reward=0.049999997, agent_id=0, action_mask=[array([False, Fal

DecisionStep(obs=[array([0.        , 1.        , 0.        , 0.2732217 , 0.        ,
       1.        , 0.        , 0.2478152 , 0.        , 1.        ,
       0.        , 0.27891445, 0.        , 1.        , 0.        ,
       0.23976392, 0.        , 1.        , 0.        , 0.22623807,
       0.        , 1.        , 0.        , 0.24639674, 0.        ,
       1.        , 0.        , 0.20030755, 0.        , 1.        ,
       0.        , 0.2698975 , 0.        , 1.        , 0.        ,
       0.18982765, 0.        , 1.        , 0.        , 0.3193244 ,
       0.        , 1.        , 0.        , 0.19118297], dtype=float32), array([0., 0., 0.], dtype=float32)], reward=0.049999997, agent_id=0, action_mask=[array([False, False, False]), array([False, False, False]), array([False, False, False])], group_id=0, group_reward=0.0)
DecisionStep(obs=[array([0.        , 1.        , 0.        , 0.2752217 , 0.        ,
       1.        , 0.        , 0.24962924, 0.        , 1.        ,
       0.        , 

DecisionStep(obs=[array([0.        , 1.        , 0.        , 0.17564133, 0.        ,
       1.        , 0.        , 0.18208055, 0.        , 1.        ,
       0.        , 0.17996854, 0.        , 1.        , 0.        ,
       0.20143203, 0.        , 1.        , 0.        , 0.19648182,
       0.        , 1.        , 0.        , 0.24143499, 0.        ,
       1.        , 0.        , 0.23147239, 0.        , 1.        ,
       0.        , 0.26862848, 0.        , 1.        , 0.        ,
       0.22350822, 0.        , 1.        , 0.        , 0.23870312,
       0.        , 1.        , 0.        , 0.19574364], dtype=float32), array([0., 0., 0.], dtype=float32)], reward=0.049999997, agent_id=0, action_mask=[array([False, False, False]), array([False, False, False]), array([False, False, False])], group_id=0, group_reward=0.0)
DecisionStep(obs=[array([0.        , 1.        , 0.        , 0.18137069, 0.        ,
       1.        , 0.        , 0.19676186, 0.        , 1.        ,
       0.        , 

In [38]:
env.close()
print("Closed environment")

Closed environment
