In [39]:
import platform

# Record which Python interpreter this notebook was run with.
python_version = platform.python_version()
print(python_version)


3.8.5


In [40]:
try:
    import mlagents
    from mlagents_envs.environment import UnityEnvironment as UE
    print("ml-agents already installed")
except ImportError:
    !pip install -q mlagents==0.26.0
    print("Installed ml-agents")

ml-agents already installed


In [41]:
# -----------------
# Close an environment that a previous run of this notebook may have left open.
try:
    env.close()
except NameError:
    # Fresh kernel: `env` has not been created yet, so there is nothing to close.
    pass
except Exception as close_error:
    # Best-effort cleanup: report the failure instead of hiding it, but keep the
    # notebook running. Unlike a bare `except:`, this does not swallow
    # KeyboardInterrupt or SystemExit.
    print(f"Ignoring error while closing a previous environment: {close_error!r}")
# -----------------

In [42]:
# Launch the Unity build as an environment. The executable name and the seed
# are named here so they are easy to find and change.
ENV_FILE_NAME = 'run27_linux'
ENV_SEED = 1
env = UE(file_name=ENV_FILE_NAME, seed=ENV_SEED, side_channels=[])

In [43]:
# Reset the environment before querying its behavior specs in the next cell.
env.reset()

In [44]:
# Work with the first behavior the environment reports: keep its name and
# look up the matching spec (observation and action layout).
behavior_names = list(env.behavior_specs)
behavior_name = behavior_names[0]
spec = env.behavior_specs[behavior_name]
print(f"Name of the behavior : {behavior_name}")

Name of the behavior : Hummingbird?team=0


In [45]:
# Display the full behavior spec (observation specs and action spec) as the cell output.
spec

BehaviorSpec(observation_specs=[ObservationSpec(shape=(4,), dimension_property=(<DimensionProperty.NONE: 1>,), observation_type=<ObservationType.DEFAULT: 0>, name='RayDown'), ObservationSpec(shape=(52,), dimension_property=(<DimensionProperty.NONE: 1>,), observation_type=<ObservationType.DEFAULT: 0>, name='RayPerceptionSensor'), ObservationSpec(shape=(28,), dimension_property=(<DimensionProperty.NONE: 1>,), observation_type=<ObservationType.DEFAULT: 0>, name='RaysForward'), ObservationSpec(shape=(4,), dimension_property=(<DimensionProperty.NONE: 1>,), observation_type=<ObservationType.DEFAULT: 0>, name='RayUp'), ObservationSpec(shape=(3,), dimension_property=(<DimensionProperty.NONE: 1>,), observation_type=<ObservationType.DEFAULT: 0>, name='StackingSensor_size3_VectorSensor_size1')], action_spec=ActionSpec(continuous_size=0, discrete_branches=(3, 3, 3)))

In [46]:
# One entry in observation_specs per observation the behavior provides.
num_observations = len(spec.observation_specs)
print("Number of observations : ", num_observations)

Number of observations :  5


In [47]:
# Is there a visual observation?
# A visual observation has 3 dimensions: height, width and number of channels.
# The loop variable is called `obs_spec` so it does not shadow the outer `spec`.
observation_ranks = [len(obs_spec.shape) for obs_spec in spec.observation_specs]
vis_obs = 3 in observation_ranks
print("Is there a visual observation ?", vis_obs)

Is there a visual observation ? False


In [48]:
# Is the action space continuous, discrete, or both?
# Both parts are tested through their sizes so the report stays consistent:
# ActionSpec.is_discrete() only answers whether the spec is purely discrete,
# so relying on it would skip the discrete count for a mixed action space
# while the per-branch lines below were still printed.
action_spec = spec.action_spec
if action_spec.continuous_size > 0:
    print(f"There are {action_spec.continuous_size} continuous actions")
if action_spec.discrete_size > 0:
    print(f"There are {action_spec.discrete_size} discrete actions")
    # For discrete actions only: how many different options does each branch have?
    for action, branch_size in enumerate(action_spec.discrete_branches):
        print(f"Action number {action} has {branch_size} different options")
    


There are 3 discrete actions
Action number 0 has 3 different options
Action number 1 has 3 different options
Action number 2 has 3 different options


In [49]:
# Fetch the current step results for this behavior: the first value is used
# below as the batch of agents requesting a decision, the second as the batch
# of agents whose episode ended.
decision_steps, terminal_steps = env.get_steps(behavior_name)

In [50]:
# Give every agent that requested a decision the spec's "empty" action.
n_agents = len(decision_steps)
idle_action = spec.action_spec.empty_action(n_agents)
env.set_actions(behavior_name, idle_action)

In [71]:
# Advance the simulation using the actions set in the previous cell.
env.step()

In [52]:
import matplotlib.pyplot as plt
%matplotlib inline

for index, obs_spec in enumerate(spec.observation_specs):
    if len(obs_spec.shape) == 3:
        print("Here is the first visual observation")
        plt.imshow(decision_steps.obs[index][0,:,:,:])
        plt.show()

for index, obs_spec in enumerate(spec.observation_specs):
    if len(obs_spec.shape) == 1:
        print("First vector observations : ", decision_steps.obs[index][0,:])

First vector observations :  [0.         0.         0.         0.03944738]
First vector observations :  [0.         1.         0.         0.20423949 0.         1.
 0.         0.20863327 0.         1.         0.         0.20846266
 0.         1.         0.         0.222621   0.         1.
 0.         0.22224079 0.         1.         0.         0.24967669
 0.         1.         0.         0.24898499 0.         1.
 0.         0.2802666  0.         1.         0.         0.28131625
 0.         1.         0.         0.23967367 0.         1.
 0.         0.24026023 0.         1.         0.         0.2171906
 0.         1.         0.         0.21750405]
First vector observations :  [0.         0.         0.         0.10212053 0.         0.
 0.         0.10875282 0.         0.         0.         0.10859609
 0.         0.         0.         0.13353087 0.         0.
 0.         0.13308765 0.         0.         0.         0.11893507
 0.         0.         0.         0.11921204]
First vector observa

In [63]:
# Number of observation arrays in the decision batch (shown as the cell output).
len(decision_steps.obs)

5

In [72]:
# Display the raw observation arrays of the decision batch.
# NOTE(review): this dumps every value; showing only the array shapes would be
# easier to read — confirm whether the full dump is needed.
decision_steps.obs

[array([[0.        , 0.        , 0.        , 0.03944738]], dtype=float32),
 array([[0.        , 1.        , 0.        , 0.20423949, 0.        ,
         1.        , 0.        , 0.20863327, 0.        , 1.        ,
         0.        , 0.20846266, 0.        , 1.        , 0.        ,
         0.222621  , 0.        , 1.        , 0.        , 0.22224079,
         0.        , 1.        , 0.        , 0.24967669, 0.        ,
         1.        , 0.        , 0.24898499, 0.        , 1.        ,
         0.        , 0.2802666 , 0.        , 1.        , 0.        ,
         0.28131625, 0.        , 1.        , 0.        , 0.23967367,
         0.        , 1.        , 0.        , 0.24026023, 0.        ,
         1.        , 0.        , 0.2171906 , 0.        , 1.        ,
         0.        , 0.21750405]], dtype=float32),
 array([[0.        , 0.        , 0.        , 0.10212053, 0.        ,
         0.        , 0.        , 0.10875282, 0.        , 0.        ,
         0.        , 0.10859609, 0.        , 0

In [73]:
# Print the length of row 0 of each observation array, one per line.
for obs in decision_steps.obs:
    first_row = obs[0]
    print(len(first_row))

4
52
28
4
3


In [60]:
# Draw one random action per agent in the decision batch and display its
# discrete part as the cell output.
num_agents = len(decision_steps)
action = spec.action_spec.random_action(num_agents)
action.discrete

array([[1, 0, 0]], dtype=int32)

In [37]:
# Play one episode with random actions and add up the rewards of a single
# tracked agent (the first agent seen requesting a decision).
for episode in range(1):
    env.reset()
    decision_steps, terminal_steps = env.get_steps(behavior_name)
    tracked_agent = -1   # -1 means no agent is being tracked yet
    done = False         # becomes True once the tracked agent terminates
    episode_rewards = 0  # running reward total of the tracked agent
    while not done:
        # Start tracking as soon as at least one agent requests a decision.
        # len(decision_steps) is the number of agents that requested one.
        if tracked_agent == -1 and len(decision_steps) > 0:
            tracked_agent = decision_steps.agent_id[0]

        # One random action per requesting agent, handed to the environment.
        action = spec.action_spec.random_action(len(decision_steps))
        env.set_actions(behavior_name, action)

        # Advance the simulation and read back the new step results.
        env.step()
        decision_steps, terminal_steps = env.get_steps(behavior_name)

        # Reward received while the tracked agent is still requesting decisions.
        if tracked_agent in decision_steps:
            episode_rewards += decision_steps[tracked_agent].reward

        # Final reward: the tracked agent's episode has ended, so stop looping.
        done = tracked_agent in terminal_steps
        if done:
            episode_rewards += terminal_steps[tracked_agent].reward
    print(f"Total rewards for episode {episode} is {episode_rewards}")

Total rewards for episode 0 is 66.9999960064888


In [38]:
# Close the environment now that the episode is finished, then confirm it.
env.close()
print("Closed environment")

Closed environment
