## Environment Settings

In [1]:
try:
  import mlagents
  print("ml-agents already installed")
except ImportError:
  !python -m pip install -q mlagents==0.27.0
  print("Installed ml-agents")

ml-agents already installed


### Unity환경을 build한 파일 위치(.exe가 포함된)설정

In [2]:
env_id = "E:/RL_Algorithm/RollerBall/ML_test/ML_test"

In [3]:
# -----------------
# This code is used to close an env that might not have been closed before
try:
  env.close()
except:
  pass
# -----------------

from mlagents_envs.environment import UnityEnvironment

env = UnityEnvironment(file_name=env_id, seed=1, side_channels=[])

### Environment Reset

In [4]:
env.reset()

In [5]:
# We will only consider the first Behavior
behavior_name = list(env.behavior_specs)[0]
print(f"Name of the behavior : {behavior_name}")
spec = env.behavior_specs[behavior_name]

Name of the behavior : RollerBall?team=0


In [6]:
# Examine the number of observations per Agent
print("Number of observations : ", len(spec.observation_specs))

# Is there a visual observation ?
# Visual observation have 3 dimensions: Height, Width and number of channels
vis_obs = any(len(spec.shape) == 3 for spec in spec.observation_specs)
print("Is there a visual observation ?", vis_obs)

Number of observations :  1
Is there a visual observation ? False


In [7]:
# Is the Action continuous or multi-discrete ?
if spec.action_spec.continuous_size > 0:
  print(f"There are {spec.action_spec.continuous_size} continuous actions")
if spec.action_spec.is_discrete():
  print(f"There are {spec.action_spec.discrete_size} discrete actions")


# How many actions are possible ?
#print(f"There are {spec.action_size} action(s)")

# For discrete actions only : How many different options does each action has ?
if spec.action_spec.discrete_size > 0:
  for action, branch_size in enumerate(spec.action_spec.discrete_branches):
    print(f"Action number {action} has {branch_size} different options")

There are 2 continuous actions
Action number 0 has 1 different options


In [8]:
decision_steps, terminal_steps = env.get_steps(behavior_name)

In [9]:
env.set_actions(behavior_name, spec.action_spec.empty_action(len(decision_steps)))

In [None]:
env.step()

In [10]:
import matplotlib.pyplot as plt
%matplotlib inline

for index, obs_spec in enumerate(spec.observation_specs):
  if len(obs_spec.shape) == 3:
    print("Here is the first visual observation")
    plt.imshow(decision_steps.obs[index][0,:,:,:])
    plt.show()

for index, obs_spec in enumerate(spec.observation_specs):
  if len(obs_spec.shape) == 1:
    print("First vector observations : ", decision_steps.obs[index][0,:])

First vector observations :  [3.9974775 0.5       2.1941023 0.        0.5       0.        0.
 0.       ]


## Run the Environment for a few episodes

In [11]:
for episode in range(100):
  env.reset()
  decision_steps, terminal_steps = env.get_steps(behavior_name)
  tracked_agent = -1 # -1 indicates not yet tracking
  done = False # For the tracked_agent
  episode_rewards = 0 # For the tracked_agent
  while not done:
    # Track the first agent we see if not tracking
    # Note : len(decision_steps) = [number of agents that requested a decision]
    if tracked_agent == -1 and len(decision_steps) >= 1:
      tracked_agent = decision_steps.agent_id[0]

    # Generate an action for all agents
    action = spec.action_spec.random_action(len(decision_steps))

    # Set the actions
    env.set_actions(behavior_name, action)

    # Move the simulation forward
    env.step()

    # Get the new simulation results
    decision_steps, terminal_steps = env.get_steps(behavior_name)
    if tracked_agent in decision_steps: # The agent requested a decision
      episode_rewards += decision_steps[tracked_agent].reward
    if tracked_agent in terminal_steps: # The agent terminated its episode
      episode_rewards += terminal_steps[tracked_agent].reward
      done = True
  print(f"Total rewards for episode {episode} is {episode_rewards}")


Total rewards for episode 0 is 1.0
Total rewards for episode 1 is 1.0
Total rewards for episode 2 is 0.0
Total rewards for episode 3 is 0.0
Total rewards for episode 4 is 0.0
Total rewards for episode 5 is 0.0
Total rewards for episode 6 is 0.0
Total rewards for episode 7 is 0.0
Total rewards for episode 8 is 1.0
Total rewards for episode 9 is 0.0
Total rewards for episode 10 is 1.0
Total rewards for episode 11 is 0.0
Total rewards for episode 12 is 0.0
Total rewards for episode 13 is 1.0
Total rewards for episode 14 is 0.0
Total rewards for episode 15 is 1.0
Total rewards for episode 16 is 0.0
Total rewards for episode 17 is 0.0
Total rewards for episode 18 is 1.0
Total rewards for episode 19 is 0.0
Total rewards for episode 20 is 0.0
Total rewards for episode 21 is 0.0
Total rewards for episode 22 is 0.0
Total rewards for episode 23 is 0.0
Total rewards for episode 24 is 0.0
Total rewards for episode 25 is 0.0
Total rewards for episode 26 is 0.0
Total rewards for episode 27 is 0.0
To

## Close the Environment to free the port it is using

In [12]:
env.close()
print("Closed environment")

Closed environment
