# Rolling a Ball

![](rollingaball1.png)

# Interaction test

> This section is adapted from the ML-Agents *Getting Started* tutorial, applied to the 3D Ball agent.

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import sys

from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfig, EngineConfigurationChannel

%matplotlib inline

# Show which interpreter is running this notebook.
print("Python version:", sys.version, sep="\n")

# Refuse to continue on anything older than Python 3.
major_version = sys.version_info[0]
if major_version < 3:
    raise Exception("ERROR: ML-Agents Toolkit (v0.3 onwards) requires Python 3")

Python version:
3.6.4 |Anaconda, Inc.| (default, Jan 16 2018, 10:22:32) [MSC v.1900 64 bit (AMD64)]


In [3]:
# Create the engine-configuration side channel and register it with the
# environment so the channel object is actually usable instead of being
# left dangling.
engine_configuration_channel = EngineConfigurationChannel()

# No file_name is passed: as the log output of this cell shows, the
# environment then listens on the port and waits for the Play button to be
# pressed in the Unity Editor.
env = UnityEnvironment(base_port=5004, side_channels=[engine_configuration_channel])

INFO:mlagents_envs:Listening on port 5004. Start training by pressing the Play button in the Unity Editor.


In [4]:
# Bring the simulation back to its initial state.
env.reset()

# Use the first agent group reported by the environment as the default brain,
# then look up the spec that belongs to it.
agent_groups = env.get_agent_groups()
group_name = agent_groups[0]
group_spec = env.get_agent_group_spec(group_name)

INFO:mlagents_envs:Connected new brain:
3DBall?team=0


In [7]:
# Fetch the current step result (the agents' state) for the selected group.
step_result = env.get_step_result(group_name)
observation_shapes = group_spec.observation_shapes

# How many observations does each agent receive?
print("Number of observations : ", len(observation_shapes))

# First observation, for every agent at once.
first_observation = step_result.obs[0]
print("Agent state looks like: \n{}".format(first_observation))

# First observation, restricted to the first agent.
print("Agent state looks like: \n{}".format(first_observation[0]))

# An observation whose shape has three dimensions is treated as visual.
vis_obs = any(len(shape) == 3 for shape in observation_shapes)
print("Is there a visual observation ?", vis_obs)

# If there is one, display the first visual observation of the first agent.
if vis_obs:
    vis_obs_index = next(
        index
        for index, shape in enumerate(observation_shapes)
        if len(shape) == 3
    )
    print("Agent visual observation look like:")
    obs = step_result.obs[vis_obs_index]
    plt.imshow(obs[0, :, :, :])

Number of observations :  1
Agent state looks like: 
[[-1.4673042e-02 -1.4683060e-02 -5.2082062e-01  4.0000000e+00
  -7.9952097e-01  0.0000000e+00  0.0000000e+00  0.0000000e+00]
 [-2.6140258e-02  3.4010161e-02 -4.5768166e-01  4.0000000e+00
  -5.5027008e-03  0.0000000e+00  0.0000000e+00  0.0000000e+00]
 [ 6.3632242e-02  3.7996579e-02 -1.1360741e+00  4.0000000e+00
  -4.1505909e-01  0.0000000e+00  0.0000000e+00  0.0000000e+00]
 [-4.6871606e-02 -3.9161425e-02 -6.1104012e-01  4.0000000e+00
   5.6867313e-01  0.0000000e+00  0.0000000e+00  0.0000000e+00]
 [ 3.8746696e-02  7.7085062e-03  1.1423024e+00  4.0000000e+00
  -1.4589405e-01  0.0000000e+00  0.0000000e+00  0.0000000e+00]
 [ 4.8017994e-02 -7.4483551e-02 -5.7353783e-01  4.0000000e+00
  -3.8447380e-03  0.0000000e+00  0.0000000e+00  0.0000000e+00]
 [ 3.9585244e-02 -8.3357669e-02 -9.4123268e-01  4.0000000e+00
  -7.9583311e-01  0.0000000e+00  0.0000000e+00  0.0000000e+00]
 [ 8.0520153e-02 -2.9333552e-02  1.7612720e-01  4.0000000e+00
   5.68483

In [9]:
# Drive the environment with random actions for 10 episodes and print the
# reward accumulated at index 0 of the step result (the first agent) for
# each episode.

# The action spec does not change while stepping, so query it once up front
# instead of on every simulation step.
action_size = group_spec.action_size
is_continuous = group_spec.is_action_continuous()
is_discrete = group_spec.is_action_discrete()

# Fail fast with a clear message rather than hitting an unbound (or stale,
# left over from an earlier run) `action` variable inside the loop.
if not (is_continuous or is_discrete):
    raise ValueError(
        "Agent group {!r} reports neither continuous nor discrete actions".format(group_name)
    )

for episode in range(10):
    env.reset()
    step_result = env.get_step_result(group_name)
    done = False
    episode_rewards = 0
    while not done:
        # One action row per agent currently present in the step result.
        n_agents = step_result.n_agents()
        if is_continuous:
            # Standard-normal sample for every action dimension.
            action = np.random.randn(n_agents, action_size)
        else:
            # One column per discrete branch, each drawn uniformly from
            # [0, number of choices in that branch).
            branch_size = group_spec.discrete_action_branches
            action = np.column_stack(
                [np.random.randint(0, n_choices, size=n_agents) for n_choices in branch_size]
            )
        env.set_actions(group_name, action)
        env.step()
        step_result = env.get_step_result(group_name)
        # Only entry 0 of the reward/done arrays is tracked; the episode ends
        # as soon as that entry reports done.
        episode_rewards += step_result.reward[0]
        done = step_result.done[0]
    print("Total reward this episode: {}".format(episode_rewards))

Total reward this episode: 1.1000000312924385
Total reward this episode: 0.6000000238418579
Total reward this episode: 0.6000000238418579
Total reward this episode: 2.300000049173832
Total reward this episode: 1.1000000312924385
Total reward this episode: 2.0000000447034836
Total reward this episode: 1.1000000312924385
Total reward this episode: 0.6000000238418579
Total reward this episode: 1.4901161193847656e-08
Total reward this episode: 1.2000000327825546


# Interaction test with custom environment

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import sys

from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfig, EngineConfigurationChannel

# Build the engine-configuration side channel and register it with a new
# environment on port 5004.
engine_configuration_channel = EngineConfigurationChannel()
side_channels = [engine_configuration_channel]
env = UnityEnvironment(base_port=5004, side_channels=side_channels)

INFO:mlagents_envs:Listening on port 5004. Start training by pressing the Play button in the Unity Editor.
