# Watch a trained Agent

In [1]:
from unityagents import UnityEnvironment
import numpy as np
from collections import deque
import torch
import matplotlib.pyplot as plt

In [2]:
from agent import Agent

# Instantiate the agent: 33-dimensional states, 4-dimensional actions,
# two hidden layers (256 and 128 units), dropout probability 0.
# The layer sizes have to match the checkpoints loaded into the networks later on.
agent = Agent(
    state_size=33,
    action_size=4,
    seed=0,
    hidden_layers=[256, 128],
    drop_p=0,
    use_bn=True,
    use_reset=True,
    noise="OU",
    mode="multi",
)

In [3]:
# Display the layer structure of the actor network (bare last expression -> rich repr).
agent.actor_local

Actor(
  (batchnorm_input): BatchNorm1d(33, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm_layers): ModuleList(
    (0): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (hidden_layers): ModuleList(
    (0): Linear(in_features=33, out_features=256, bias=False)
    (1): Linear(in_features=256, out_features=128, bias=False)
  )
  (output): Linear(in_features=128, out_features=4, bias=True)
  (dropout): Dropout(p=0)
)

In [4]:
# Display the layer structure of the critic network (bare last expression -> rich repr).
agent.critic_local

Critic(
  (bn0): BatchNorm1d(33, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fcs1): Linear(in_features=33, out_features=256, bias=False)
  (fcs2): Linear(in_features=260, out_features=128, bias=True)
  (fcs3): Linear(in_features=128, out_features=1, bias=True)
)

In [5]:
# Load the trained weights from file.
#
# map_location='cpu' makes the checkpoints readable on machines without CUDA even if
# they were saved from a GPU; load_state_dict then copies the values into the networks'
# existing parameters, so the networks stay on whatever device they already live on.
# NOTE(review): torch.load unpickles the file — only load checkpoints from a trusted source.

agent.actor_local.load_state_dict(torch.load('weights/DDPG_actor.pth', map_location='cpu'))
agent.critic_local.load_state_dict(torch.load('weights/DDPG_critic.pth', map_location='cpu'))

In [6]:
# Launch the Unity Reacher build. The path is relative to the notebook's working
# directory and points at a Windows executable — adjust it for other platforms.
env = UnityEnvironment(file_name="Reacher_Windows_x86_64_Multi_Agent/Reacher.exe")

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		goal_speed -> 1.0
		goal_size -> 5.0
Unity brain name: ReacherBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 33
        Number of stacked Vector Observation: 1
        Vector Action space type: continuous
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


In [7]:
# Get the default brain: the first name listed by the environment, and the
# brain object registered under that name.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

In [8]:
# Run this cell several times to change the initial state and watch the agent.

# Start a fresh episode with train_mode switched off.
env_info = env.reset(train_mode=False)[brain_name]

# One entry per agent reported by the environment.
num_agents = len(env_info.agents)
print('Number of agents:', num_agents)

states = env_info.vector_observations    # current observation of every agent
scores = np.zeros(num_agents)            # accumulated reward of every agent
step = 0                                 # number of environment steps taken so far

Number of agents: 20


In [10]:

# Roll out one episode with the trained agent, accumulating each agent's reward.
# NOTE(review): the agent was built with noise="OU"; confirm whether act() adds
# exploration noise by default, since that would affect the evaluation score.
episode_finished = False
while not episode_finished:
    # Pick one action per agent and send all actions to the environment.
    actions = agent.act(states)
    env_info = env.step(actions)[brain_name]

    # Read back the transition reported for every agent.
    next_states = env_info.vector_observations
    rewards = env_info.rewards
    dones = env_info.local_done

    scores += rewards           # update the score (for each agent)
    states = next_states        # roll over states to the next time step
    step += 1

    # Leave the loop as soon as any agent reports that its episode ended.
    episode_finished = np.any(dones)

print("time step :", step-1)
print("Score: {}".format(np.mean(scores)))

time step : 1000
Score: 38.227999145537616


In [11]:
# Shut down the Unity environment.
# NOTE(review): the "Random Agents" cell further down still calls env.reset()/env.step();
# when running top to bottom it would execute after this close — confirm the intended order.
env.close()

## Random Agents

In [9]:
# Baseline: drive every agent with random actions for one episode and report the mean score.
# NOTE(review): this cell needs a live environment; in top-to-bottom order it follows an
# env.close() cell, so run it before closing (or re-create `env` first).
env_info = env.reset(train_mode=False)[brain_name]     # reset the environment
num_agents = len(env_info.agents)
action_size = brain.vector_action_space_size           # action dimension reported by the brain
states = env_info.vector_observations                  # get the current state (for each agent)
scores = np.zeros(num_agents)                          # initialize the score (for each agent)

while True:
    actions = np.random.randn(num_agents, action_size) # select an action (for each agent)
    actions = np.clip(actions, -1, 1)                  # all actions between -1 and 1
    env_info = env.step(actions)[brain_name]           # send all actions to the environment
    next_states = env_info.vector_observations         # get next state (for each agent)
    rewards = env_info.rewards                         # get reward (for each agent)
    dones = env_info.local_done                        # see if episode finished
    scores += rewards                                  # update the score (for each agent)
    states = next_states                               # roll over states to next time step
    if np.any(dones):                                  # exit loop if episode finished
        break
print('Total score (averaged over agents) this episode: {}'.format(np.mean(scores)))

Total score (averaged over agents) this episode: 0.16899999622255563


In [10]:
# Shut down the Unity environment once the random-agent episode is done.
env.close()