## Watch the Trained Agent

### 1. Start the Environment for the Trained Agent

In [1]:
from unityagents import UnityEnvironment
import numpy as np
import torch
from ddpg_agent import Agent

# Launch the Reacher simulator. The path points at the Windows build of the
# environment; it has to be adjusted when running on another platform.
env = UnityEnvironment(file_name='Reacher_Windows_x86_64/Reacher.exe')
# 33 and 4 match the per-agent observation and action sizes the environment
# reports on startup. The meaning of the third argument (8) is defined by
# ddpg_agent.Agent -- presumably a random seed; TODO confirm.
agent = Agent(33, 4, 8)

# get the default brain
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		goal_speed -> 1.0
		goal_size -> 5.0
Unity brain name: ReacherBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 33
        Number of stacked Vector Observation: 1
        Vector Action space type: continuous
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


### 2. Prepare Player

In [2]:
def play(agent, episodes=5):
    """Run the agent in the Unity environment and print each episode's mean score.

    Relies on the notebook-level globals ``env`` (the Unity environment) and
    ``brain_name`` (the key used to select the brain's info from the
    environment's reset/step results).

    Args:
        agent: Object exposing ``act(states, add_noise=False)`` that returns
            the actions to send to the environment for the given states.
        episodes: Number of episodes to play.

    Returns:
        None. One summary line per episode is printed.
    """
    for i_episode in range(episodes):
        env_info = env.reset(train_mode=False)[brain_name]     # reset the environment
        states = env_info.vector_observations                  # get the current state (for each agent)
        # Size the score vector from the observations (one row per agent)
        # instead of hard-coding the number of agents.
        scores = np.zeros(len(states))                         # initialize the score (for each agent)
        while True:
            actions = agent.act(states, add_noise=False)       # query the policy with exploration noise disabled
            env_info = env.step(actions)[brain_name]           # send all actions to the environment
            rewards = env_info.rewards                         # get reward (for each agent)
            dones = env_info.local_done                        # see if episode finished
            scores += rewards                                  # update the score (for each agent)
            states = env_info.vector_observations              # roll over states to next time step
            if np.any(dones):                                  # exit loop if episode finished
                break
        print('Episode: {} Average Score (over agents): {}'.format(i_episode, np.mean(scores)))

### 3. Play Before Training

In [3]:
# Baseline run: no checkpoint has been loaded yet, so the agent still has the
# weights it was constructed with.
play(agent, episodes=2)

Episode: 0 Average Score (over agents): 0.0
Episode: 1 Average Score (over agents): 0.0


### 4. Load Trained Weights

In [4]:
def load(agent, actor_file, critic_file):
    """Load saved actor/critic weights into the agent's local and target networks.

    Args:
        agent: Object exposing ``actor_local``, ``actor_target``,
            ``critic_local`` and ``critic_target`` modules.
        actor_file: Path to the saved actor state dict.
        critic_file: Path to the saved critic state dict.

    Returns:
        None. The four networks are updated in place.
    """
    # Deserialize each checkpoint once and reuse it for both networks.
    # map_location='cpu' allows a checkpoint saved on a GPU to be read on a
    # CPU-only machine; load_state_dict then copies the values into the
    # existing parameters, so each network stays on its current device.
    actor_state = torch.load(actor_file, map_location='cpu')
    critic_state = torch.load(critic_file, map_location='cpu')

    agent.actor_local.load_state_dict(actor_state)
    agent.actor_target.load_state_dict(actor_state)
    agent.critic_local.load_state_dict(critic_state)
    agent.critic_target.load_state_dict(critic_state)
    
# Restore the saved actor and critic checkpoints into the agent's networks.
load(agent, 'checkpoint_actor.pth', 'checkpoint_critic.pth')

### 5. Play After Training

In [5]:
# Replay with the checkpoint weights now loaded into the agent.
play(agent, episodes=3)

Episode: 0 Average Score (over agents): 38.78149913316592
Episode: 1 Average Score (over agents): 38.71349913468585
Episode: 2 Average Score (over agents): 38.77949913321063


In [6]:
# Shut down the Unity environment now that playback is finished.
env.close()