## Watch the Trained Agent

### 1. Start the Environment for the Trained Agent

In [1]:
from unityagents import UnityEnvironment
import numpy as np
import torch
from ppo_agent import Agent

# Launch the Crawler build that the agents will be played in
env = UnityEnvironment(file_name='Crawler.exe')

# The two agents are constructed identically apart from their hidden-layer widths
shared_agent_kwargs = dict(state_size=129, action_size=20, random_seed=8, n_agent=12)
agent_1024 = Agent(fc1_units=1024, fc2_units=1024, **shared_agent_kwargs)
agent_128 = Agent(fc1_units=128, fc2_units=128, **shared_agent_kwargs)

# Use the first brain exposed by the environment as the default one
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		
Unity brain name: CrawlerBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 129
        Number of stacked Vector Observation: 1
        Vector Action space type: continuous
        Vector Action space size (per agent): 20
        Vector Action descriptions: , , , , , , , , , , , , , , , , , , , 


### 2. Prepare Player and Load Weights

In [2]:
def play(agent, episodes=5, max_t=1000):
    """Play the agent in the Unity environment and print one score line per episode.

    Relies on the notebook-level ``env`` and ``brain_name`` globals.

    Args:
        agent: Object exposing ``act(states)``, which must return a 4-tuple
            ``(actions, log_probs, <ignored>, values)``, and ``save_step(step)``.
        episodes (int): Number of episodes to play.
        max_t (int): Upper bound on environment steps per episode.

    An episode ends after ``max_t`` steps or as soon as any agent reports done.
    The printed "Max Timestep" is the zero-based index of the last step taken.
    """
    for i_episode in range(episodes):
        env_info = env.reset(train_mode=False)[brain_name]     # reset the environment
        states = env_info.vector_observations                  # get the current state (for each agent)
        # Size the score buffer from the observations instead of hard-coding the
        # agent count, so the function works for any number of agents.
        agent_scores = np.zeros(len(states))
        t_max = 0                                              # reset per episode so it never reports a stale value
        for t in range(max_t):
            actions, log_probs, _, values = agent.act(states)
            env_info = env.step(actions)[brain_name]           # send all actions to the environment
            next_states = env_info.vector_observations         # get next state (for each agent)
            rewards = env_info.rewards                         # get reward (for each agent)
            dones = np.array([1 if done else 0 for done in env_info.local_done])
            # NOTE(review): steps are still recorded on the agent during playback,
            # as in the original; confirm whether save_step is needed when only watching.
            agent.save_step([states, values.detach(), actions, log_probs.detach(), rewards, 1 - dones])
            agent_scores += rewards                            # update the score (for each agent)
            states = next_states                               # roll over states to next time step
            t_max = t
            if np.any(dones):                                  # exit loop if episode finished
                break
        print('Episode: {}, Average Score (over agents): {:.2f}, Max Timestep: {} '\
              .format(i_episode, np.mean(agent_scores), t_max))
              

### 3. Play Before Training (agent_1024, Max Step = 300)

In [3]:
# No checkpoint has been loaded into agent_1024 at this point in the notebook
play(agent_1024, episodes=3, max_t=300)

Episode: 0, Average Score (over agents): 14.45, Max Timestep: 299 
Episode: 1, Average Score (over agents): 15.26, Max Timestep: 299 
Episode: 2, Average Score (over agents): 16.00, Max Timestep: 299 


### 4. Prepare Loader 

In [4]:
def load(agent, actor_file, critic_file, map_location='cpu'):
    """Load saved actor and critic weights into ``agent.actor_critic``.

    Args:
        agent: Object whose ``actor_critic.actor`` and ``actor_critic.critic``
            expose ``load_state_dict``.
        actor_file: Path to the saved actor state dict.
        critic_file: Path to the saved critic state dict.
        map_location: Forwarded to ``torch.load``. Defaults to ``'cpu'`` so that
            checkpoints saved on a GPU can be read on a CPU-only machine.

    Both files are read before either network is modified, so a missing or
    unreadable file leaves the agent's current weights untouched.
    """
    # NOTE: torch.load unpickles the file; only load checkpoints from trusted sources.
    actor_state = torch.load(actor_file, map_location=map_location)
    critic_state = torch.load(critic_file, map_location=map_location)
    agent.actor_critic.actor.load_state_dict(actor_state)
    agent.actor_critic.critic.load_state_dict(critic_state)

### 5. Load and Play: Trained Weights with Max Step = 1000, Score = 500, agent_1024

In [6]:
# Actor/critic checkpoint pair whose filenames are tagged 676epis_500score
actor_checkpoint = 'checkpoints/checkpoint_actor_676epis_500score.pth'
critic_checkpoint = 'checkpoints/checkpoint_critic_676epis_500score.pth'
load(agent_1024, actor_checkpoint, critic_checkpoint)
play(agent_1024, episodes=3, max_t=1000)

Episode: 0, Average Score (over agents): 22.43, Max Timestep: 54 
Episode: 1, Average Score (over agents): 105.05, Max Timestep: 188 
Episode: 2, Average Score (over agents): 19.97, Max Timestep: 61 


### 6. Load and Play: Trained Weights with Max Step = 1500, Score = 650, agent_1024

In [7]:
# Actor/critic checkpoint pair whose filenames are tagged 550epis_650score
actor_checkpoint = 'checkpoints/checkpoint_actor_550epis_650score.pth'
critic_checkpoint = 'checkpoints/checkpoint_critic_550epis_650score.pth'
load(agent_1024, actor_checkpoint, critic_checkpoint)
play(agent_1024, episodes=3, max_t=1500)

Episode: 0, Average Score (over agents): 427.84, Max Timestep: 867 
Episode: 1, Average Score (over agents): 196.80, Max Timestep: 399 
Episode: 2, Average Score (over agents): 441.65, Max Timestep: 901 


### 7. Load and Play: Trained Weights with Max Step = 2000, Score = 800, agent_128

In [9]:
# Actor/critic checkpoint pair whose filenames are tagged 532epis_800score
actor_checkpoint = 'checkpoints/checkpoint_actor_532epis_800score.pth'
critic_checkpoint = 'checkpoints/checkpoint_critic_532epis_800score.pth'
load(agent_128, actor_checkpoint, critic_checkpoint)
play(agent_128, episodes=3, max_t=2000)

Episode: 0, Average Score (over agents): 147.95, Max Timestep: 234 
Episode: 1, Average Score (over agents): 605.50, Max Timestep: 943 
Episode: 2, Average Score (over agents): 576.79, Max Timestep: 914 


### 8. Load and Play: Trained Weights with Max Step = 2000, Score = 1200, agent_128

In [10]:
# Actor/critic checkpoint pair whose filenames are tagged 678epis_1200score
actor_checkpoint = 'checkpoints/checkpoint_actor_678epis_1200score.pth'
critic_checkpoint = 'checkpoints/checkpoint_critic_678epis_1200score.pth'
load(agent_128, actor_checkpoint, critic_checkpoint)
play(agent_128, episodes=3, max_t=2000)

Episode: 0, Average Score (over agents): 80.24, Max Timestep: 117 
Episode: 1, Average Score (over agents): 81.68, Max Timestep: 118 
Episode: 2, Average Score (over agents): 19.43, Max Timestep: 40 


In [11]:
# Close the Unity environment now that the last playback cell has run
env.close()