# Let's see how the trained agent works
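Run this notebook only after the agent has been trained: it loads the checkpoints saved under `./models/` and evaluates each of them over 100 test episodes.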

In [1]:
import sys
import numpy as np
import torch
import pickle

from time import time, strftime, gmtime

from unityagents import UnityEnvironment
from ddpg_agent import Agent

In [2]:
# Launch the Reacher executable (Windows build, per the path below).
env = UnityEnvironment(file_name='./Reacher_Windows_x86_64_OneAgent/Reacher.exe')

# Use the first brain exposed by the environment and read the action
# dimensionality from it.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]
action_size = brain.vector_action_space_size

# Reset once so that the observation dimensionality can be read from the
# second axis of the returned observations.
env_info = env.reset(train_mode=True)[brain_name]
states = env_info.vector_observations
state_size = states.shape[1]

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		goal_speed -> 1.0
		goal_size -> 5.0
Unity brain name: ReacherBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 33
        Number of stacked Vector Observation: 1
        Vector Action space type: continuous
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


In [3]:
# Build the agent with the dimensions read from the environment; trained
# weights are loaded into its networks later, one checkpoint at a time.
agent = Agent(
    state_size=state_size,
    action_size=action_size,
    random_seed=2,
)

In [4]:
def test_results(episodes = 100):
    test_scores = []
    for i in range(episodes):
        env_info = env.reset(train_mode=True)[brain_name]
        state = env_info.vector_observations    
        score = 0
        step = 0
        while True:
            action = agent.act(state, add_noise=False)
            env_info = env.step(action)[brain_name]        
            next_state = env_info.vector_observations[0]        
            reward = env_info.rewards[0]                       
            done = env_info.local_done[0] 
            score += reward                         
            state = next_state                               
            step += 1
            if done:    
                sys.stdout.write('Episode: {}, Score: {:.2f}         \r'.format(i+1, score))
                test_scores.append(score)
                break
    return test_scores

In [5]:
# Evaluate every saved checkpoint (checkpoint numbers 100, 200, ..., 2000).
scores_dic = {}  # checkpoint number -> list of per-episode test scores
t = time()       # taken once, so the time printed below is cumulative
for i in range(100, 2001, 100):
    # map_location='cpu' lets checkpoints saved from CUDA tensors load on a
    # CPU-only machine; load_state_dict then copies the values into the
    # networks' existing parameters.
    agent.actor_local.load_state_dict(
        torch.load('./models/checkpoint_actor_{}.pth'.format(i), map_location='cpu'))
    agent.critic_local.load_state_dict(
        torch.load('./models/checkpoint_critic_{}.pth'.format(i), map_location='cpu'))
    print('Testing agent saved after {} episodes'.format(i))
    scores_dic[i] = test_results()
    print('Mean score: {:.2f}. Computation time: ({})'.format(
        np.mean(scores_dic[i]), strftime('%H:%M:%S', gmtime(time() - t))
    ))
    

Testing agent saved after 100 episodes
Mean score: 2.70. Computation time: (00:05:20)
Testing agent saved after 200 episodes
Mean score: 8.70. Computation time: (00:10:40)
Testing agent saved after 300 episodes
Mean score: 35.62. Computation time: (00:15:58)
Testing agent saved after 400 episodes
Mean score: 35.50. Computation time: (00:21:17)
Testing agent saved after 500 episodes
Mean score: 36.24. Computation time: (00:26:38)
Testing agent saved after 600 episodes
Mean score: 31.22. Computation time: (00:31:55)
Testing agent saved after 700 episodes
Mean score: 37.04. Computation time: (00:37:11)
Testing agent saved after 800 episodes
Mean score: 35.47. Computation time: (00:42:27)
Testing agent saved after 900 episodes
Mean score: 35.08. Computation time: (00:47:43)
Testing agent saved after 1000 episodes
Mean score: 33.64. Computation time: (00:52:59)
Testing agent saved after 1100 episodes
Mean score: 36.12. Computation time: (00:58:15)
Testing agent saved after 1200 episodes
Mea

In [6]:
# Persist the per-checkpoint test scores. The context manager guarantees the
# file is flushed and closed even if pickling raises.
with open('./test_scores.p', 'wb') as scores_file:
    pickle.dump(scores_dic, scores_file)

In [None]:
# Close the Unity environment now that all checkpoints have been evaluated.
env.close()