# Continuous Control - Test

-----

Run the following code to test trained agents

In [1]:
from unityagents import UnityEnvironment
from collections import deque
from ddpg_agent import Agent
import numpy as np
import torch
import matplotlib.pyplot as plt
%matplotlib inline

env = UnityEnvironment(file_name='D:/Projects/drl_tennis/Tennis_Windows_x86_64/Tennis.exe')

# get the default brain
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# reset the environment
env_info = env.reset(train_mode=True)[brain_name]

# number of agents
num_agents = len(env_info.agents)
print('Number of agents:', num_agents)

# size of each action
action_size = brain.vector_action_space_size
print('Size of each action:', action_size)

# examine the state space 
states = env_info.vector_observations
state_size = states.shape[1]
print('There are {} agents. Each observes a state with length: {}'.format(states.shape[0], state_size))
print('The state for the first agent looks like:', states[0])

agent = Agent(state_size, action_size, num_agents, random_seed=np.random.randint(19920320))

agent.actor_local.load_state_dict(torch.load('checkpoint_actor.pth'))
agent.critic_local.load_state_dict(torch.load('checkpoint_critic.pth'))


INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		
Unity brain name: TennisBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 8
        Number of stacked Vector Observation: 3
        Vector Action space type: continuous
        Vector Action space size (per agent): 2
        Vector Action descriptions: , 


Number of agents: 2
Size of each action: 2
There are 2 agents. Each observes a state with length: 24
The state for the first agent looks like: [ 0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.         -6.65278625 -1.5
 -0.          0.          6.83172083  6.         -0.          0.        ]


IncompatibleKeys(missing_keys=[], unexpected_keys=[])

In [2]:
env_info = env.reset(train_mode=False)[brain_name]
states = env_info.vector_observations
agent.reset()
score = np.zeros(num_agents)
for t in range(2000):
    actions = agent.act(states)
    env_info = env.step(actions)[brain_name]
    next_states = env_info.vector_observations
    rewards = env_info.rewards
    dones = env_info.local_done
    states = next_states
    score += rewards
    print("Score:", np.average(score))
    if any(dones):
        break


Score: 0.0
Score: 0.0
Score: 0.0
Score: 0.0
Score: 0.0
Score: 0.0
Score: 0.0
Score: 0.0
Score: 0.0
Score: 0.0
Score: 0.0
Score: 0.0
Score: 0.0
Score: 0.0
Score: 0.0
Score: 0.0
Score: 0.0
Score: 0.0
Score: 0.05000000074505806
Score: 0.05000000074505806
Score: 0.05000000074505806
Score: 0.05000000074505806
Score: 0.05000000074505806
Score: 0.05000000074505806
Score: 0.05000000074505806
Score: 0.05000000074505806
Score: 0.05000000074505806
Score: 0.05000000074505806
Score: 0.05000000074505806
Score: 0.05000000074505806
Score: 0.05000000074505806
Score: 0.05000000074505806
Score: 0.05000000074505806
Score: 0.05000000074505806
Score: 0.05000000074505806
Score: 0.05000000074505806
Score: 0.05000000074505806
Score: 0.05000000074505806
Score: 0.05000000074505806
Score: 0.10000000149011612
Score: 0.10000000149011612
Score: 0.10000000149011612
Score: 0.10000000149011612
Score: 0.10000000149011612
Score: 0.10000000149011612
Score: 0.10000000149011612
Score: 0.10000000149011612
Score: 0.1000000014

Score: 0.850000012665987
Score: 0.850000012665987
Score: 0.850000012665987
Score: 0.850000012665987
Score: 0.850000012665987
Score: 0.850000012665987
Score: 0.850000012665987
Score: 0.850000012665987
Score: 0.850000012665987
Score: 0.850000012665987
Score: 0.850000012665987
Score: 0.850000012665987
Score: 0.850000012665987
Score: 0.850000012665987
Score: 0.850000012665987
Score: 0.850000012665987
Score: 0.850000012665987
Score: 0.850000012665987
Score: 0.850000012665987
Score: 0.9000000134110451
Score: 0.9000000134110451
Score: 0.9000000134110451
Score: 0.9000000134110451
Score: 0.9000000134110451
Score: 0.9000000134110451
Score: 0.9000000134110451
Score: 0.9000000134110451
Score: 0.9000000134110451
Score: 0.9000000134110451
Score: 0.9000000134110451
Score: 0.9000000134110451
Score: 0.9000000134110451
Score: 0.9000000134110451
Score: 0.9000000134110451
Score: 0.9000000134110451
Score: 0.9000000134110451
Score: 0.9000000134110451
Score: 0.9000000134110451
Score: 0.9000000134110451
Score

Score: 1.650000024586916
Score: 1.650000024586916
Score: 1.650000024586916
Score: 1.650000024586916
Score: 1.700000025331974
Score: 1.700000025331974
Score: 1.700000025331974
Score: 1.700000025331974
Score: 1.700000025331974
Score: 1.700000025331974
Score: 1.700000025331974
Score: 1.700000025331974
Score: 1.700000025331974
Score: 1.700000025331974
Score: 1.700000025331974
Score: 1.700000025331974
Score: 1.700000025331974
Score: 1.700000025331974
Score: 1.700000025331974
Score: 1.700000025331974
Score: 1.700000025331974
Score: 1.700000025331974
Score: 1.700000025331974
Score: 1.750000026077032
Score: 1.750000026077032
Score: 1.750000026077032
Score: 1.750000026077032
Score: 1.750000026077032
Score: 1.750000026077032
Score: 1.750000026077032
Score: 1.750000026077032
Score: 1.750000026077032
Score: 1.750000026077032
Score: 1.750000026077032
Score: 1.750000026077032
Score: 1.750000026077032
Score: 1.750000026077032
Score: 1.750000026077032
Score: 1.750000026077032
Score: 1.750000026077032


In [3]:
env.close()