# Continuous Control - Test

-----

Run the following cells to test the trained agents.

In [1]:
from unityagents import UnityEnvironment
from collections import deque
from ddpg_agent import Agent
import numpy as np
import torch
import matplotlib.pyplot as plt
%matplotlib inline

# --- Configuration -----------------------------------------------------------
# Machine-specific locations are gathered here so they can be adjusted in one
# place instead of being hunted down inside the calls below.
ENV_PATH = 'D:/Projects/deep-reinforcement-learning/p2_continuous-control/Reacher_multi_agent_Windows_x86_64/Reacher.exe'
ACTOR_CHECKPOINT = 'checkpoint_actor.pth'
CRITIC_CHECKPOINT = 'checkpoint_critic.pth'

# --- Environment -------------------------------------------------------------
env = UnityEnvironment(file_name=ENV_PATH)

# get the default brain
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# reset the environment (this first reset is only used to inspect the spaces)
env_info = env.reset(train_mode=True)[brain_name]

# number of agents
num_agents = len(env_info.agents)
print('Number of agents:', num_agents)

# size of each action
action_size = brain.vector_action_space_size
print('Size of each action:', action_size)

# examine the state space 
states = env_info.vector_observations
state_size = states.shape[1]
print('There are {} agents. Each observes a state with length: {}'.format(states.shape[0], state_size))
print('The state for the first agent looks like:', states[0])

# --- Agent -------------------------------------------------------------------
# The seed is drawn at random, so each run of this cell builds the agent with a
# different random_seed value.
agent = Agent(state_size, action_size, num_agents, random_seed=np.random.randint(19920320))

# Deserialize the checkpoints onto the CPU first: without map_location, a file
# saved from a CUDA device cannot be loaded on a machine without CUDA.
# load_state_dict then copies the values into the networks' existing parameters.
agent.actor_local.load_state_dict(torch.load(ACTOR_CHECKPOINT, map_location='cpu'))
agent.critic_local.load_state_dict(torch.load(CRITIC_CHECKPOINT, map_location='cpu'))


INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		goal_speed -> 1.0
		goal_size -> 5.0
Unity brain name: ReacherBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 33
        Number of stacked Vector Observation: 1
        Vector Action space type: continuous
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


Number of agents: 20
Size of each action: 4
There are 20 agents. Each observes a state with length: 33
The state for the first agent looks like: [ 0.00000000e+00 -4.00000000e+00  0.00000000e+00  1.00000000e+00
 -0.00000000e+00 -0.00000000e+00 -4.37113883e-08  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00 -1.00000000e+01  0.00000000e+00
  1.00000000e+00 -0.00000000e+00 -0.00000000e+00 -4.37113883e-08
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  5.75471878e+00 -1.00000000e+00
  5.55726624e+00  0.00000000e+00  1.00000000e+00  0.00000000e+00
 -1.68164849e-01]


IncompatibleKeys(missing_keys=[], unexpected_keys=[])

In [2]:
# Roll the loaded agent through a single episode (capped at 2000 steps) and
# print the running score, averaged over all agents, after every step.
env_info = env.reset(train_mode=False)[brain_name]
states = env_info.vector_observations
agent.reset()
score = np.zeros(num_agents)  # one accumulated reward total per agent

for t in range(2000):
    # Ask the agent for actions and advance the environment by one step.
    actions = agent.act(states)
    env_info = env.step(actions)[brain_name]

    # Unpack what the environment reported for this step.
    rewards, dones = env_info.rewards, env_info.local_done
    next_states = env_info.vector_observations

    # Accumulate rewards, then move on to the new observations.
    score += rewards
    states = next_states
    print("Score:", score.mean())

    # Stop as soon as any agent reports that its episode has finished.
    if any(dones):
        break


Score: 0.0
Score: 0.0
Score: 0.0
Score: 0.0
Score: 0.0
Score: 0.0
Score: 0.0
Score: 0.0
Score: 0.004499999899417162
Score: 0.013499999698251487
Score: 0.023999999463558196
Score: 0.035499999206513165
Score: 0.050999998860061166
Score: 0.06749999849125743
Score: 0.0874999980442226
Score: 0.10899999756366015
Score: 0.135499996971339
Score: 0.16549999630078674
Score: 0.19699999559670686
Score: 0.22899999488145112
Score: 0.25899999421089887
Score: 0.290499993506819
Score: 0.3244999927468598
Score: 0.3584999919869006
Score: 0.3904999912716448
Score: 0.4279999904334545
Score: 0.46499998960644007
Score: 0.5029999887570739
Score: 0.5409999879077076
Score: 0.5774999870918691
Score: 0.6134999862872064
Score: 0.6494999854825437
Score: 0.685499984677881
Score: 0.7214999838732183
Score: 0.7594999830238521
Score: 0.7974999821744859
Score: 0.8354999813251197
Score: 0.8749999804422259
Score: 0.9149999795481563
Score: 0.9549999786540866
Score: 0.9949999777600169
Score: 1.0349999768659472
Score: 1.07499

Score: 11.696499738562853
Score: 11.736499737668783
Score: 11.776499736774714
Score: 11.816499735880644
Score: 11.853499735053628
Score: 11.889499734248966
Score: 11.925499733444303
Score: 11.961499732639641
Score: 11.997499731834978
Score: 12.033499731030314
Score: 12.070999730192124
Score: 12.10899972934276
Score: 12.146999728493393
Score: 12.184999727644026
Score: 12.223999726772309
Score: 12.26399972587824
Score: 12.30399972498417
Score: 12.3439997240901
Score: 12.38399972319603
Score: 12.42399972230196
Score: 12.463999721407891
Score: 12.502999720536172
Score: 12.540999719686805
Score: 12.57899971883744
Score: 12.616999717988074
Score: 12.654999717138708
Score: 12.692999716289341
Score: 12.729999715462327
Score: 12.765999714657664
Score: 12.80299971383065
Score: 12.838999713025988
Score: 12.874999712221324
Score: 12.910999711416661
Score: 12.946999710612
Score: 12.982999709807336
Score: 13.019499708991498
Score: 13.057499708142132
Score: 13.096499707270414
Score: 13.13449970642104

Score: 23.643999471515418
Score: 23.683499470632523
Score: 23.721499469783158
Score: 23.75949946893379
Score: 23.797499468084425
Score: 23.832499467302114
Score: 23.866499466542155
Score: 23.898999465815724
Score: 23.930999465100466
Score: 23.964999464340508
Score: 23.99899946358055
Score: 24.03299946282059
Score: 24.06699946206063
Score: 24.10099946130067
Score: 24.134999460540712
Score: 24.168999459780753
Score: 24.202999459020795
Score: 24.236999458260833
Score: 24.272499457467347
Score: 24.308499456662684
Score: 24.34449945585802
Score: 24.38049945505336
Score: 24.416499454248697
Score: 24.452499453444034
Score: 24.48849945263937
Score: 24.524499451834707
Score: 24.560499451030047
Score: 24.596499450225384
Score: 24.632999449409546
Score: 24.670999448560178
Score: 24.708999447710813
Score: 24.746999446861444
Score: 24.78499944601208
Score: 24.822999445162715
Score: 24.860999444313347
Score: 24.898999443463982
Score: 24.936999442614614
Score: 24.97499944176525
Score: 25.012999440915

Score: 35.65949920294806
Score: 35.6954992021434
Score: 35.731499201338735
Score: 35.765999200567606
Score: 35.80149919977411
Score: 35.83549919901416
Score: 35.87249919818714
Score: 35.91049919733778
Score: 35.94499919656664
Score: 35.980499195773156
Score: 36.01649919496849
Score: 36.05249919416383
Score: 36.08849919335917
Score: 36.1244991925545
Score: 36.16049919174984
Score: 36.196499190945175
Score: 36.23299919012934
Score: 36.2694991893135
Score: 36.30549918850884
Score: 36.341999187693
Score: 36.3814991868101
Score: 36.42049918593839
Score: 36.46049918504432
Score: 36.50049918415025
Score: 36.54049918325618
Score: 36.58049918236211
Score: 36.618499181512746
Score: 36.656499180663374
Score: 36.69449917981401
Score: 36.732499178964645
Score: 36.77049917811528


In [3]:
# Close the Unity environment opened in the first cell once testing is done.
env.close()