## Watch the trained agent perform

In [1]:
import numpy as np
import torch
from collections import deque
from maddpg import MADDPG
from unityagents import UnityEnvironment

# Launch the Unity Tennis build (path is relative to the notebook's working directory).
env = UnityEnvironment(file_name="Tennis_Windows_x86_64/Tennis.exe")

# The environment exposes its brains by name; this notebook drives the first one.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# A single reset yields an initial batch of observations to read the dimensions from.
env_info = env.reset(train_mode=True)[brain_name]

# Problem dimensions consumed by the cells below.
num_agents = len(env_info.agents)
action_size = brain.vector_action_space_size
state = env_info.vector_observations[0]   # first row of the observation batch
state_size = len(state)

# Report what was discovered.
print('Number of agents:', num_agents)
print('Number of actions:', action_size)
print('States have length:', state_size)

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		
Unity brain name: TennisBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 8
        Number of stacked Vector Observation: 3
        Vector Action space type: continuous
        Vector Action space size (per agent): 2
        Vector Action descriptions: , 


Number of agents: 2
Number of actions: 2
States have length: 24


### Load the trained agent

In [2]:
# Build the agent with the dimensions discovered from the environment.
# n_agents comes from num_agents (computed in the setup cell) rather than a
# hard-coded 2, so the agent always matches the environment and the per-agent
# score buffer used when playing.
agent = MADDPG(state_size=state_size, action_size=action_size, n_agents=num_agents, random_seed=0)

# Restore the trained actor weights from the checkpoint file.
# The map_location callable returns each storage unchanged as it is
# deserialized, so a checkpoint saved on a GPU machine also loads on a
# CPU-only one (see the torch.load documentation).
agent.actor_local.load_state_dict(torch.load('checkpoint_actor.pth', map_location=lambda storage, loc: storage))

In [3]:
# Play one episode with the trained agent and report each agent's total reward.
MAX_STEPS = 5000  # hard cap on environment steps in case no agent ever reports done

try:
    env_info = env.reset(train_mode=False)[brain_name]  # reset the environment
    states = env_info.vector_observations               # get the current state
    scores = np.zeros(num_agents)                       # one running total per agent

    for _ in range(MAX_STEPS):
        actions = agent.act(states)                     # select an action
        env_info = env.step(actions)[brain_name]        # send the action to the environment
        next_states = env_info.vector_observations      # get the next state
        rewards = env_info.rewards                      # get the reward
        dones = env_info.local_done                     # see if episode has finished

        scores += rewards                               # update the score
        states = next_states                            # roll over the state to next time step
        if np.any(dones):                               # stop as soon as any agent is done
            break

    print("Final Score: {}".format(scores))
finally:
    # Close the environment even when the episode raises or the cell is
    # interrupted; otherwise env.close() would never be reached.
    env.close()

Final Score: [2.60000004 2.60000004]
