# Demostration

## Installation

In [1]:
!pip -q install ./python

## Import functionality

In [2]:
from unityagents import UnityEnvironment
import numpy as np
import time

# Invite our agent & import utils
from ddpg_agent import Agent
#from random import random as rnd
import torch

## Initialization

In [3]:
vm_ = False
if vm_:
    # select version 1 (with a single agent) of the environment
    env = UnityEnvironment(file_name="/data/Tennis_Linux_NoVis/Tennis")
else:
    env = UnityEnvironment(file_name="data/Tennis_Linux/Tennis.x86_64")
    
# get the default brain
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# reset the environment
env_info = env.reset(train_mode=True)[brain_name]

# number of agents 
num_agents = len(env_info.agents)
print('Number of agents:', num_agents)

# size of each action
action_size = brain.vector_action_space_size
print('Size of each action:', action_size)

# examine the state space 
states = env_info.vector_observations
state_size = states.shape[1]
print('There are {} agents. Each observes a state with length: {}'.format(states.shape[0], state_size))
print('The state for the first agent looks like:', states[0])

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		
Unity brain name: TennisBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 8
        Number of stacked Vector Observation: 3
        Vector Action space type: continuous
        Vector Action space size (per agent): 2
        Vector Action descriptions: , 


Number of agents: 2
Size of each action: 2
There are 2 agents. Each observes a state with length: 24
The state for the first agent looks like: [ 0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.         -6.65278625 -1.5
 -0.          0.          6.83172083  6.         -0.          0.        ]


## Useful Functions

In [4]:
# Useful Functions

def act(env, actions, brain_name="TennisBrain") -> tuple:
    """Sends actions to the environment env and observes the results.
    Returns a tuple of rewards, next_states, dones (One per agent)"""
    action_result = env.step(actions)[brain_name] # Act on the environment and observe the result
    return (action_result.rewards,
            action_result.vector_observations, # next states
            action_result.local_done) # True if the episode ended
    
def reset(env, training=True, brain_name="TennisBrain") -> np.ndarray:
    """Resetting the unity environment"""
    return env.reset(train_mode=training)[brain_name].vector_observations

def visualize(agents, env): 
    states = reset(env)
    scores = np.zeros(num_agents)
    done = False
    while not done:
        actions = np.vstack([agent.decide(np.expand_dims(state, 0), as_tensor=False) 
                             for agent, state in zip(agents, states)]) # Choose actions
        rewards, next_states, dones = act(env, actions)    # Send the action to the environment
        scores += rewards[0]                                # Update the score
        states = next_states                             # Roll over the state to next time step
        done = np.any(dones)
        time.sleep(.03)
    print("Scores: {}".format(scores))


## Simulate!

In [5]:
# Loading the agents weights
agents = []
state_dict = torch.load('my_weights.pth')
for agent_name in ('A', 'B'):
    agent = Agent(state_size=state_size, action_size=action_size, num_agents=num_agents)
    agent.actor_local.load_state_dict(state_dict[f'{agent_name}_actor_state_dict'])
    agent.critic_local.load_state_dict(state_dict[f'{agent_name}_critic_state_dict'])
    agents.append(agent)


In [6]:
visualize(agents, env)

Scores: [1.60000002 1.60000002]


When finished, you can close the environment.

In [7]:
env.close()