# Demonstration

## Installation

In [None]:
!pip -q install ./python

## Import functionality

In [None]:
from unityagents import UnityEnvironment
import numpy as np

# Import our agent and supporting utilities
from ddpg_agent import Agent
#from random import random as rnd
import torch

## Initialization

In [None]:
# vm_ picks which Reacher build to launch: True uses the build stored under
# /data, False uses the copy in the local Reacher_Linux directory.
vm_ = True

# Version 1 of the environment (with a single agent) is the /data build;
# otherwise the executable is obtained from the local path.
env_file = (
    '/data/Reacher_One_Linux_NoVis/Reacher_One_Linux_NoVis.x86_64'
    if vm_
    else 'Reacher_Linux/Reacher.x86'
)
env = UnityEnvironment(file_name=env_file)

# Work with the default brain, i.e. the first one the environment lists.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# Reset the environment in training mode and keep this brain's info.
env_info = env.reset(train_mode=True)[brain_name]

# Collect the quantities the later cells rely on before reporting them.
num_agents = len(env_info.agents)              # number of agents
action_size = brain.vector_action_space_size   # size of each action
states = env_info.vector_observations          # current observations
state_size = states.shape[1]                   # length of one agent's state

# Report what was found, in the same order as the values were described above.
print('Number of agents:', num_agents)
print('Size of each action:', action_size)
print('There are {} agents. Each observes a state with length: {}'.format(states.shape[0], state_size))
print('The state for the first agent looks like:', states[0])

## Useful Functions

In [None]:
def act(env, actions, brain_name="ReacherBrain") -> tuple:
    """Step the environment with ``actions`` and report what happened.

    Args:
        env: Environment whose ``step(actions)`` result can be indexed by
            brain name.
        actions: Actions passed unchanged to ``env.step``.
        brain_name: Key selecting which brain's result to read.

    Returns:
        A ``(rewards, next_states, dones)`` tuple (one entry per agent), read
        from the result's ``rewards``, ``vector_observations`` and
        ``local_done`` attributes respectively.
    """
    outcome = env.step(actions)[brain_name]  # act, then look at this brain only
    rewards = outcome.rewards
    next_states = outcome.vector_observations
    dones = outcome.local_done  # True where the episode ended
    return rewards, next_states, dones
    
def reset(env, training=True, brain_name="ReacherBrain") -> np.ndarray:
    """Reset the Unity environment and return one brain's observations.

    Args:
        env: Environment whose ``reset(train_mode=...)`` result can be indexed
            by brain name.
        training: Value forwarded as ``train_mode`` to ``env.reset``.
        brain_name: Key selecting which brain's info to read.

    Returns:
        The ``vector_observations`` attribute of the selected brain's info.
    """
    fresh_info = env.reset(train_mode=training)
    return fresh_info[brain_name].vector_observations

def visualize(agent, env):
    """Run ``agent`` in ``env`` until an episode ends, then print the score.

    The environment is reset through ``reset`` with its default arguments and
    stepped through ``act``. Only the first entry of each reward list is added
    to the score, and the loop stops as soon as any entry of ``dones`` is
    truthy.

    Args:
        agent: Object exposing ``decide(states)`` that returns the actions.
        env: Environment accepted by ``reset`` and ``act``.
    """
    observations = reset(env)
    total = 0
    while True:
        chosen = agent.decide(observations)                 # pick actions for the current states
        rewards, observations, dones = act(env, chosen)     # apply them; states roll over
        total += rewards[0]                                 # track the first agent's reward
        if any(dones):
            break
    print("Score: {}".format(total))

## Simulate!

In [None]:
# Build the agent from the state and action sizes measured during
# initialization. The third argument is presumably a random seed -- confirm
# against ddpg_agent.Agent.
agent = Agent(state_size, action_size, 0)

# Deserialize the checkpoint onto the CPU so that weights saved from a GPU run
# still load on a machine without CUDA. load_state_dict later copies the values
# into the agent's networks on whatever device those networks live on.
state_dict = torch.load('my_weights.pth', map_location='cpu')

Load the saved weights into the agent's actor and critic networks

In [None]:
# Restore the actor first, then the critic, each from its checkpoint entry.
actor_weights = state_dict['actor_state_dict']
agent.actor_local.load_state_dict(actor_weights)
critic_weights = state_dict['critic_state_dict']
agent.critic_local.load_state_dict(critic_weights)

Run one episode with the loaded agent and print its score

In [None]:
# Play one episode with the restored agent; visualize prints the final score.
visualize(agent, env)

When finished, you can close the environment.

In [None]:
# Close the Unity environment now that the demonstration is finished.
env.close()