# Continuous Control

---

## 1. Import the Necessary Packages

In [1]:
#import gym
from unityagents import UnityEnvironment
import random
import torch
import numpy as np
from collections import deque
import matplotlib.pyplot as plt
%matplotlib inline

from ddpg_agent import Agent

## 2. Instantiate the Environment and Agent

In [2]:
# Launch the Unity Reacher build referenced by a path relative to this notebook.
# NOTE(review): './Reacher_1.app' looks like a macOS app bundle — confirm and
# adjust the path when running on another platform or with another build.
env = UnityEnvironment(file_name='./Reacher_1.app')

# Use the first brain the environment exposes as the default brain.
# `brain_name` is the key later cells use to index the results of
# env.reset() / env.step(); `brain` holds that brain's parameters.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		goal_speed -> 1.0
		goal_size -> 5.0
Unity brain name: ReacherBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 33
        Number of stacked Vector Observation: 1
        Vector Action space type: continuous
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


In [3]:
# Reset the environment in training mode and keep the info for our brain.
env_info = env.reset(train_mode=True)[brain_name]
states = env_info.vector_observations

# Problem dimensions as reported by the environment:
#   num_agents  - number of agents listed in the reset info
#   action_size - length of one action vector
#   state_size  - length of one agent's observation (second axis of `states`)
num_agents = len(env_info.agents)
action_size = brain.vector_action_space_size
state_size = states.shape[1]

# Report what we found, including a sample observation for the first agent.
print('Number of agents:', num_agents)
print('Size of each action:', action_size)
print('There are {} agents. Each observes a state with length: {}'.format(states.shape[0], state_size))
print('The state for the first agent looks like:', states[0])

Number of agents: 1
Size of each action: 4
There are 1 agents. Each observes a state with length: 33
The state for the first agent looks like: [ 0.00000000e+00 -4.00000000e+00  0.00000000e+00  1.00000000e+00
 -0.00000000e+00 -0.00000000e+00 -4.37113883e-08  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00 -1.00000000e+01  0.00000000e+00
  1.00000000e+00 -0.00000000e+00 -0.00000000e+00 -4.37113883e-08
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  5.75471878e+00 -1.00000000e+00
  5.55726671e+00  0.00000000e+00  1.00000000e+00  0.00000000e+00
 -1.68164849e-01]


In [4]:
# Build the DDPG agent from the dimensions measured on the environment in the
# previous cell (state_size, action_size, num_agents) instead of hard-coded
# literals, so the notebook keeps working if a different Reacher build is loaded.
# random_seed is fixed for reproducibility.
agent = Agent(state_size=state_size, action_size=action_size, random_seed=2, num_agents=num_agents)

## 3. Train the Agent with DDPG

In [5]:
def ddpg(n_episodes=1000, max_t=300, print_every=100, 
         eps_start=1.0, eps_end=0.01, eps_decay=0.995):
    """Train the notebook-level ``agent`` on the Unity ``env`` with DDPG.

    Relies on the notebook globals ``env``, ``brain_name`` and ``agent``.
    After every episode the local actor and critic weights are written to
    ``checkpoint_actor.pth`` and ``checkpoint_critic.pth`` in the working
    directory, so an interrupted run still leaves usable checkpoints.

    Args:
        n_episodes (int): number of training episodes to run.
        max_t (int): maximum number of environment steps per episode.
        print_every (int): a progress line is kept every ``print_every``
            episodes; it is also the window size of the running average.
        eps_start (float): initial value of ``eps`` handed to ``agent.act``
            (presumably an exploration-noise scale -- confirm in ddpg_agent).
        eps_end (float): lower bound for ``eps``.
        eps_decay (float): multiplicative decay applied to ``eps`` after
            each episode.

    Returns:
        list: one score per episode, where an episode's score is the sum over
        its steps of the reward averaged across agents (with a single agent
        this is simply that agent's return).
    """
    scores_deque = deque(maxlen=print_every)
    scores = []
    eps = eps_start
    
    for i_episode in range(1, n_episodes+1):
        # reset the environment and read the initial state
        env_info = env.reset(train_mode=True)[brain_name]
        state = env_info.vector_observations
        # initialize the score
        score = 0
        for t in range(max_t):
            # select an action (for each agent)
            action = agent.act(state, eps)
            # send action to the environment
            env_info = env.step(action)[brain_name]
            # get the next_state, reward, done
            next_state = env_info.vector_observations
            reward = env_info.rewards
            done = env_info.local_done
            # store experience and train the agent
            agent.step(state, action, reward, next_state, done)
            # roll over state to next time step
            state = next_state
            # update the score: average over agents so that every agent
            # contributes, not only the first one
            score += float(np.mean(reward))
            # exit loop if episode finished for any agent
            if np.any(done):
                break 
        # save most recent score
        scores_deque.append(score)
        scores.append(score)
        # decrease epsilon after each episode
        eps = max(eps_end, eps_decay*eps)
        
        # running average over the last `print_every` episodes
        avg_score = np.mean(scores_deque)
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, avg_score), end="")
        torch.save(agent.actor_local.state_dict(), 'checkpoint_actor.pth')
        torch.save(agent.critic_local.state_dict(), 'checkpoint_critic.pth')
        if i_episode % print_every == 0:
            # re-print with a newline so this progress line is not overwritten
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, avg_score))
            
    return scores

In [None]:
# Short training run: 100 episodes of at most 300 steps each, keeping a
# progress line every 10 episodes.
scores = ddpg(n_episodes=100, max_t=300, print_every=10)

# Plot the score of every episode. Draw on the explicit Axes object instead of
# creating an Axes and then plotting through pyplot's implicit current-axes state.
fig, ax = plt.subplots()
ax.plot(np.arange(1, len(scores)+1), scores)
ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
plt.show()

Episode 10	Average Score: 0.04
Episode 11	Average Score: 0.04

In [None]:
# Close the Unity environment now that training and plotting are done.
env.close()

In [18]:
import sys
import time

# Scratch demo of an in-place progress message: write "Loading." then
# "Loading.." then "Loading..." and finally print the number of updates.
for x in range(3):
    a = x + 1
    b = "Loading" + "." * a
    # The leading '\r' is a carriage return, so in a terminal each message is
    # written over the previous one instead of on a new line.
    sys.stdout.write('\r' + b)
    #time.sleep(0.5)
print(a)

Loading.Loading..Loading...3
