In [1]:
from unityagents import UnityEnvironment
import numpy as np

import random
import torch

from collections import deque
import matplotlib.pyplot as plt
%matplotlib inline

from ddpg_agent import Agent

# Launch the Unity Reacher build; the executable path is relative to the
# notebook's working directory.
env = UnityEnvironment(file_name="Reacher.app")

# get the default brain
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

action_size = brain.vector_action_space_size

env_info = env.reset(train_mode=True)[brain_name]     # reset the environment

# Read the agent count and per-agent observation size from the first reset
# instead of hard-coding them (previously num_agents = 1, state_size = 33), so
# the training loop always matches what the environment actually returns.
# NOTE(review): assumes vector_observations is a 2-D array of shape
# (num_agents, state_size) — consistent with how ddpg() indexes it per agent.
num_agents = len(env_info.vector_observations)
state_size = env_info.vector_observations.shape[1]

# Agent is defined in ddpg_agent.py; the original random seed was 2.
agent = Agent(state_size=state_size, action_size=action_size, random_seed=11)

def ddpg(n_episodes=6000, max_t=1200, print_every=100, target_score=31):
    """Train the module-level ``agent`` on the module-level Unity ``env``.

    Each episode resets the environment, steps it for at most ``max_t`` time
    steps (stopping early as soon as any agent reports ``done``), and feeds
    every agent's transition to ``agent.step``. The actor and critic weights
    are written to ``checkpoint_actor.pth`` / ``checkpoint_critic.pth`` after
    every episode.

    Relies on the globals ``env``, ``brain_name``, ``agent`` and ``num_agents``.

    Args:
        n_episodes: Maximum number of training episodes.
        max_t: Maximum number of environment steps per episode.
        print_every: Size of the rolling-average window, and the interval (in
            episodes) at which a persistent progress line is printed.
        target_score: Training stops once the rolling average of the episode
            scores reaches this value.

    Returns:
        A list with one entry per completed episode: the episode score
        averaged over all agents.
    """
    scores_deque = deque(maxlen=print_every)   # last `print_every` episode scores
    scores = []                                # every episode score, in order

    for i_episode in range(1, n_episodes + 1):

        env_info = env.reset(train_mode=True)[brain_name]   # reset the environment
        agent.reset()
        scores_a = np.zeros(num_agents)                     # running score (for each agent)
        states = env_info.vector_observations               # current state (for each agent)

        for _ in range(max_t):

            actions = agent.act(state=states, add_noise=True)   # select an action (for each agent)
            actions = np.clip(actions, -1, 1)                   # all actions between -1 and 1
            env_info = env.step(actions)[brain_name]            # send all actions to the environment
            next_states = env_info.vector_observations          # get next state (for each agent)
            rewards = env_info.rewards                          # get reward (for each agent)
            dones = env_info.local_done                         # see if episode finished
            scores_a += rewards                                 # update the score (for each agent)

            # Hand each agent's transition to the learner separately.
            for k in range(num_agents):
                agent.step(state=states[k], action=actions[k],
                           reward=rewards[k], next_state=next_states[k],
                           done=dones[k])

            states = next_states                                # roll over states to next time step

            if np.any(dones):                                   # exit loop if episode finished
                break

        episode_score = np.mean(scores_a)
        scores.append(episode_score)
        # Append the scalar episode score (not the whole `scores` list) so the
        # deque really is a rolling window over the last `print_every` episodes.
        scores_deque.append(episode_score)
        average_score = np.mean(scores_deque)

        # One status line carrying both numbers; two consecutive '\r' prints
        # would leave only the second one visible.
        print('\rEpisode {}\tScore: {:.2f}\tAverage Score: {:.2f}'.format(
            i_episode, episode_score, average_score), end="")

        torch.save(agent.actor_local.state_dict(), 'checkpoint_actor.pth')
        torch.save(agent.critic_local.state_dict(), 'checkpoint_critic.pth')

        if i_episode % print_every == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, average_score))

        if average_score >= target_score:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, average_score))
            break

    return scores

# Run training with the default settings; `scores` holds one value per episode.
scores = ddpg()

# Plot the per-episode score against the 1-based episode number, drawing
# directly on the axes object created for the figure.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(np.arange(1, len(scores) + 1), scores)
ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
plt.show()

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		goal_speed -> 1.0
		goal_size -> 5.0
Unity brain name: ReacherBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 33
        Number of stacked Vector Observation: 1
        Vector Action space type: continuous
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


TypeError: 'torch.Device' object is not callable