# Project 2 - Continuous Control (1-agent Reacher environment)
## Payam Mousavi

In [4]:
import numpy as np
import matplotlib.pyplot as plt
from collections import deque

# Torch:
import torch
import torch.nn as nn
import torch.optim as optim
from unityagents import UnityEnvironment

# Modified from UdacityRL github:
from config import Config
from model import Actor, Critic 
from agent import DDPGAgent, OUNoise, ReplayBuffer

## Setting up the environment:

In [5]:
# Launch the Unity Reacher binary and pick the first brain it exposes.
reacher_binary = 'Reacher_Linux/Reacher.x86_64'
env = UnityEnvironment(file_name=reacher_binary)
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# Reset with train_mode=True and keep the info entry belonging to that brain.
initial_infos = env.reset(train_mode=True)
env_info = initial_infos[brain_name]

UnityTimeOutException: The Unity environment took too long to respond. Make sure that :
	 The environment does not need user interaction to launch
	 The Academy and the External Brain(s) are attached to objects in the Scene
	 The environment and the Python interface have compatible versions.

## Setting up the parameters:

In [6]:
# Hyperparameters and factory callables for the DDPG agent.
# Requires `brain` and `env_info` from the environment-setup cell above.
# Each lambda reads its `config` attributes when it is called, not when it is
# defined, so values changed on `config` before the factory is invoked are
# picked up.
config = Config()

# --- General / environment-derived settings ---
config.seed = 0
config.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
config.action_size = brain.vector_action_space_size
config.states = env_info.vector_observations
config.state_size = config.states.shape[1]
config.num_agents = len(env_info.agents)

# --- Actor ---
config.actor_hidden_units = (256, 128)
config.actor_learning_rate = 1e-4
# NOTE(review): arguments are passed as (action_size, state_size, hidden_units,
# seed) -- confirm this order matches Actor's signature in model.py.
config.actor_network_fn = lambda: Actor(config.action_size,
                                        config.state_size,
                                        config.actor_hidden_units,
                                        config.seed).to(config.device)

config.actor_optimizer_fn = lambda params: torch.optim.Adam(params,
                                                            lr=config.actor_learning_rate)

# --- Critic ---
config.critic_hidden_units = (256, 128)
config.critic_learning_rate = 3e-4
config.weight_decay = 0
# NOTE(review): same argument-order caveat as for Actor -- confirm in model.py.
config.critic_network_fn = lambda: Critic(config.action_size,
                                          config.state_size,
                                          config.critic_hidden_units,
                                          config.seed).to(config.device)

# Pass weight_decay explicitly so that changing config.weight_decay actually
# affects the critic optimizer (0 is also Adam's default, so the current
# behaviour is unchanged).
config.critic_optimizer_fn = lambda params: torch.optim.Adam(params,
                                                             lr=config.critic_learning_rate,
                                                             weight_decay=config.weight_decay)

# --- Replay buffer / learning schedule ---
config.batch_size = 256
config.buffer_size = int(1e6)
config.discount = 0.98
config.update_every = 5
config.memory_fn = lambda: ReplayBuffer(config.action_size,
                                        config.buffer_size,
                                        config.batch_size,
                                        config.seed, config.device)

# --- Exploration noise ---
config.noise_fn = lambda: OUNoise(config.action_size,
                                  config.seed)


NameError: name 'brain' is not defined

In [None]:
# Alternative hyperparameter set, kept disabled for reference.
# Compared with the active configuration it uses seed 2, wider actor/critic
# networks (512, 256), batch size 512, discount 0.99 and update_every 4.
# Uncommenting and running this cell replaces `config` with a fresh Config().
# config = Config()

# config.seed = 2
# config.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# config.action_size = brain.vector_action_space_size
# config.states = env_info.vector_observations
# config.state_size = config.states.shape[1]
# config.num_agents = len(env_info.agents)

# config.actor_hidden_units = (512, 256)
# config.actor_learning_rate = 1e-4
# config.actor_network_fn = lambda: Actor(config.action_size, config.state_size, config.actor_hidden_units, config.seed).to(config.device)
# config.actor_optimizer_fn = lambda params: torch.optim.Adam(params, lr=config.actor_learning_rate)

# config.critic_hidden_units = (512, 256)
# config.critic_learning_rate = 3e-4
# config.weight_decay = 0
# config.critic_network_fn = lambda: Critic(config.action_size, config.state_size, config.critic_hidden_units, config.seed).to(config.device)
# config.critic_optimizer_fn = lambda params: torch.optim.Adam(params, lr=config.critic_learning_rate)

# config.batch_size = 512
# config.buffer_size = int(1e6)
# config.discount = 0.99
# config.update_every = 4
# config.memory_fn = lambda: ReplayBuffer(config.action_size, config.buffer_size, config.batch_size, config.seed, config.device)

# config.noise_fn = lambda: OUNoise(config.action_size, config.seed)

## Setting up the algorithm:

In [None]:
# Build the DDPG agent from the settings and factory callables stored on `config`.
# NOTE(review): presumably DDPGAgent calls the *_fn factories to create its
# networks, optimizers, replay buffer and noise process -- confirm in agent.py.
agent = DDPGAgent(config)

## Training:

## Testing: