In [8]:
import numpy as np

class OUNoise(object):
    """Ornstein-Uhlenbeck exploration noise with a linearly decaying sigma.

    The internal state follows ``x <- x + theta * (mu - x) + sigma * N(0, 1)``
    and ``sigma`` is annealed linearly from ``max_sigma`` down to ``min_sigma``
    over ``decay_period`` steps.

    Args:
        action_space: Number of noise dimensions (stored as ``action_dim``).
        mu: Mean the state reverts to.
        theta: Mean-reversion rate.
        max_sigma: Noise scale at ``t = 0``.
        min_sigma: Noise scale once ``t >= decay_period``.
        decay_period: Number of steps over which sigma is annealed.
    """

    def __init__(self, action_space, mu=0.0, theta=0.15, max_sigma=0.7, min_sigma=0.4, decay_period=600_000):
        self.mu = mu
        self.theta = theta
        self.sigma = max_sigma
        self.max_sigma = max_sigma
        self.min_sigma = min_sigma
        self.decay_period = decay_period
        self.action_dim = action_space
        self.reset()

    def reset(self):
        """Reset the process state to ``mu`` in every dimension (sigma is left untouched)."""
        self.state = np.ones(self.action_dim) * self.mu

    def evolve_state(self):
        """Advance the process by one step and return the new state array."""
        x = self.state
        dx = self.theta * (self.mu - x) + self.sigma * np.random.randn(self.action_dim)
        self.state = x + dx
        return self.state

    def get_noise(self, t=0):
        """Return the next noise sample and update sigma for step ``t``.

        The state is evolved with the sigma left by the previous call; sigma is
        then set to its scheduled value for ``t`` so the next call uses it.

        Args:
            t: Current step index used to position sigma on the decay schedule.

        Returns:
            numpy array of length ``action_dim`` holding the current OU state.
        """
        ou_state = self.evolve_state()
        # Fraction of the schedule completed, clamped to [0, 1] so that a
        # negative or very large t cannot push sigma outside [min, max].
        decay_fraction = min(1.0, max(0.0, float(t) / self.decay_period))
        # Anneal from max_sigma (not from the current sigma): subtracting from
        # the running value would compound the decay on every call.
        scheduled = self.max_sigma - (self.max_sigma - self.min_sigma) * decay_fraction
        self.sigma = max(scheduled, self.min_sigma)
        print('sigma:', self.sigma, 'state:', ou_state)
        return ou_state

In [9]:
# Two-dimensional noise source whose sigma is scheduled between 0.1 and 0.01
# over a 500_000-step decay period.
noise = OUNoise(
    action_space=2,
    max_sigma=0.1,
    min_sigma=0.01,
    decay_period=500_000,
)

In [10]:
# Sample the noise at step 60_000 (digit grouping written in thousands).
noise.get_noise(t=60_000)

sigma: 0.0892 state: [0.03973214 0.02855663]


array([0.03973214, 0.02855663])

In [11]:
from env_utils import GoalManager

In [12]:
# Build the goal manager with no arguments; per the recorded output of this
# cell, construction prints the obstacle name, base pose and coordinates.
GM = GoalManager()

[92mObstacle name: wall_outler, base pose: (0.0, 0.0, 0.0)[0m
Coordinates: [[[11.5, -8.425], [11.5, -11.575], [-11.5, -11.575], [-11.5, -8.425]], [[21.5, 1.575], [21.5, -1.575], [-1.5, -1.575], [-1.5, 1.575]], [[11.5, 11.575], [11.5, 8.425], [-11.5, 8.425], [-11.5, 11.575]], [[1.5, 1.575], [1.5, -1.575], [-21.5, -1.575], [-21.5, 1.575]]]



In [15]:
from drlutils_reward import Reward

# Diagonal of a 20 x 20 square, used to normalise goal distances
# (presumably the extent of the arena -- TODO confirm).
MAX_GOAL_DISTANCE = np.sqrt(20**2 + 20**2)

reward = Reward()

# Initialise with zero angle to goal and a distance of 7.0 scaled by
# MAX_GOAL_DISTANCE, using the goal manager created earlier.
reward.reward_initalize(
    init_angle_to_goal=0.0,
    init_distance_to_goal=7.0 / MAX_GOAL_DISTANCE,
    goal_manager=GM,
)



In [14]:
# Display the reward object's waypoint_list attribute.
# NOTE(review): this cell's execution count (14) is lower than the
# initialisation cell's (15), so the recorded output may predate the latest
# reward_initalize call -- re-run top to bottom to confirm.
reward.waypoint_list

[0.2, 0.15, 0.09999999999999998, 0.04999999999999999]