In [6]:
import numpy as np

class OUNoise(object):
    """Discrete Ornstein-Uhlenbeck noise process with a linearly annealed sigma.

    Each step moves the internal state towards ``mu`` at rate ``theta`` and
    adds Gaussian noise scaled by ``sigma``. ``sigma`` is annealed linearly
    from ``max_sigma`` down to ``min_sigma`` as the step counter ``t`` passed
    to :meth:`get_noise` goes from 0 to ``decay_period``.

    Args:
        action_space: Number of noise dimensions (used directly as the length
            of the state vector).
        mu: Mean the state reverts to.
        theta: Mean-reversion rate.
        max_sigma: Noise scale at ``t = 0``.
        min_sigma: Noise scale once ``t >= decay_period``.
        decay_period: Number of steps over which sigma is annealed.
    """

    def __init__(self, action_space, mu=0.0, theta=0.15, max_sigma=0.7, min_sigma=0.4, decay_period=600_000):
        self.mu = mu
        self.theta = theta
        self.sigma = max_sigma
        self.max_sigma = max_sigma
        self.min_sigma = min_sigma
        self.decay_period = decay_period
        self.action_dim = action_space
        self.reset()

    def reset(self):
        """Reset the process state to ``mu`` in every dimension."""
        self.state = np.ones(self.action_dim) * self.mu

    def evolve_state(self):
        """Advance the process by one step and return the new state array."""
        x = self.state
        dx = self.theta * (self.mu - x) + self.sigma * np.random.randn(self.action_dim)
        self.state = x + dx
        return self.state

    def get_noise(self, t=0, verbose=True):
        """Return one noise sample and update sigma for step ``t``.

        The sample is drawn with the sigma currently stored on the instance;
        sigma is then recomputed from ``t`` for subsequent draws.

        Args:
            t: Current step counter driving the sigma schedule.
            verbose: When true (the default), print the updated sigma and the
                sampled state.

        Returns:
            The updated state array of length ``action_dim``.
        """
        ou_state = self.evolve_state()
        # Fraction of the decay schedule completed, clamped to [0, 1] so that
        # out-of-range step counters cannot push sigma outside its bounds.
        progress = min(1.0, max(0.0, float(t) / self.decay_period))
        # Sigma is a pure function of t (interpolated from max_sigma), so it
        # does not compound with the number of calls made so far.
        annealed = self.max_sigma - (self.max_sigma - self.min_sigma) * progress
        self.sigma = max(annealed, self.min_sigma)
        if verbose:
            print('sigma:', self.sigma, 'state:', ou_state)
        return ou_state

In [7]:
# Two-dimensional noise source: sigma starts at 0.1 and is floored at 0.01.
noise = OUNoise(
    action_space=2,
    max_sigma=0.1,
    min_sigma=0.01,
    decay_period=500_000,
)

In [8]:
# Sample once at step 60,000, written with conventional thousands grouping.
noise.get_noise(t=60_000)

sigma: 0.0892 state: [-0.16627995  0.12382876]


array([-0.16627995,  0.12382876])

In [9]:
from env_utils import GoalManager

In [10]:
# Build the goal manager. Per the output recorded below this cell, construction
# prints each obstacle's name, base pose and corner coordinates.
GM = GoalManager()

[92mObstacle name: wall_outler, base pose: (0.0, 0.0)[0m
Coordinates: [[[11.5, -8.425], [11.5, -11.575], [-11.5, -11.575], [-11.5, -8.425]], [[11.575, 11.5], [11.575, -11.5], [8.425, -11.5], [8.425, 11.5]], [[11.5, 11.575], [11.5, 8.425], [-11.5, 8.425], [-11.5, 11.575]], [[-8.425, 11.5], [-8.425, -11.5], [-11.575, -11.5], [-11.575, 11.5]]]

[92mObstacle name: wall_single_5m_1, base pose: (4.0, 9.5)[0m
Coordinates: [[[5.1975, 11.5625], [5.1975, 3.5625], [2.0475000000000003, 3.5625], [2.0475000000000003, 11.5625]]]

[92mObstacle name: wall_single_5m_2, base pose: (5.5, 2.5)[0m
Coordinates: [[[6.6975, 4.5625], [6.6975, -3.4375], [3.5474999999999994, -3.4375], [3.5474999999999994, 4.5625]]]

[92mObstacle name: wall_single_5m_3, base pose: (-2.0, -5.5)[0m
Coordinates: [[[-0.8025, -3.4375], [-0.8025, -11.4375], [-3.9524999999999997, -11.4375], [-3.9524999999999997, -3.4375]]]

[92mObstacle name: wall_single_5m_4, base pose: (-9.5, 1.0)[0m
Coordinates: [[[-8.3025, 3.0625], [-8.3025