In [167]:
Code Example: https://github.com/adventuresinML/adventures-in-ml-code/blob/master/r_learning_tensorflow.py

In [11]:
import numpy as np
import tensorflow as tf
import gym
import matplotlib.pylab as plt
import random
import math

# Hyperparameters for the epsilon-greedy Q-learning agent defined below.
# Exploration rate at step 0 (probability of picking a random action).
MAX_EPSILON = 1
# Floor that the exploration rate decays towards.
MIN_EPSILON = 0.01
# Rate of the exponential epsilon decay, applied per environment step.
LAMBDA = 0.0001
# Discount factor applied to the best predicted next-state Q value.
GAMMA = 0.99
# Number of stored transitions sampled for each training update.
BATCH_SIZE = 50

In [12]:
#def takeaction(env):
    #a = env.action_space.sample()
    #print("This is the next action sample: {} {} {} {}".format(a[0],a[1],a[2],a[3]))
    #return a
#    return[0.1,0,0,0]

In [13]:
#env_name = "FetchPickAndPlace-v1"
#env = gym.make(env_name)

#env.reset()


In [14]:
#env.render()
#env.step(takeaction(env))

In [15]:


class Model:
    """Fully connected Q-value network built with the TensorFlow 1.x graph API.

    The graph maps a batch of state vectors to one output per action and is
    trained with Adam on the mean squared error against target Q values.

    Args:
        num_states: Length of a single state vector (network input width).
        num_actions: Number of network outputs (one Q value per action).
        batch_size: Replay batch size. It is only stored and exposed through
            the ``batch_size`` property; the graph accepts any batch length.
    """

    def __init__(self, num_states, num_actions, batch_size):
        self._num_states = num_states
        self._num_actions = num_actions
        self._batch_size = batch_size
        # placeholders, created in _define_model()
        self._states = None
        self._q_s_a = None
        # the output operations
        self._logits = None
        self._optimizer = None
        self._var_init = None
        # now setup the model
        self._define_model()

    def _define_model(self):
        """Create placeholders, the two hidden layers, the loss and the train op."""
        self._states = tf.placeholder(shape=[None, self._num_states], dtype=tf.float32)
        # target Q values, one row per state and one column per action
        self._q_s_a = tf.placeholder(shape=[None, self._num_actions], dtype=tf.float32)
        # create a couple of fully connected hidden layers
        fc1 = tf.layers.dense(self._states, 50, activation=tf.nn.relu)
        fc2 = tf.layers.dense(fc1, 50, activation=tf.nn.relu)
        # linear output layer: raw Q-value estimates, no activation
        self._logits = tf.layers.dense(fc2, self._num_actions)
        loss = tf.losses.mean_squared_error(self._q_s_a, self._logits)
        self._optimizer = tf.train.AdamOptimizer().minimize(loss)
        self._var_init = tf.global_variables_initializer()

    def predict_one(self, state, sess):
        """Return the network output for a single state.

        Args:
            state: One state with ``num_states`` elements. Any array-like is
                accepted; it is converted with ``np.asarray`` before reshaping.
            sess: Session in which the graph variables have been initialised.

        Returns:
            The output of the network for a batch containing only ``state``.
        """
        single_batch = np.asarray(state).reshape(1, self.num_states)
        return sess.run(self._logits, feed_dict={self._states: single_batch})

    def predict_batch(self, states, sess):
        """Return the network output for every row of ``states``."""
        return sess.run(self._logits, feed_dict={self._states: states})

    def train_batch(self, sess, x_batch, y_batch):
        """Run one optimiser step with inputs ``x_batch`` and targets ``y_batch``."""
        sess.run(self._optimizer, feed_dict={self._states: x_batch, self._q_s_a: y_batch})

    @property
    def num_states(self):
        """Length of one state vector."""
        return self._num_states

    @property
    def num_actions(self):
        """Number of outputs produced per state."""
        return self._num_actions

    @property
    def batch_size(self):
        """Replay batch size this model was configured with."""
        return self._batch_size

    @property
    def var_init(self):
        """Op that initialises all graph variables; run it once per session."""
        return self._var_init


In [16]:

class Memory:
    """Bounded first-in-first-out store of samples with random retrieval."""

    def __init__(self, max_memory):
        self._samples = []
        self._max_memory = max_memory

    def add_sample(self, sample):
        """Store ``sample``; once over capacity, discard the oldest entry."""
        self._samples.append(sample)
        overflow = len(self._samples) > self._max_memory
        if overflow:
            del self._samples[0]

    def sample(self, no_samples):
        """Return up to ``no_samples`` stored entries chosen without replacement.

        When fewer entries are stored than requested, all of them are
        returned (in random order).
        """
        count = min(no_samples, len(self._samples))
        return random.sample(self._samples, count)
        

In [57]:

class GameRunner:
    """Plays episodes with an epsilon-greedy agent and trains it by replay.

    Args:
        sess: Session handed to every model call.
        model: Object providing ``num_states``, ``num_actions``, ``batch_size``,
            ``predict_one``, ``predict_batch`` and ``train_batch``.
        env: Environment whose ``reset()`` and ``step()`` return dict
            observations containing the keys ``"observation"``,
            ``"achieved_goal"`` and ``"desired_goal"``.
        memory: Replay store providing ``add_sample`` and ``sample``.
        max_eps: Exploration rate before any step has been taken.
        min_eps: Value the exploration rate decays towards.
        decay: Rate of the exponential decay, applied per step.
        render: When true, ``env.render()`` is called before every step.
    """

    def __init__(self, sess, model, env, memory, max_eps, min_eps,
                 decay, render=True):
        self._sess = sess
        self._env = env
        self._model = model
        self._memory = memory
        self._render = render
        self._max_eps = max_eps
        self._min_eps = min_eps
        self._decay = decay
        self._eps = self._max_eps
        self._steps = 0
        self._reward_store = []

    def run(self):
        """Play one episode, training on a replay batch after every step.

        The episode's total reward is appended to ``reward_store`` and a
        summary line is printed when the episode ends.
        """
        state = self._env.reset()["observation"]
        tot_reward = 0
        while True:
            if self._render:
                self._env.render()

            action = self._choose_action(state)
            temp_state, reward, done, _info = self._env.step(action)

            # her: relabel the goal with what was actually achieved and treat
            # the step as terminal.
            # NOTE(review): `done` is overridden unconditionally, so every
            # episode ends after a single step and the environment's own
            # termination flag is ignored. The reward is the one returned by
            # step(); it is not recomputed for the relabelled goal. Confirm
            # this is intended.
            temp_state["desired_goal"] = temp_state["achieved_goal"]
            done = True

            # debug output: prints the full observation dict on every step
            print("State {}".format(temp_state))
            next_state = temp_state["observation"]

            # is the game complete? If so, set the next state to
            # None for storage sake
            if done:
                next_state = None

            self._memory.add_sample((state, action, reward, next_state))
            self._replay()

            # exponentially decay the eps value, using the schedule this
            # runner was constructed with rather than module-level constants
            self._steps += 1
            self._eps = self._min_eps + (self._max_eps - self._min_eps) \
                * math.exp(-self._decay * self._steps)

            # move the agent to the next state and accumulate the reward
            state = next_state
            tot_reward += reward

            # if the game is done, break the loop
            if done:
                self._reward_store.append(tot_reward)
                break

        print("Step {}, Total reward: {}, Eps: {}".format(self._steps, tot_reward, self._eps))

    def _choose_action(self, state):
        """Pick an action index epsilon-greedily.

        With probability ``eps`` a uniformly random index in
        ``[0, num_actions - 1]`` is returned, otherwise the index of the
        largest value predicted by the model for ``state``.

        NOTE(review): the index is passed to ``env.step()`` unchanged. If the
        environment expects a vector-valued action this is probably not the
        intended mapping - confirm against the environment's action space.
        """
        if random.random() < self._eps:
            return random.randint(0, self._model.num_actions - 1)
        return np.argmax(self._model.predict_one(state, self._sess))

    def _replay(self):
        """Train the model on one batch sampled from the replay memory."""
        batch = self._memory.sample(self._model.batch_size)
        states = np.array([val[0] for val in batch])
        # terminal transitions store None as next state; feed zeros for them
        # so the batch stays rectangular (their prediction is never used)
        next_states = np.array([(np.zeros(self._model.num_states)
                                 if val[3] is None else val[3]) for val in batch])
        # predict Q(s,a) given the batch of states
        q_s_a = self._model.predict_batch(states, self._sess)
        # predict Q(s',a') - so that we can do gamma * max(Q(s'a')) below
        q_s_a_d = self._model.predict_batch(next_states, self._sess)
        # setup training arrays
        x = np.zeros((len(batch), self._model.num_states))
        y = np.zeros((len(batch), self._model.num_actions))
        for i, (state, action, reward, next_state) in enumerate(batch):
            # start from the current predictions so that only the entry of
            # the action actually taken contributes to the loss
            current_q = q_s_a[i]
            if next_state is None:
                # the game completed after action, so there is no
                # max Q(s',a') prediction possible
                current_q[action] = reward
            else:
                current_q[action] = reward + GAMMA * np.amax(q_s_a_d[i])
            x[i] = state
            y[i] = current_q
        self._model.train_batch(self._sess, x, y)

    @property
    def reward_store(self):
        """List with the total reward of every episode run so far."""
        return self._reward_store


    

In [58]:
if __name__ == "__main__":
    # Train the agent on the Fetch pick-and-place task and plot the total
    # reward collected in each episode.
    env_name = 'FetchPickAndPlace-v1'
    env = gym.make(env_name)

    try:
        num_states = env.observation_space["observation"].shape[0]
        print(num_states)
        # NOTE(review): this uses the length of the action vector as the
        # number of discrete actions of the Q-network - confirm that this
        # mapping is intended for this environment.
        num_actions = env.action_space.shape[0]
        print(num_actions)

        model = Model(num_states, num_actions, BATCH_SIZE)
        mem = Memory(50000)

        with tf.Session() as sess:
            sess.run(model.var_init)
            gr = GameRunner(sess, model, env, mem, MAX_EPSILON, MIN_EPSILON,
                            LAMBDA)
            num_episodes = 500
            for cnt in range(num_episodes):
                if cnt % 10 == 0:
                    print('Episode {} of {}'.format(cnt+1, num_episodes))
                gr.run()
            plt.plot(gr.reward_store)
            plt.xlabel("Episode")
            plt.ylabel("Total reward")
            plt.show()
            plt.close("all")
    finally:
        # release the environment (and its render window) even when
        # training is interrupted or fails
        env.close()


25
4
Episode 1 of 500
Creating window glfw
State {'achieved_goal': array([1.23566991, 0.79149706, 0.42473605]), 'desired_goal': array([1.23566991, 0.79149706, 0.42473605]), 'observation': array([ 1.36945404e+00,  7.78820325e-01,  5.64114120e-01,  1.23566991e+00,
        7.91497062e-01,  4.24736048e-01, -1.33784128e-01,  1.26767376e-02,
       -1.39378071e-01,  3.97852140e-02,  4.16259342e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13, -2.49958924e-02, -2.62562578e-02,
       -2.64169060e-02,  1.87589293e-07, -2.88598912e-07, -1.50758860e-18,
        2.49958852e-02,  2.62562532e-02,  2.64435715e-02,  6.99533302e-02,
        7.08374623e-02])}
State {'achieved_goal': array([1.2356699 , 0.79149706, 0.42475675]), 'desired_goal': array([1.2356699 , 0.79149706, 0.42475675]), 'observation': array([ 1.40038018e+00,  8.12697465e-01,  5.97742402e-01,  1.23566990e+00,
        7.91497058e-01,  4.24756751e-01, -1.64710274e-01, -2.12004069e-02,
       -1.72985651e-01,  5.14205340e-02, 

State {'achieved_goal': array([1.2356698 , 0.79149699, 0.42478448]), 'desired_goal': array([1.2356698 , 0.79149699, 0.42478448]), 'observation': array([ 1.41848121e+00,  1.04493740e+00,  7.81122807e-01,  1.23566980e+00,
        7.91496988e-01,  4.24784482e-01, -1.82811414e-01, -2.53440411e-01,
       -3.56338325e-01,  5.00469908e-02,  5.00427364e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  5.73169258e-03, -1.05293628e-02,
       -6.25901637e-03,  1.85977012e-07, -2.86118480e-07,  3.41864130e-18,
       -5.73169970e-03,  1.05293582e-02,  6.25902002e-03, -1.80752830e-06,
       -3.43525028e-08])}
State {'achieved_goal': array([1.23566979, 0.79149698, 0.42478449]), 'desired_goal': array([1.23566979, 0.79149698, 0.42478449]), 'observation': array([ 1.40426854e+00,  1.04085955e+00,  7.76073975e-01,  1.23566979e+00,
        7.91496984e-01,  4.24784485e-01, -1.68598754e-01, -2.49362567e-01,
       -3.51289489e-01,  4.99314210e-02,  4.96054649e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.2356697 , 0.79149692, 0.42478449]), 'desired_goal': array([1.2356697 , 0.79149692, 0.42478449]), 'observation': array([ 1.33224792e+00,  1.14646031e+00,  8.30395057e-01,  1.23566970e+00,
        7.91496923e-01,  4.24784489e-01, -9.65782227e-02, -3.54963384e-01,
       -4.05610568e-01,  5.00480070e-02,  5.00416351e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  3.06743163e-03, -5.72628992e-03,
       -2.85920789e-03,  1.85976796e-07, -2.86118148e-07, -2.61737623e-18,
       -3.06743876e-03,  5.72628529e-03,  2.85920789e-03, -3.00351960e-06,
        1.41399551e-06])}
State {'achieved_goal': array([1.23566969, 0.79149692, 0.42478449]), 'desired_goal': array([1.23566969, 0.79149692, 0.42478449]), 'observation': array([ 1.32764875e+00,  1.15424831e+00,  8.34039783e-01,  1.23566969e+00,
        7.91496919e-01,  4.24784489e-01, -9.19790614e-02, -3.62751387e-01,
       -4.09255294e-01,  5.00465287e-02,  5.00426114e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.23566959, 0.79149685, 0.42478449]), 'desired_goal': array([1.23566959, 0.79149685, 0.42478449]), 'observation': array([ 1.27875935e+00,  1.20447990e+00,  8.51340406e-01,  1.23566959e+00,
        7.91496854e-01,  4.24784489e-01, -4.30897625e-02, -4.12983050e-01,
       -4.26555917e-01,  5.00459540e-02,  5.00431576e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  1.43747344e-03, -2.67997150e-03,
       -1.06886736e-03,  1.85976796e-07, -2.86118148e-07,  1.26832039e-18,
       -1.43748056e-03,  2.67996687e-03,  1.06886736e-03, -1.29552887e-06,
        1.27970425e-06])}
State {'achieved_goal': array([1.23566958, 0.79149685, 0.42478449]), 'desired_goal': array([1.23566958, 0.79149685, 0.42478449]), 'observation': array([ 1.26638966e+00,  1.19490526e+00,  8.43542713e-01,  1.23566958e+00,
        7.91496849e-01,  4.24784489e-01, -3.07200754e-02, -4.03408415e-01,
       -4.18758224e-01,  4.97716669e-02,  4.97234845e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.46840356, 0.63250729, 0.4247843 ]), 'desired_goal': array([1.46840356, 0.63250729, 0.4247843 ]), 'observation': array([ 1.45644973e+00,  8.77671375e-01,  6.60316392e-01,  1.46840356e+00,
        6.32507293e-01,  4.24784302e-01,  1.19538316e-02, -2.45164081e-01,
       -2.35532089e-01,  5.00414807e-02,  5.03366052e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13, -1.99289036e-02, -2.53578893e-02,
       -2.48121043e-02,  1.85982836e-07, -2.86127440e-07, -1.04198276e-18,
        1.99288965e-02,  2.53578846e-02,  2.48122065e-02,  1.07490840e-05,
       -8.21264308e-04])}
State {'achieved_goal': array([1.46840355, 0.63250729, 0.42478438]), 'desired_goal': array([1.46840355, 0.63250729, 0.42478438]), 'observation': array([ 1.46954485e+00,  9.07376450e-01,  6.86566586e-01,  1.46840355e+00,
        6.32507289e-01,  4.24784382e-01, -1.14129219e-03, -2.74869161e-01,
       -2.61782204e-01,  5.00471707e-02,  5.00606277e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.46840345, 0.63250722, 0.42478449]), 'desired_goal': array([1.46840345, 0.63250722, 0.42478449]), 'observation': array([ 1.36639600e+00,  1.10145720e+00,  8.07571354e-01,  1.46840345e+00,
        6.32507224e-01,  4.24784489e-01,  1.02007456e-01, -4.68949981e-01,
       -3.82786865e-01,  5.00883345e-02,  5.01554652e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  4.81720775e-03, -8.89046792e-03,
       -6.43799148e-03,  1.85976798e-07, -2.86118150e-07,  5.08090198e-21,
       -4.81721487e-03,  8.89046329e-03,  6.43799151e-03, -1.07397330e-04,
       -3.35673757e-04])}
State {'achieved_goal': array([1.46840345, 0.63250722, 0.42478449]), 'desired_goal': array([1.46840345, 0.63250722, 0.42478449]), 'observation': array([ 1.36124529e+00,  1.11304389e+00,  8.14504505e-01,  1.46840345e+00,
        6.32507219e-01,  4.24784489e-01,  1.07158162e-01, -4.80536672e-01,
       -3.89720016e-01,  5.00518796e-02,  5.00471327e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.46840335, 0.63250715, 0.42478449]), 'desired_goal': array([1.46840335, 0.63250715, 0.42478449]), 'observation': array([ 1.29790087e+00,  1.17836846e+00,  8.39691914e-01,  1.46840335e+00,
        6.32507154e-01,  4.24784489e-01,  1.70502475e-01, -5.45861310e-01,
       -4.14907425e-01,  5.01648736e-02,  5.01631086e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  2.48361242e-03, -4.46426066e-03,
       -3.68039061e-03,  1.85976796e-07, -2.86118148e-07, -3.98713780e-18,
       -2.48361955e-03,  4.46425603e-03,  3.68039061e-03, -3.22588224e-04,
       -3.62307116e-04])}
State {'achieved_goal': array([1.46840334, 0.63250715, 0.42478449]), 'desired_goal': array([1.46840334, 0.63250715, 0.42478449]), 'observation': array([ 1.29573360e+00,  1.18462188e+00,  8.43266093e-01,  1.46840334e+00,
        6.32507150e-01,  4.24784489e-01,  1.72669745e-01, -5.52114730e-01,
       -4.18481604e-01,  5.00575537e-02,  5.00468405e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.42209405, 0.88459165, 0.42478149]), 'desired_goal': array([1.42209405, 0.88459165, 0.42478149]), 'observation': array([ 1.46925769e+00,  9.07573008e-01,  6.87369632e-01,  1.42209405e+00,
        8.84591655e-01,  4.24781489e-01, -4.71636435e-02, -2.29813532e-02,
       -2.62588143e-01,  5.00476572e-02,  5.00422222e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  3.98516699e-04, -1.80763038e-02,
       -1.45260153e-02,  1.86074041e-07, -2.86267755e-07,  2.31385069e-19,
       -3.98523825e-04,  1.80762991e-02,  1.45276592e-02,  2.89212938e-06,
       -5.18570269e-06])}
State {'achieved_goal': array([1.42209404, 0.88459165, 0.42478277]), 'desired_goal': array([1.42209404, 0.88459165, 0.42478277]), 'observation': array([ 1.46838509e+00,  9.32757392e-01,  7.07273463e-01,  1.42209404e+00,
        8.84591650e-01,  4.24782768e-01, -4.62910463e-02, -4.81657415e-02,
       -2.82490695e-01,  5.00482762e-02,  5.00408789e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.42209394, 0.88459159, 0.42478449]), 'desired_goal': array([1.42209394, 0.88459159, 0.42478449]), 'observation': array([ 1.35927510e+00,  1.11182376e+00,  8.13009163e-01,  1.42209394e+00,
        8.84591585e-01,  4.24784488e-01,  6.28188461e-02, -2.27232170e-01,
       -3.88224675e-01,  5.00860333e-02,  5.01575979e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  4.85744641e-03, -8.16592671e-03,
       -5.80066615e-03,  1.85976820e-07, -2.86118184e-07,  2.44579326e-18,
       -4.85745353e-03,  8.16592208e-03,  5.80066654e-03, -1.00489352e-04,
       -3.42065224e-04])}
State {'achieved_goal': array([1.42209394, 0.88459158, 0.42478449]), 'desired_goal': array([1.42209394, 0.88459158, 0.42478449]), 'observation': array([ 1.35412174e+00,  1.12254510e+00,  8.19202588e-01,  1.42209394e+00,
        8.84591581e-01,  4.24784489e-01,  6.79721977e-02, -2.37953524e-01,
       -3.94418099e-01,  5.00517379e-02,  5.00472639e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.42209384, 0.88459152, 0.42478449]), 'desired_goal': array([1.42209384, 0.88459152, 0.42478449]), 'observation': array([ 1.28957212e+00,  1.18667617e+00,  8.42015595e-01,  1.42209384e+00,
        8.84591521e-01,  4.24784489e-01,  1.32521722e-01, -3.02084648e-01,
       -4.17231106e-01,  5.01640182e-02,  5.01638283e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  2.07650281e-03, -4.04627787e-03,
       -3.55775813e-03,  1.85976796e-07, -2.86118148e-07,  6.48480888e-19,
       -2.07650993e-03,  4.04627324e-03,  3.55775813e-03, -3.19534311e-04,
       -3.64963696e-04])}
State {'achieved_goal': array([1.42209384, 0.88459152, 0.42478449]), 'desired_goal': array([1.42209384, 0.88459152, 0.42478449]), 'observation': array([ 1.28790569e+00,  1.19242362e+00,  8.45426912e-01,  1.42209384e+00,
        8.84591516e-01,  4.24784489e-01,  1.34188152e-01, -3.07832104e-01,
       -4.20642423e-01,  5.00576142e-02,  5.00467714e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.35181547, 0.64903249, 0.42473605]), 'desired_goal': array([1.35181547, 0.64903249, 0.42473605]), 'observation': array([ 1.36945404e+00,  7.78820325e-01,  5.64114120e-01,  1.35181547e+00,
        6.49032487e-01,  4.24736048e-01, -1.76385695e-02, -1.29787838e-01,
       -1.39378071e-01,  3.97852140e-02,  4.16259342e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13, -2.49958924e-02, -2.62562578e-02,
       -2.64169060e-02,  1.87589293e-07, -2.88598912e-07,  2.10338945e-18,
        2.49958852e-02,  2.62562532e-02,  2.64435715e-02,  6.99533302e-02,
        7.08374623e-02])}
State {'achieved_goal': array([1.35181546, 0.64903248, 0.42475675]), 'desired_goal': array([1.35181546, 0.64903248, 0.42475675]), 'observation': array([ 1.40038018e+00,  8.12697465e-01,  5.97742402e-01,  1.35181546e+00,
        6.49032482e-01,  4.24756751e-01, -4.85647154e-02, -1.63664983e-01,
       -1.72985651e-01,  5.14205340e-02,  5.17228523e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.35181536, 0.64903242, 0.42478448]), 'desired_goal': array([1.35181536, 0.64903242, 0.42478448]), 'observation': array([ 1.40185249e+00,  1.07346182e+00,  7.97545409e-01,  1.35181536e+00,
        6.49032417e-01,  4.24784478e-01, -5.00371301e-02, -4.24429402e-01,
       -3.72760932e-01,  5.00452734e-02,  5.00438329e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  6.16729877e-03, -8.75570762e-03,
       -4.53814478e-03,  1.85977172e-07, -2.86118726e-07, -1.98556980e-18,
       -6.16730589e-03,  8.75570299e-03,  4.53815114e-03, -2.73059827e-07,
        2.72958125e-07])}
State {'achieved_goal': array([1.35181536, 0.64903241, 0.42478448]), 'desired_goal': array([1.35181536, 0.64903241, 0.42478448]), 'observation': array([ 1.39305283e+00,  1.08562410e+00,  8.03730789e-01,  1.35181536e+00,
        6.49032413e-01,  4.24784482e-01, -4.12374725e-02, -4.36591685e-01,
       -3.78946306e-01,  5.00451363e-02,  5.00439700e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.35181526, 0.64903235, 0.42478449]), 'desired_goal': array([1.35181526, 0.64903235, 0.42478449]), 'observation': array([ 1.31232365e+00,  1.17205181e+00,  8.40166565e-01,  1.35181526e+00,
        6.49032348e-01,  4.24784489e-01,  3.94916077e-02, -5.23019461e-01,
       -4.15382075e-01,  5.00458709e-02,  5.00432389e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  2.81436840e-03, -4.17241278e-03,
       -1.73949722e-03,  1.85976796e-07, -2.86118148e-07,  1.06849747e-18,
       -2.81437552e-03,  4.17240815e-03,  1.73949722e-03, -1.10764682e-06,
        1.09718952e-06])}
State {'achieved_goal': array([1.35181525, 0.64903234, 0.42478449]), 'desired_goal': array([1.35181525, 0.64903234, 0.42478449]), 'observation': array([ 1.30821742e+00,  1.17776419e+00,  8.42415155e-01,  1.35181525e+00,
        6.49032343e-01,  4.24784489e-01,  4.35978271e-02, -5.28731843e-01,
       -4.17630665e-01,  5.00453914e-02,  5.00437151e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.35181515, 0.64903228, 0.42478449]), 'desired_goal': array([1.35181515, 0.64903228, 0.42478449]), 'observation': array([ 1.26073533e+00,  1.20027462e+00,  8.45496712e-01,  1.35181515e+00,
        6.49032278e-01,  4.24784489e-01,  9.10798241e-02, -5.51242340e-01,
       -4.20712223e-01,  4.97652915e-02,  4.97271300e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  4.11241672e-03,  3.90618928e-03,
        4.77178253e-03,  1.85976796e-07, -2.86118148e-07,  8.43224978e-19,
       -4.11242384e-03, -3.90619391e-03, -4.77178253e-03, -4.58200032e-04,
        2.03633792e-05])}
State {'achieved_goal': array([1.35181514, 0.64903227, 0.42478449]), 'desired_goal': array([1.35181514, 0.64903227, 0.42478449]), 'observation': array([ 1.26500425e+00,  1.20808259e+00,  8.49727517e-01,  1.35181514e+00,
        6.49032274e-01,  4.24784489e-01,  8.68108900e-02, -5.59050321e-01,
       -4.24943028e-01,  5.01621641e-02,  5.01654544e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.44694138, 0.68907471, 0.42478438]), 'desired_goal': array([1.44694138, 0.68907471, 0.42478438]), 'observation': array([ 1.46424637e+00,  9.55654138e-01,  7.23882974e-01,  1.44694138e+00,
        6.89074714e-01,  4.24784382e-01, -1.73049969e-02, -2.66579424e-01,
       -2.99098592e-01,  5.00469765e-02,  5.00423400e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  3.82696835e-03, -1.53605691e-02,
       -1.12101271e-02,  1.85980262e-07, -2.86123480e-07,  6.32643324e-19,
       -3.82697547e-03,  1.53605644e-02,  1.12101858e-02, -9.38971586e-07,
        3.15569738e-07])}
State {'achieved_goal': array([1.44694137, 0.68907471, 0.42478443]), 'desired_goal': array([1.44694137, 0.68907471, 0.42478443]), 'observation': array([ 1.45766148e+00,  9.76816496e-01,  7.38832497e-01,  1.44694137e+00,
        6.89074709e-01,  4.24784428e-01, -1.07201095e-02, -2.87741787e-01,
       -3.14048070e-01,  5.00464266e-02,  5.00426930e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.44694127, 0.68907464, 0.42478449]), 'desired_goal': array([1.44694127, 0.68907464, 0.42478449]), 'observation': array([ 1.34339500e+00,  1.12799682e+00,  8.19239765e-01,  1.44694127e+00,
        6.89074644e-01,  4.24784489e-01,  1.03546274e-01, -4.38922172e-01,
       -3.94455276e-01,  5.00858858e-02,  5.01576928e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  4.03716510e-03, -7.41418881e-03,
       -5.52386748e-03,  1.85976797e-07, -2.86118149e-07, -2.13699357e-18,
       -4.03717222e-03,  7.41418418e-03,  5.52386749e-03, -9.83885947e-05,
       -3.44007448e-04])}
State {'achieved_goal': array([1.44694126, 0.68907464, 0.42478449]), 'desired_goal': array([1.44694126, 0.68907464, 0.42478449]), 'observation': array([ 1.33927107e+00,  1.13781543e+00,  8.25033642e-01,  1.44694126e+00,
        6.89074640e-01,  4.24784489e-01,  1.07670198e-01, -4.48740790e-01,
       -4.00249153e-01,  5.00521159e-02,  5.00468826e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.44694116, 0.68907457, 0.42478449]), 'desired_goal': array([1.44694116, 0.68907457, 0.42478449]), 'observation': array([ 1.28374454e+00,  1.18933852e+00,  8.43325983e-01,  1.44694116e+00,
        6.89074575e-01,  4.24784489e-01,  1.63196629e-01, -5.00263941e-01,
       -4.18541494e-01,  5.01652359e-02,  5.01641185e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  1.58722365e-03, -4.07759473e-03,
       -3.72995059e-03,  1.85976796e-07, -2.86118148e-07,  2.18211071e-18,
       -1.58723077e-03,  4.07759010e-03,  3.72995059e-03, -3.22280595e-04,
       -3.66626259e-04])}
State {'achieved_goal': array([1.44694116, 0.68907457, 0.42478449]), 'desired_goal': array([1.44694116, 0.68907457, 0.42478449]), 'observation': array([ 1.28268466e+00,  1.19513218e+00,  8.46886977e-01,  1.44694116e+00,
        6.89074570e-01,  4.24784489e-01,  1.64256494e-01, -5.06057612e-01,
       -4.22102487e-01,  5.00579447e-02,  5.00465365e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.48836971, 0.61726209, 0.42478277]), 'desired_goal': array([1.48836971, 0.61726209, 0.42478277]), 'observation': array([ 1.46980209e+00,  9.07209674e-01,  6.87098923e-01,  1.48836971e+00,
        6.17262093e-01,  4.24782768e-01,  1.85676159e-02, -2.89947581e-01,
       -2.62316155e-01,  5.00435896e-02,  5.02721247e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  2.95376884e-04, -2.04954669e-02,
       -1.68440733e-02,  1.86032551e-07, -2.86203925e-07, -5.76449915e-19,
       -2.95384008e-04,  2.04954623e-02,  1.68450164e-02,  1.50156452e-05,
       -6.60029183e-04])}
State {'achieved_goal': array([1.4883697 , 0.61726209, 0.4247835 ]), 'desired_goal': array([1.4883697 , 0.61726209, 0.4247835 ]), 'observation': array([ 1.46855213e+00,  9.32695336e-01,  7.07267803e-01,  1.48836970e+00,
        6.17262088e-01,  4.24783501e-01,  1.98175728e-02, -3.15433248e-01,
       -2.82484302e-01,  5.00478375e-02,  5.00558225e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.4883696 , 0.61726202, 0.42478449]), 'desired_goal': array([1.4883696 , 0.61726202, 0.42478449]), 'observation': array([ 1.35993577e+00,  1.11216034e+00,  8.13288202e-01,  1.48836960e+00,
        6.17262023e-01,  4.24784489e-01,  1.28433828e-01, -4.94898312e-01,
       -3.88503714e-01,  5.00854879e-02,  5.01576046e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  4.99952902e-03, -8.08178421e-03,
       -5.67839211e-03,  1.85976810e-07, -2.86118168e-07,  1.80253386e-19,
       -4.99953614e-03,  8.08177958e-03,  5.67839234e-03, -9.90223873e-05,
       -3.41953361e-04])}
State {'achieved_goal': array([1.48836959, 0.61726202, 0.42478449]), 'desired_goal': array([1.48836959, 0.61726202, 0.42478449]), 'observation': array([ 1.34738361e+00,  1.10666540e+00,  8.08243620e-01,  1.48836959e+00,
        6.17262019e-01,  4.24784489e-01,  1.40985981e-01, -4.89403386e-01,
       -3.83459131e-01,  4.98804993e-02,  4.95057792e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.48836949, 0.61726195, 0.42478449]), 'desired_goal': array([1.48836949, 0.61726195, 0.42478449]), 'observation': array([ 1.29813242e+00,  1.18685850e+00,  8.46148577e-01,  1.48836949e+00,
        6.17261954e-01,  4.24784489e-01,  1.90237069e-01, -5.69596542e-01,
       -4.21364088e-01,  5.00456676e-02,  5.00434425e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  2.37850772e-03, -3.38045268e-03,
       -1.23469903e-03,  1.85976796e-07, -2.86118148e-07,  1.56078987e-19,
       -2.37851484e-03,  3.38044805e-03,  1.23469903e-03, -9.79670799e-07,
        9.68203219e-07])}
State {'achieved_goal': array([1.48836949, 0.61726195, 0.42478449]), 'desired_goal': array([1.48836949, 0.61726195, 0.42478449]), 'observation': array([ 1.28506402e+00,  1.17767619e+00,  8.38291677e-01,  1.48836949e+00,
        6.17261949e-01,  4.24784489e-01,  2.03305462e-01, -5.60414239e-01,
       -4.13507188e-01,  4.97879635e-02,  4.97094651e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.38011371, 0.65072763, 0.42475675]), 'desired_goal': array([1.38011371, 0.65072763, 0.42475675]), 'observation': array([ 1.36948504e+00,  7.78820752e-01,  5.63919177e-01,  1.38011371e+00,
        6.50727633e-01,  4.24756751e-01,  1.06286679e-02, -1.28093119e-01,
       -1.39162426e-01,  3.97853246e-02,  4.16260047e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13, -2.49939057e-02, -2.62562399e-02,
       -2.64462078e-02,  1.86889059e-07, -2.87521630e-07,  3.02820559e-18,
        2.49938986e-02,  2.62562353e-02,  2.64614438e-02,  6.99530785e-02,
        7.08373750e-02])}
State {'achieved_goal': array([1.3801137 , 0.65072763, 0.42476859]), 'desired_goal': array([1.3801137 , 0.65072763, 0.42476859]), 'observation': array([ 1.40041205e+00,  8.12698044e-01,  5.97551078e-01,  1.38011370e+00,
        6.50727629e-01,  4.24768591e-01, -2.02983500e-02, -1.61970415e-01,
       -1.72782486e-01,  5.14205396e-02,  5.17228592e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.3801136 , 0.65072756, 0.42478448]), 'desired_goal': array([1.3801136 , 0.65072756, 0.42478448]), 'observation': array([ 1.39963144e+00,  1.07267261e+00,  7.96760690e-01,  1.38011360e+00,
        6.50727564e-01,  4.24784482e-01, -1.95178376e-02, -4.21945047e-01,
       -3.71976208e-01,  5.00461374e-02,  5.00430081e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  5.66427234e-03, -8.99631850e-03,
       -4.87608352e-03,  1.85977012e-07, -2.86118480e-07, -1.45212130e-19,
       -5.66427946e-03,  8.99631387e-03,  4.87608717e-03, -9.32161856e-07,
        8.15756962e-07])}
State {'achieved_goal': array([1.3801136 , 0.65072756, 0.42478449]), 'desired_goal': array([1.3801136 , 0.65072756, 0.42478449]), 'observation': array([ 1.39141464e+00,  1.08511938e+00,  8.03328618e-01,  1.38011360e+00,
        6.50727559e-01,  4.24784485e-01, -1.13010433e-02, -4.34391818e-01,
       -3.78544133e-01,  5.00456412e-02,  5.00434676e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.3801135 , 0.65072749, 0.42478449]), 'desired_goal': array([1.3801135 , 0.65072749, 0.42478449]), 'observation': array([ 1.30947961e+00,  1.17874589e+00,  8.42916945e-01,  1.38011350e+00,
        6.50727494e-01,  4.24784489e-01,  7.06338920e-02, -5.28018400e-01,
       -4.18132456e-01,  5.00448950e-02,  5.00442112e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  3.10562441e-03, -3.56328685e-03,
       -1.19255962e-03,  1.85976796e-07, -2.86118148e-07,  1.82577012e-18,
       -3.10563154e-03,  3.56328222e-03,  1.19255962e-03, -2.84370063e-07,
        2.84352532e-07])}
State {'achieved_goal': array([1.38011349, 0.65072749, 0.42478449]), 'desired_goal': array([1.38011349, 0.65072749, 0.42478449]), 'observation': array([ 1.29575207e+00,  1.16955085e+00,  8.34805725e-01,  1.38011349e+00,
        6.50727490e-01,  4.24784489e-01,  8.43614187e-02, -5.18823358e-01,
       -4.10021236e-01,  4.97936389e-02,  4.97015209e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.38011339, 0.65072742, 0.42478449]), 'desired_goal': array([1.38011339, 0.65072742, 0.42478449]), 'observation': array([ 1.26637569e+00,  1.22163456e+00,  8.55972906e-01,  1.38011339e+00,
        6.50727425e-01,  4.24784489e-01,  1.13737705e-01, -5.70907131e-01,
       -4.31188417e-01,  5.00446335e-02,  5.00444728e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  1.35733290e-03, -1.51528861e-03,
       -3.77981657e-04,  1.85976796e-07, -2.86118148e-07, -2.74003510e-18,
       -1.35734003e-03,  1.51528398e-03,  3.77981657e-04, -1.52746267e-07,
        1.52743324e-07])}
State {'achieved_goal': array([1.38011338, 0.65072742, 0.42478449]), 'desired_goal': array([1.38011338, 0.65072742, 0.42478449]), 'observation': array([ 1.26445173e+00,  1.22373059e+00,  8.56468556e-01,  1.38011338e+00,
        6.50727420e-01,  4.24784489e-01,  1.15661651e-01, -5.73003165e-01,
       -4.31684067e-01,  5.00445951e-02,  5.00445112e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.21502453, 0.75337463, 0.42478438]), 'desired_goal': array([1.21502453, 0.75337463, 0.42478438]), 'observation': array([ 1.44871603e+00,  9.96296092e-01,  7.52011921e-01,  1.21502453e+00,
        7.53374628e-01,  4.24784382e-01, -2.33691507e-01, -2.42921464e-01,
       -3.27227539e-01,  5.00461518e-02,  5.00429785e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  5.67195532e-03, -1.29907255e-02,
       -8.53272770e-03,  1.85980262e-07, -2.86123480e-07,  3.73585814e-18,
       -5.67196244e-03,  1.29907208e-02,  8.53278638e-03, -5.63048154e-07,
        4.91982469e-07])}
State {'achieved_goal': array([1.21502452, 0.75337462, 0.42478443]), 'desired_goal': array([1.21502452, 0.75337462, 0.42478443]), 'observation': array([ 1.44015142e+00,  1.01426760e+00,  7.63495114e-01,  1.21502452e+00,
        7.53374624e-01,  4.24784428e-01, -2.25126900e-01, -2.60892973e-01,
       -3.38710686e-01,  5.00458048e-02,  5.00433030e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.21502442, 0.75337456, 0.42478449]), 'desired_goal': array([1.21502442, 0.75337456, 0.42478449]), 'observation': array([ 1.36527381e+00,  1.08860580e+00,  8.00768228e-01,  1.21502442e+00,
        7.53374559e-01,  4.24784489e-01, -1.50249395e-01, -3.35231244e-01,
       -3.75983739e-01,  5.01547736e-02,  5.02471403e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  2.94532383e-03, -1.03753831e-02,
       -7.97197585e-03,  1.85976797e-07, -2.86118149e-07, -2.18439291e-19,
       -2.94533095e-03,  1.03753784e-02,  7.97197587e-03, -2.94174379e-04,
       -5.96270398e-04])}
State {'achieved_goal': array([1.21502441, 0.75337455, 0.42478449]), 'desired_goal': array([1.21502441, 0.75337455, 0.42478449]), 'observation': array([ 1.36204743e+00,  1.10198194e+00,  8.09526648e-01,  1.21502441e+00,
        7.53374554e-01,  4.24784489e-01, -1.47023022e-01, -3.48607383e-01,
       -3.84742159e-01,  5.00568559e-02,  5.00523409e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.21502431, 0.75337449, 0.42478449]), 'desired_goal': array([1.21502431, 0.75337449, 0.42478449]), 'observation': array([ 1.29383272e+00,  1.19125251e+00,  8.46686610e-01,  1.21502431e+00,
        7.53374489e-01,  4.24784489e-01, -7.88084033e-02, -4.37878024e-01,
       -4.21902120e-01,  5.00463086e-02,  5.00428582e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  2.14661707e-03, -3.21945741e-03,
       -1.26073453e-03,  1.85976796e-07, -2.86118148e-07,  6.82861794e-19,
       -2.14662420e-03,  3.21945278e-03,  1.26073453e-03, -1.54537067e-06,
        1.36591169e-06])}
State {'achieved_goal': array([1.21502431, 0.75337448, 0.42478449]), 'desired_goal': array([1.21502431, 0.75337448, 0.42478449]), 'observation': array([ 1.29069268e+00,  1.19564829e+00,  8.48292108e-01,  1.21502431e+00,
        7.53374485e-01,  4.24784489e-01, -7.56683779e-02, -4.42273810e-01,
       -4.23507619e-01,  5.00455401e-02,  5.00435700e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.24359203, 0.6729555 , 0.42478277]), 'desired_goal': array([1.24359203, 0.6729555 , 0.42478277]), 'observation': array([ 1.46357633e+00,  9.09137504e-01,  6.88308067e-01,  1.24359203e+00,
        6.72955504e-01,  4.24782768e-01, -2.19984300e-01, -2.36182000e-01,
       -2.63525299e-01,  5.00019091e-02,  4.95144469e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  6.77141188e-04,  2.17101291e-03,
        2.52334568e-03,  1.86032551e-07, -2.86203925e-07, -3.47536162e-18,
       -6.77148312e-04, -2.17101754e-03, -2.52240256e-03, -2.71581508e-05,
       -4.61368974e-04])}
State {'achieved_goal': array([1.24359202, 0.6729555 , 0.4247835 ]), 'desired_goal': array([1.24359202, 0.6729555 , 0.4247835 ]), 'observation': array([ 1.46182623e+00,  9.08427683e-01,  6.87256502e-01,  1.24359202e+00,
        6.72955499e-01,  4.24783501e-01, -2.18234201e-01, -2.35472184e-01,
       -2.62473000e-01,  4.99305953e-02,  4.92132070e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.24359193, 0.67295543, 0.42478449]), 'desired_goal': array([1.24359193, 0.67295543, 0.42478449]), 'observation': array([ 1.39300171e+00,  1.08579121e+00,  8.03700669e-01,  1.24359193e+00,
        6.72955434e-01,  4.24784489e-01, -1.49409783e-01, -4.12835780e-01,
       -3.78916180e-01,  5.00451204e-02,  5.00439859e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  6.04452575e-03, -8.09033681e-03,
       -4.00426281e-03,  1.85976810e-07, -2.86118168e-07,  1.33299946e-18,
       -6.04453288e-03,  8.09033218e-03,  4.00426304e-03, -1.93529416e-07,
        1.93488941e-07])}
State {'achieved_goal': array([1.24359192, 0.67295543, 0.42478449]), 'desired_goal': array([1.24359192, 0.67295543, 0.42478449]), 'observation': array([ 1.38441965e+00,  1.09704061e+00,  8.09173352e-01,  1.24359192e+00,
        6.72955430e-01,  4.24784489e-01, -1.40827730e-01, -4.24085182e-01,
       -3.84388863e-01,  5.00450253e-02,  5.00440810e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.24359182, 0.67295536, 0.42478449]), 'desired_goal': array([1.24359182, 0.67295536, 0.42478449]), 'observation': array([ 1.30560840e+00,  1.17600013e+00,  8.40890972e-01,  1.24359182e+00,
        6.72955365e-01,  4.24784489e-01, -6.20165772e-02, -5.03044764e-01,
       -4.16106483e-01,  5.00483484e-02,  5.00417318e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  2.22441189e-03, -4.23648926e-03,
       -2.01998034e-03,  1.85976796e-07, -2.86118148e-07,  1.52778139e-19,
       -2.22441901e-03,  4.23648463e-03,  2.01998034e-03, -4.12280910e-06,
        1.23347050e-06])}
State {'achieved_goal': array([1.24359181, 0.67295536, 0.42478449]), 'desired_goal': array([1.24359181, 0.67295536, 0.42478449]), 'observation': array([ 1.30225368e+00,  1.18173063e+00,  8.43418917e-01,  1.24359181e+00,
        6.72955360e-01,  4.24784489e-01, -5.86618723e-02, -5.08775270e-01,
       -4.18634428e-01,  5.00464768e-02,  5.00426908e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.34659549, 0.88260943, 0.42475675]), 'desired_goal': array([1.34659549, 0.88260943, 0.42475675]), 'observation': array([ 1.40038018e+00,  8.12697465e-01,  5.97742402e-01,  1.34659549e+00,
        8.82609426e-01,  4.24756751e-01, -5.37846838e-02,  6.99119609e-02,
       -1.72985651e-01,  5.14205340e-02,  5.17228523e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13, -2.24004811e-02, -2.31362458e-02,
       -2.31249524e-02,  1.86889059e-07, -2.87521630e-07,  1.92245850e-18,
        2.24004739e-02,  2.31362411e-02,  2.31401884e-02, -3.91441617e-03,
       -4.80798337e-03])}
State {'achieved_goal': array([1.34659549, 0.88260942, 0.42476859]), 'desired_goal': array([1.34659549, 0.88260942, 0.42476859]), 'observation': array([ 1.40426569e+00,  8.16460682e-01,  6.01567386e-01,  1.34659549e+00,
        8.82609421e-01,  4.24768591e-01, -5.76701992e-02,  6.61487389e-02,
       -1.76798795e-01,  4.90719454e-02,  4.74195657e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.34659539, 0.88260936, 0.42478448]), 'desired_goal': array([1.34659539, 0.88260936, 0.42478448]), 'observation': array([ 1.43019930e+00,  1.01187884e+00,  7.60368379e-01,  1.34659539e+00,
        8.82609356e-01,  4.24784482e-01, -8.36039144e-02, -1.29269487e-01,
       -3.35583897e-01,  5.00508160e-02,  5.00623465e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  3.69443465e-03, -1.26718024e-02,
       -8.58527417e-03,  1.85977012e-07, -2.86118480e-07,  3.46009781e-18,
       -3.69444177e-03,  1.26717978e-02,  8.58527782e-03, -9.42649403e-06,
       -6.19008389e-05])}
State {'achieved_goal': array([1.34659538, 0.88260935, 0.42478449]), 'desired_goal': array([1.34659538, 0.88260935, 0.42478449]), 'observation': array([ 1.42381250e+00,  1.02942428e+00,  7.71895248e-01,  1.34659538e+00,
        8.82609351e-01,  4.24784485e-01, -7.72171243e-02, -1.46814926e-01,
       -3.47110762e-01,  5.00473078e-02,  5.00433121e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.34659528, 0.88260929, 0.42478449]), 'desired_goal': array([1.34659528, 0.88260929, 0.42478449]), 'observation': array([ 1.33280692e+00,  1.14724394e+00,  8.30782438e-01,  1.34659528e+00,
        8.82609287e-01,  4.24784489e-01,  1.37883624e-02, -2.64634654e-01,
       -4.05997949e-01,  5.00478252e-02,  5.00419038e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  3.23301210e-03, -5.59559629e-03,
       -2.70840707e-03,  1.85976796e-07, -2.86118148e-07, -1.32213874e-18,
       -3.23301923e-03,  5.59559166e-03,  2.70840707e-03, -2.84702492e-06,
        9.99489208e-07])}
State {'achieved_goal': array([1.34659527, 0.88260928, 0.42478449]), 'desired_goal': array([1.34659527, 0.88260928, 0.42478449]), 'observation': array([ 1.32801298e+00,  1.15487287e+00,  8.34257895e-01,  1.34659527e+00,
        8.82609282e-01,  4.24784489e-01,  1.85822952e-02, -2.72263589e-01,
       -4.09473406e-01,  5.00464026e-02,  5.00427429e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.34659518, 0.88260922, 0.42478449]), 'desired_goal': array([1.34659518, 0.88260922, 0.42478449]), 'observation': array([ 1.27989635e+00,  1.20542439e+00,  8.51513567e-01,  1.34659518e+00,
        8.82609222e-01,  4.24784489e-01,  6.66988343e-02, -3.22815166e-01,
       -4.26729077e-01,  5.00456008e-02,  5.00435093e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  1.66203584e-03, -2.49114489e-03,
       -8.81534005e-04,  1.85976796e-07, -2.86118148e-07, -2.58666357e-18,
       -1.66204296e-03,  2.49114026e-03,  8.81534005e-04, -9.69782570e-07,
        9.58431135e-07])}
State {'achieved_goal': array([1.34659517, 0.88260922, 0.42478449]), 'desired_goal': array([1.34659517, 0.88260922, 0.42478449]), 'observation': array([ 1.27746604e+00,  1.20882004e+00,  8.52623507e-01,  1.34659517e+00,
        8.82609217e-01,  4.24784489e-01,  6.91291369e-02, -3.26210821e-01,
       -4.27839018e-01,  5.00451678e-02,  5.00439387e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.41996357, 0.83171957, 0.4247843 ]), 'desired_goal': array([1.41996357, 0.83171957, 0.4247843 ]), 'observation': array([ 1.44078353e+00,  1.01426639e+00,  7.63640518e-01,  1.41996357e+00,
        8.31719569e-01,  4.24784302e-01, -2.08199546e-02, -1.82546825e-01,
       -3.38856215e-01,  5.00456973e-02,  5.00434093e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  6.28167071e-03, -1.19139606e-02,
       -7.36038033e-03,  1.85982836e-07, -2.86127440e-07,  1.89472169e-19,
       -6.28167783e-03,  1.19139560e-02,  7.36048258e-03, -3.88781227e-07,
        3.88019364e-07])}
State {'achieved_goal': array([1.41996357, 0.83171956, 0.42478438]), 'desired_goal': array([1.41996357, 0.83171956, 0.42478438]), 'observation': array([ 1.43155270e+00,  1.03077274e+00,  7.73621868e-01,  1.41996357e+00,
        8.31719564e-01,  4.24784382e-01, -1.15891334e-02, -1.99053180e-01,
       -3.48837486e-01,  5.00454821e-02,  5.00436243e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.41996347, 0.8317195 , 0.42478449]), 'desired_goal': array([1.41996347, 0.8317195 , 0.42478449]), 'observation': array([ 1.33119019e+00,  1.14437733e+00,  8.27677084e-01,  1.41996347e+00,
        8.31719499e-01,  4.24784489e-01,  8.87732790e-02, -3.12657832e-01,
       -4.02892595e-01,  5.00524260e-02,  5.00466504e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  2.38082242e-03, -6.10067901e-03,
       -3.32535791e-03,  1.85976798e-07, -2.86118150e-07,  2.35715776e-18,
       -2.38082954e-03,  6.10067438e-03,  3.32535793e-03, -1.07248426e-05,
       -1.88409539e-05])}
State {'achieved_goal': array([1.41996346, 0.83171949, 0.42478449]), 'desired_goal': array([1.41996346, 0.83171949, 0.42478449]), 'observation': array([ 1.32723311e+00,  1.15266370e+00,  8.32015864e-01,  1.41996346e+00,
        8.31719495e-01,  4.24784489e-01,  9.27303472e-02, -3.20944203e-01,
       -4.07231375e-01,  5.00479983e-02,  5.00417353e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.41996336, 0.83171943, 0.42478449]), 'desired_goal': array([1.41996336, 0.83171943, 0.42478449]), 'observation': array([ 1.28259259e+00,  1.20132711e+00,  8.50689889e-01,  1.41996336e+00,
        8.31719430e-01,  4.24784489e-01,  1.37370775e-01, -3.69607683e-01,
       -4.25905400e-01,  5.00455446e-02,  5.00435620e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  1.67153846e-03, -2.76347616e-03,
       -1.02984764e-03,  1.85976796e-07, -2.86118148e-07, -5.64297488e-19,
       -1.67154559e-03,  2.76347153e-03,  1.02984764e-03, -9.35836005e-07,
        9.34844754e-07])}
State {'achieved_goal': array([1.41996335, 0.83171943, 0.42478449]), 'desired_goal': array([1.41996335, 0.83171943, 0.42478449]), 'observation': array([ 1.27001575e+00,  1.19176345e+00,  8.42802165e-01,  1.41996335e+00,
        8.31719425e-01,  4.24784489e-01,  1.49947608e-01, -3.60044028e-01,
       -4.18017675e-01,  4.97736238e-02,  4.97206405e-02, -3.85214084e-07,
        5

State {'achieved_goal': array([1.20270869, 0.88246571, 0.42478149]), 'desired_goal': array([1.20270869, 0.88246571, 0.42478149]), 'observation': array([ 1.46832755e+00,  9.32750999e-01,  7.07434129e-01,  1.20270869e+00,
        8.82465709e-01,  4.24781489e-01, -2.65618860e-01, -5.02852896e-02,
       -2.82652640e-01,  5.00482879e-02,  5.00408672e-02, -3.85214084e-07,
        5.92637053e-07,  1.12208536e-13,  1.77539195e-03, -1.67379564e-02,
       -1.27377041e-02,  1.86074041e-07, -2.86267755e-07, -2.56293013e-18,
       -1.77539908e-03,  1.67379517e-02,  1.27393480e-02, -2.86398628e-06,
        2.71949883e-06])}


KeyboardInterrupt: 