In [1]:
# import module
import random
import numpy as np
from itertools import count
from collections import deque

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import gym

# make game
env = gym.make('CartPole-v1')

# seed the experiment
# The agent draws from Python's `random` (random actions, replay sampling) and
# from NumPy's global RNG (the exploration test) in addition to torch, so every
# one of those generators has to be seeded for a run to be repeatable.
SEED = 9
env.seed(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)



<torch._C.Generator at 0x10f4b45f0>

In [2]:
# define util function
def to_torch_tensor(np_arr):
    """Convert a NumPy array into a float32 torch tensor."""
    tensor = torch.from_numpy(np_arr)
    return tensor.to(torch.float32)

In [3]:
# define our policy
class Policy(nn.Module):
    """Fully connected network producing one output value per action.

    `observation_space` is the input width and `action_space` the output
    width; both are stored as attributes. Two hidden layers of 24 units with
    ReLU activations sit between them.
    """

    def __init__(self, observation_space, action_space):
        super().__init__()
        self.observation_space = observation_space
        self.action_space = action_space
        hidden_units = 24
        # layers are created in input-to-output order
        self.fc1 = nn.Linear(observation_space, hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.fc3 = nn.Linear(hidden_units, action_space)

    def forward(self, x):
        """Return the un-activated outputs of the last layer for input `x`."""
        hidden = self.fc1(x).relu()
        hidden = self.fc2(hidden).relu()
        return self.fc3(hidden)
    
# define our agent
class Agent:
    """Epsilon-greedy Q-learning agent with an experience-replay memory.

    The agent wraps a `policy` network that maps a state tensor to one Q-value
    per action and exposes an integer `action_space` attribute. Transitions are
    stored in a bounded deque and replayed in random mini-batches; the network
    is updated one sampled transition at a time.

    Args:
        policy: the Q-network to act with and train.
        memory_size: maximum number of transitions kept in the replay memory.
        gamma: discount factor applied to the next state's best Q-value.
        batch_size: number of transitions sampled per replay call; replay is
            skipped until the memory holds at least this many.
        exploration_max: initial probability of taking a random action.
        exploration_min: lower bound for the exploration probability.
        exploration_decay: factor the exploration probability is multiplied by
            after every replay call that actually trains.
        learning_rate: step size passed to the Adam optimizer.
        verbose: when true, print a line each time a replay pass starts.
    """

    def __init__(self, policy, *, memory_size=1000000, gamma=0.95, batch_size=20,
                 exploration_max=1.0, exploration_min=0.01, exploration_decay=0.995,
                 learning_rate=1e-2, verbose=True):
        self.policy = policy
        self.optimizer = optim.Adam(self.policy.parameters(), lr=learning_rate)
        self.loss_fn = nn.MSELoss(reduction='mean')
        self.memory = deque(maxlen=memory_size)
        self.gamma = gamma
        self.batch_size = batch_size
        self.exploration_rate = exploration_max
        self.exploration_min = exploration_min
        self.exploration_decay = exploration_decay
        self.verbose = verbose

    def select_action(self, state):
        """Return an action index for `state` using the epsilon-greedy rule.

        With probability `exploration_rate` a uniformly random action is
        returned; otherwise the action with the largest predicted Q-value.
        """
        if np.random.rand() < self.exploration_rate:
            return random.randrange(self.policy.action_space)
        # acting needs no gradients, so skip building the autograd graph
        with torch.no_grad():
            q_values = self.policy(to_torch_tensor(state))
        return int(q_values.max(0)[-1])

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def experience_replay(self):
        """Train the policy on a random mini-batch of remembered transitions.

        Does nothing until the memory holds at least `batch_size` transitions.
        After training, the exploration rate is decayed (never below
        `exploration_min`).
        """
        if len(self.memory) < self.batch_size:
            return
        if self.verbose:
            print("[ Experience replay ] starts")
        batch = random.sample(self.memory, self.batch_size)
        for state, action, reward, state_next, done in batch:
            if not done:
                # Q function (Bellman equation): q value = reward at current step
                # + gamma * q value of next step by taking an optimal action.
                # The target is a constant for the update, so compute it
                # without tracking gradients.
                with torch.no_grad():
                    q_value_to_update = reward + self.gamma * torch.max(self.policy(to_torch_tensor(state_next)))
            else:
                q_value_to_update = reward
            q_values_hat = self.policy(to_torch_tensor(state))
            # generate target: identical to the prediction except for the taken
            # action, so only that action's Q-value contributes to the loss
            q_values_target = q_values_hat.clone().detach()
            q_values_target[action] = q_value_to_update
            # train policy (MSELoss takes the prediction first, then the target)
            self.optimizer.zero_grad()
            policy_loss = self.loss_fn(q_values_hat, q_values_target)
            policy_loss.backward()
            self.optimizer.step()

        # the more policy gets replayed, the less the agent explores
        self.exploration_rate *= self.exploration_decay
        self.exploration_rate = max(self.exploration_min, self.exploration_rate)

In [4]:
# create policy
observation_space = env.observation_space.shape[0]
action_space = env.action_space.n
policy = Policy(observation_space, action_space)

# create agent
agent = Agent(policy)

# run limits
MAX_EPISODES = 500              # give up once more than this many episodes were played
MAX_STEPS_PER_EPISODE = 10000   # exclusive upper bound of the per-episode step counter

# play game
for i_episode in count(1):
    state = env.reset()
    print("[ episode {} ] state={}".format(i_episode, state))
    for t in range(1, MAX_STEPS_PER_EPISODE):
        action = agent.select_action(state)
        state_next, reward, done, info = env.step(action)
        # An episode that ends only because the time limit was reached is not a
        # failure: it must not be penalised nor stored as a terminal transition.
        # NOTE(review): relies on gym's TimeLimit wrapper reporting
        # 'TimeLimit.truncated' in `info`; when the key is absent this falls
        # back to treating every `done` as a failure, as before.
        truncated = bool(info.get('TimeLimit.truncated', False))
        failed = done and not truncated
        if failed:
            reward *= -1
        agent.remember(state, action, reward, state_next, failed)
        print("[ episode {} ][ timestamp {} ] state={}, action={}, reward={}, next_state={}".format(i_episode, t, state, action, reward, state_next))
        state = state_next
        agent.experience_replay()
        if done:
            break
    print("[ Ended! ] Episode {}: Exploration_rate={}. Score={}.".format(i_episode, agent.exploration_rate, t))

    # end game criteria
    if t > env.spec.reward_threshold:
        print("[ Solved! ] Score is now {}".format(t))
        break
    elif i_episode > MAX_EPISODES:
        print("[ Failed! ] took more than {} episodes to converge".format(MAX_EPISODES))
        break


[ episode 1 ] state=[-0.00551277  0.02101743  0.00884103  0.02545213]
[ episode 1 ][ timestamp 1 ] state=[-0.00551277  0.02101743  0.00884103  0.02545213], action=1, reward=1.0, next_state=[-0.00509242  0.21601149  0.00935007 -0.26442829]
[ episode 1 ][ timestamp 2 ] state=[-0.00509242  0.21601149  0.00935007 -0.26442829], action=0, reward=1.0, next_state=[-0.00077219  0.02075734  0.0040615   0.03118901]
[ episode 1 ][ timestamp 3 ] state=[-0.00077219  0.02075734  0.0040615   0.03118901], action=1, reward=1.0, next_state=[-0.00035704  0.21582081  0.00468528 -0.2602097 ]
[ episode 1 ][ timestamp 4 ] state=[-0.00035704  0.21582081  0.00468528 -0.2602097 ], action=1, reward=1.0, next_state=[ 3.95937136e-03  4.10875563e-01 -5.18910048e-04 -5.51411155e-01]
[ episode 1 ][ timestamp 5 ] state=[ 3.95937136e-03  4.10875563e-01 -5.18910048e-04 -5.51411155e-01], action=0, reward=1.0, next_state=[ 0.01217688  0.2157609  -0.01154713 -0.25889176]
[ episode 1 ][ timestamp 6 ] state=[ 0.01217688  0.21

[ episode 2 ][ timestamp 16 ] state=[-0.05424039 -0.20834032  0.11663412  0.4558664 ], action=1, reward=1.0, next_state=[-0.05840719 -0.01504389  0.12575145  0.20210438]
[ Experience replay ] starts
[ episode 2 ][ timestamp 17 ] state=[-0.05840719 -0.01504389  0.12575145  0.20210438], action=0, reward=1.0, next_state=[-0.05870807 -0.21171898  0.12979354  0.53166107]
[ Experience replay ] starts
[ episode 2 ][ timestamp 18 ] state=[-0.05870807 -0.21171898  0.12979354  0.53166107], action=1, reward=1.0, next_state=[-0.06294245 -0.01863859  0.14042676  0.28252885]
[ Experience replay ] starts
[ episode 2 ][ timestamp 19 ] state=[-0.06294245 -0.01863859  0.14042676  0.28252885], action=0, reward=1.0, next_state=[-0.06331522 -0.21545507  0.14607733  0.61599739]
[ Experience replay ] starts
[ episode 2 ][ timestamp 20 ] state=[-0.06331522 -0.21545507  0.14607733  0.61599739], action=0, reward=1.0, next_state=[-0.06762432 -0.41228329  0.15839728  0.95088944]
[ Experience replay ] starts
[ epi

[ episode 5 ][ timestamp 8 ] state=[-0.09737183 -0.97308283  0.06434231  1.46634731], action=0, reward=1.0, next_state=[-0.11683348 -1.16893088  0.09366926  1.77841508]
[ Experience replay ] starts
[ episode 5 ][ timestamp 9 ] state=[-0.11683348 -1.16893088  0.09366926  1.77841508], action=1, reward=1.0, next_state=[-0.1402121  -0.97497999  0.12923756  1.5162626 ]
[ Experience replay ] starts
[ episode 5 ][ timestamp 10 ] state=[-0.1402121  -0.97497999  0.12923756  1.5162626 ], action=1, reward=1.0, next_state=[-0.1597117  -0.78163683  0.15956281  1.26655661]
[ Experience replay ] starts
[ episode 5 ][ timestamp 11 ] state=[-0.1597117  -0.78163683  0.15956281  1.26655661], action=1, reward=1.0, next_state=[-0.17534444 -0.588872    0.18489395  1.02779512]
[ Experience replay ] starts
[ episode 5 ][ timestamp 12 ] state=[-0.17534444 -0.588872    0.18489395  1.02779512], action=1, reward=1.0, next_state=[-0.18712188 -0.39662764  0.20544985  0.7983932 ]
[ Experience replay ] starts
[ episo

[ episode 7 ][ timestamp 14 ] state=[ 0.08646171  1.35025577 -0.175557   -2.2066328 ], action=1, reward=-1.0, next_state=[ 0.11346683  1.54657615 -0.21968965 -2.54793607]
[ Experience replay ] starts
[ Ended! ] Episode 7: Exploration_rate=0.5647174463480732. Score=14.
[ episode 8 ] state=[-0.00883346  0.00371395 -0.00354461  0.02637272]
[ episode 8 ][ timestamp 1 ] state=[-0.00883346  0.00371395 -0.00354461  0.02637272], action=0, reward=1.0, next_state=[-0.00875918 -0.19135699 -0.00301715  0.31793518]
[ Experience replay ] starts
[ episode 8 ][ timestamp 2 ] state=[-0.00875918 -0.19135699 -0.00301715  0.31793518], action=1, reward=1.0, next_state=[-0.01258632  0.0038078   0.00334155  0.02430228]
[ Experience replay ] starts
[ episode 8 ][ timestamp 3 ] state=[-0.01258632  0.0038078   0.00334155  0.02430228], action=1, reward=1.0, next_state=[-0.01251016  0.19888167  0.0038276  -0.26732448]
[ Experience replay ] starts
[ episode 8 ][ timestamp 4 ] state=[-0.01251016  0.19888167  0.0038

[ episode 11 ][ timestamp 4 ] state=[-0.02880037  0.19963371  0.0273355  -0.24657131], action=1, reward=1.0, next_state=[-0.02480769  0.3943548   0.02240407 -0.53050819]
[ Experience replay ] starts
[ episode 11 ][ timestamp 5 ] state=[-0.02480769  0.3943548   0.02240407 -0.53050819], action=1, reward=1.0, next_state=[-0.0169206   0.58915455  0.01179391 -0.81604824]
[ Experience replay ] starts
[ episode 11 ][ timestamp 6 ] state=[-0.0169206   0.58915455  0.01179391 -0.81604824], action=1, reward=1.0, next_state=[-0.00513751  0.78411306 -0.00452706 -1.10499833]
[ Experience replay ] starts
[ episode 11 ][ timestamp 7 ] state=[-0.00513751  0.78411306 -0.00452706 -1.10499833], action=1, reward=1.0, next_state=[ 0.01054476  0.97929425 -0.02662703 -1.39909807]
[ Experience replay ] starts
[ episode 11 ][ timestamp 8 ] state=[ 0.01054476  0.97929425 -0.02662703 -1.39909807], action=1, reward=1.0, next_state=[ 0.03013064  1.17473691 -0.05460899 -1.69998556]
[ Experience replay ] starts
[ epi

[ episode 13 ][ timestamp 8 ] state=[ 0.06108322  1.01477985 -0.0631331  -1.51278198], action=1, reward=1.0, next_state=[ 0.08137881  1.21060692 -0.09338874 -1.82448619]
[ Experience replay ] starts
[ episode 13 ][ timestamp 9 ] state=[ 0.08137881  1.21060692 -0.09338874 -1.82448619], action=1, reward=1.0, next_state=[ 0.10559095  1.40663285 -0.12987846 -2.14466019]
[ Experience replay ] starts
[ episode 13 ][ timestamp 10 ] state=[ 0.10559095  1.40663285 -0.12987846 -2.14466019], action=1, reward=1.0, next_state=[ 0.13372361  1.60277467 -0.17277166 -2.47447195]
[ Experience replay ] starts
[ episode 13 ][ timestamp 11 ] state=[ 0.13372361  1.60277467 -0.17277166 -2.47447195], action=1, reward=-1.0, next_state=[ 0.1657791   1.79887438 -0.2222611  -2.81478478]
[ Experience replay ] starts
[ Ended! ] Episode 13: Exploration_rate=0.3614809303671764. Score=11.
[ episode 14 ] state=[-0.0375153  -0.00623601 -0.00406604 -0.04050475]
[ episode 14 ][ timestamp 1 ] state=[-0.0375153  -0.00623601

[ episode 16 ][ timestamp 6 ] state=[-0.01707775 -0.20810121  0.07290893  0.36293859], action=0, reward=1.0, next_state=[-0.02123977 -0.40417959  0.08016771  0.67769102]
[ Experience replay ] starts
[ episode 16 ][ timestamp 7 ] state=[-0.02123977 -0.40417959  0.08016771  0.67769102], action=1, reward=1.0, next_state=[-0.02932336 -0.21025755  0.09372153  0.41128628]
[ Experience replay ] starts
[ episode 16 ][ timestamp 8 ] state=[-0.02932336 -0.21025755  0.09372153  0.41128628], action=0, reward=1.0, next_state=[-0.03352851 -0.4065745   0.10194725  0.73198316]
[ Experience replay ] starts
[ episode 16 ][ timestamp 9 ] state=[-0.03352851 -0.4065745   0.10194725  0.73198316], action=1, reward=1.0, next_state=[-0.04166    -0.21299802  0.11658692  0.47304664]
[ Experience replay ] starts
[ episode 16 ][ timestamp 10 ] state=[-0.04166    -0.21299802  0.11658692  0.47304664], action=1, reward=1.0, next_state=[-0.04591996 -0.01969897  0.12604785  0.21926535]
[ Experience replay ] starts
[ ep

[ episode 17 ][ timestamp 27 ] state=[0.01087667 0.37406437 0.16063859 0.11183462], action=0, reward=1.0, next_state=[0.01835796 0.17704882 0.16287529 0.45057808]
[ Experience replay ] starts
[ episode 17 ][ timestamp 28 ] state=[0.01835796 0.17704882 0.16287529 0.45057808], action=1, reward=1.0, next_state=[0.02189894 0.36953776 0.17188685 0.21333991]
[ Experience replay ] starts
[ episode 17 ][ timestamp 29 ] state=[0.02189894 0.36953776 0.17188685 0.21333991], action=0, reward=1.0, next_state=[0.02928969 0.1724285  0.17615364 0.55493309]
[ Experience replay ] starts
[ episode 17 ][ timestamp 30 ] state=[0.02928969 0.1724285  0.17615364 0.55493309], action=1, reward=1.0, next_state=[0.03273826 0.36469669 0.18725231 0.32251558]
[ Experience replay ] starts
[ episode 17 ][ timestamp 31 ] state=[0.03273826 0.36469669 0.18725231 0.32251558], action=1, reward=1.0, next_state=[0.0400322  0.55672745 0.19370262 0.09423566]
[ Experience replay ] starts
[ episode 17 ][ timestamp 32 ] state=[0.

[ episode 18 ][ timestamp 37 ] state=[-0.01921189  0.05367206 -0.10688999 -0.52457313], action=0, reward=1.0, next_state=[-0.01813845 -0.1397961  -0.11738145 -0.26739301]
[ Experience replay ] starts
[ episode 18 ][ timestamp 38 ] state=[-0.01813845 -0.1397961  -0.11738145 -0.26739301], action=0, reward=1.0, next_state=[-0.02093438 -0.33306426 -0.12272931 -0.01391662]
[ Experience replay ] starts
[ episode 18 ][ timestamp 39 ] state=[-0.02093438 -0.33306426 -0.12272931 -0.01391662], action=1, reward=1.0, next_state=[-0.02759566 -0.13641548 -0.12300764 -0.34266296]
[ Experience replay ] starts
[ episode 18 ][ timestamp 40 ] state=[-0.02759566 -0.13641548 -0.12300764 -0.34266296], action=1, reward=1.0, next_state=[-0.03032397  0.06022217 -0.1298609  -0.6714639 ]
[ Experience replay ] starts
[ episode 18 ][ timestamp 41 ] state=[-0.03032397  0.06022217 -0.1298609  -0.6714639 ], action=0, reward=1.0, next_state=[-0.02911953 -0.13287827 -0.14329018 -0.422324  ]
[ Experience replay ] starts


[ episode 19 ][ timestamp 35 ] state=[ 0.101152    0.02722452 -0.03318936 -0.03223255], action=1, reward=1.0, next_state=[ 0.10169649  0.22280631 -0.03383401 -0.33519955]
[ Experience replay ] starts
[ episode 19 ][ timestamp 36 ] state=[ 0.10169649  0.22280631 -0.03383401 -0.33519955], action=0, reward=1.0, next_state=[ 0.10615262  0.02818181 -0.040538   -0.05337517]
[ Experience replay ] starts
[ episode 19 ][ timestamp 37 ] state=[ 0.10615262  0.02818181 -0.040538   -0.05337517], action=1, reward=1.0, next_state=[ 0.10671626  0.22386087 -0.0416055  -0.35856753]
[ Experience replay ] starts
[ episode 19 ][ timestamp 38 ] state=[ 0.10671626  0.22386087 -0.0416055  -0.35856753], action=0, reward=1.0, next_state=[ 0.11119347  0.02935431 -0.04877685 -0.07928868]
[ Experience replay ] starts
[ episode 19 ][ timestamp 39 ] state=[ 0.11119347  0.02935431 -0.04877685 -0.07928868], action=1, reward=1.0, next_state=[ 0.11178056  0.22514033 -0.05036263 -0.38695313]
[ Experience replay ] starts


[ episode 20 ][ timestamp 23 ] state=[ 0.06541238  0.42082395 -0.11748807 -0.81946053], action=0, reward=1.0, next_state=[ 0.07382886  0.22748907 -0.13387728 -0.5659195 ]
[ Experience replay ] starts
[ episode 20 ][ timestamp 24 ] state=[ 0.07382886  0.22748907 -0.13387728 -0.5659195 ], action=0, reward=1.0, next_state=[ 0.07837865  0.03447417 -0.14519567 -0.3182303 ]
[ Experience replay ] starts
[ episode 20 ][ timestamp 25 ] state=[ 0.07837865  0.03447417 -0.14519567 -0.3182303 ], action=0, reward=1.0, next_state=[ 0.07906813 -0.1583136  -0.15156028 -0.07462923]
[ Experience replay ] starts
[ episode 20 ][ timestamp 26 ] state=[ 0.07906813 -0.1583136  -0.15156028 -0.07462923], action=1, reward=1.0, next_state=[ 0.07590186  0.03861953 -0.15305286 -0.41103098]
[ Experience replay ] starts
[ episode 20 ][ timestamp 27 ] state=[ 0.07590186  0.03861953 -0.15305286 -0.41103098], action=0, reward=1.0, next_state=[ 0.07667425 -0.15403902 -0.16127348 -0.17024343]
[ Experience replay ] starts


[ episode 21 ][ timestamp 34 ] state=[ 0.02957422  0.21415227 -0.12153611 -0.72928269], action=0, reward=1.0, next_state=[ 0.03385726  0.02090114 -0.13612176 -0.47718797]
[ Experience replay ] starts
[ episode 21 ][ timestamp 35 ] state=[ 0.03385726  0.02090114 -0.13612176 -0.47718797], action=0, reward=1.0, next_state=[ 0.03427529 -0.17206279 -0.14566552 -0.23031585]
[ Experience replay ] starts
[ episode 21 ][ timestamp 36 ] state=[ 0.03427529 -0.17206279 -0.14566552 -0.23031585], action=1, reward=1.0, next_state=[ 0.03083403  0.02480778 -0.15027184 -0.56516865]
[ Experience replay ] starts
[ episode 21 ][ timestamp 37 ] state=[ 0.03083403  0.02480778 -0.15027184 -0.56516865], action=0, reward=1.0, next_state=[ 0.03133019 -0.16792194 -0.16157521 -0.32334587]
[ Experience replay ] starts
[ episode 21 ][ timestamp 38 ] state=[ 0.03133019 -0.16792194 -0.16157521 -0.32334587], action=1, reward=1.0, next_state=[ 0.02797175  0.02908763 -0.16804213 -0.66230788]
[ Experience replay ] starts


[ episode 22 ][ timestamp 36 ] state=[ 0.1014452   0.1974391  -0.12525462 -0.55022569], action=0, reward=1.0, next_state=[ 0.10539398  0.00427843 -0.13625914 -0.2994832 ]
[ Experience replay ] starts
[ episode 22 ][ timestamp 37 ] state=[ 0.10539398  0.00427843 -0.13625914 -0.2994832 ], action=1, reward=1.0, next_state=[ 0.10547955  0.20105288 -0.1422488  -0.63184538]
[ Experience replay ] starts
[ episode 22 ][ timestamp 38 ] state=[ 0.10547955  0.20105288 -0.1422488  -0.63184538], action=1, reward=1.0, next_state=[ 0.10950061  0.39784286 -0.15488571 -0.96572914]
[ Experience replay ] starts
[ episode 22 ][ timestamp 39 ] state=[ 0.10950061  0.39784286 -0.15488571 -0.96572914], action=0, reward=1.0, next_state=[ 0.11745747  0.2051021  -0.17420029 -0.72543345]
[ Experience replay ] starts
[ episode 22 ][ timestamp 40 ] state=[ 0.11745747  0.2051021  -0.17420029 -0.72543345], action=1, reward=1.0, next_state=[ 0.12155951  0.40215    -0.18870896 -1.0674882 ]
[ Experience replay ] starts


[ episode 24 ][ timestamp 9 ] state=[ 0.02585754  0.36760871  0.01315024 -0.4632921 ], action=0, reward=1.0, next_state=[ 0.03320971  0.17230341  0.00388439 -0.16649342]
[ Experience replay ] starts
[ episode 24 ][ timestamp 10 ] state=[ 0.03320971  0.17230341  0.00388439 -0.16649342], action=1, reward=1.0, next_state=[ 0.03665578  0.36736954  0.00055452 -0.4579484 ]
[ Experience replay ] starts
[ episode 24 ][ timestamp 11 ] state=[ 0.03665578  0.36736954  0.00055452 -0.4579484 ], action=0, reward=1.0, next_state=[ 0.04400317  0.17223976 -0.00860444 -0.16509074]
[ Experience replay ] starts
[ episode 24 ][ timestamp 12 ] state=[ 0.04400317  0.17223976 -0.00860444 -0.16509074], action=1, reward=1.0, next_state=[ 0.04744797  0.36748382 -0.01190626 -0.46047566]
[ Experience replay ] starts
[ episode 24 ][ timestamp 13 ] state=[ 0.04744797  0.36748382 -0.01190626 -0.46047566], action=0, reward=1.0, next_state=[ 0.05479764  0.17253216 -0.02111577 -0.17156926]
[ Experience replay ] starts
[

[ episode 25 ][ timestamp 17 ] state=[ 0.0880417  -0.03998907 -0.06742928 -0.11035075], action=1, reward=1.0, next_state=[ 0.08724192  0.15603107 -0.0696363  -0.42352197]
[ Experience replay ] starts
[ episode 25 ][ timestamp 18 ] state=[ 0.08724192  0.15603107 -0.0696363  -0.42352197], action=0, reward=1.0, next_state=[ 0.09036254 -0.03803885 -0.07810674 -0.15357916]
[ Experience replay ] starts
[ episode 25 ][ timestamp 19 ] state=[ 0.09036254 -0.03803885 -0.07810674 -0.15357916], action=1, reward=1.0, next_state=[ 0.08960177  0.15810954 -0.08117832 -0.46984476]
[ Experience replay ] starts
[ episode 25 ][ timestamp 20 ] state=[ 0.08960177  0.15810954 -0.08117832 -0.46984476], action=0, reward=1.0, next_state=[ 0.09276396 -0.03577748 -0.09057521 -0.2038122 ]
[ Experience replay ] starts
[ episode 25 ][ timestamp 21 ] state=[ 0.09276396 -0.03577748 -0.09057521 -0.2038122 ], action=1, reward=1.0, next_state=[ 0.09204841  0.16051518 -0.09465146 -0.52363697]
[ Experience replay ] starts


[ episode 26 ][ timestamp 26 ] state=[ 0.05091867  0.20002059 -0.13554553 -0.74233226], action=0, reward=1.0, next_state=[ 0.05491908  0.00700408 -0.15039217 -0.49519156]
[ Experience replay ] starts
[ episode 26 ][ timestamp 27 ] state=[ 0.05491908  0.00700408 -0.15039217 -0.49519156], action=1, reward=1.0, next_state=[ 0.05505916  0.20389127 -0.160296   -0.83123757]
[ Experience replay ] starts
[ episode 26 ][ timestamp 28 ] state=[ 0.05505916  0.20389127 -0.160296   -0.83123757], action=0, reward=1.0, next_state=[ 0.05913699  0.01128046 -0.17692076 -0.59295071]
[ Experience replay ] starts
[ episode 26 ][ timestamp 29 ] state=[ 0.05913699  0.01128046 -0.17692076 -0.59295071], action=1, reward=1.0, next_state=[ 0.0593626   0.20838    -0.18877977 -0.9357288 ]
[ Experience replay ] starts
[ episode 26 ][ timestamp 30 ] state=[ 0.0593626   0.20838    -0.18877977 -0.9357288 ], action=0, reward=1.0, next_state=[ 0.0635302   0.01623638 -0.20749435 -0.70780599]
[ Experience replay ] starts


[ episode 28 ][ timestamp 10 ] state=[ 0.02132957  0.17898218 -0.05893626 -0.38276212], action=0, reward=1.0, next_state=[ 0.02490921 -0.0152556  -0.0665915  -0.10922854]
[ Experience replay ] starts
[ episode 28 ][ timestamp 11 ] state=[ 0.02490921 -0.0152556  -0.0665915  -0.10922854], action=1, reward=1.0, next_state=[ 0.0246041   0.18075427 -0.06877607 -0.42215513]
[ Experience replay ] starts
[ episode 28 ][ timestamp 12 ] state=[ 0.0246041   0.18075427 -0.06877607 -0.42215513], action=1, reward=1.0, next_state=[ 0.02821919  0.37677977 -0.07721918 -0.73570247]
[ Experience replay ] starts
[ episode 28 ][ timestamp 13 ] state=[ 0.02821919  0.37677977 -0.07721918 -0.73570247], action=0, reward=1.0, next_state=[ 0.03575478  0.18280459 -0.09193323 -0.46828662]
[ Experience replay ] starts
[ episode 28 ][ timestamp 14 ] state=[ 0.03575478  0.18280459 -0.09193323 -0.46828662], action=0, reward=1.0, next_state=[ 0.03941087 -0.01090651 -0.10129896 -0.20593732]
[ Experience replay ] starts


[ episode 29 ][ timestamp 25 ] state=[ 0.03510092  0.02395607 -0.01248365 -0.07157516], action=1, reward=1.0, next_state=[ 0.03558004  0.21925475 -0.01391516 -0.36817045]
[ Experience replay ] starts
[ episode 29 ][ timestamp 26 ] state=[ 0.03558004  0.21925475 -0.01391516 -0.36817045], action=0, reward=1.0, next_state=[ 0.03996514  0.02433326 -0.02127857 -0.07990744]
[ Experience replay ] starts
[ episode 29 ][ timestamp 27 ] state=[ 0.03996514  0.02433326 -0.02127857 -0.07990744], action=1, reward=1.0, next_state=[ 0.0404518   0.21975368 -0.02287671 -0.37922714]
[ Experience replay ] starts
[ episode 29 ][ timestamp 28 ] state=[ 0.0404518   0.21975368 -0.02287671 -0.37922714], action=1, reward=1.0, next_state=[ 0.04484688  0.41519291 -0.03046126 -0.67903444]
[ Experience replay ] starts
[ episode 29 ][ timestamp 29 ] state=[ 0.04484688  0.41519291 -0.03046126 -0.67903444], action=0, reward=1.0, next_state=[ 0.05315074  0.22050706 -0.04404195 -0.39609537]
[ Experience replay ] starts


[ episode 31 ][ timestamp 6 ] state=[-0.0618463  -0.24021967  0.09383247  0.38901764], action=1, reward=1.0, next_state=[-0.06665069 -0.0465461   0.10161282  0.12733153]
[ Experience replay ] starts
[ episode 31 ][ timestamp 7 ] state=[-0.06665069 -0.0465461   0.10161282  0.12733153], action=0, reward=1.0, next_state=[-0.06758161 -0.24296583  0.10415946  0.45026417]
[ Experience replay ] starts
[ episode 31 ][ timestamp 8 ] state=[-0.06758161 -0.24296583  0.10415946  0.45026417], action=1, reward=1.0, next_state=[-0.07244093 -0.04945939  0.11316474  0.19214517]
[ Experience replay ] starts
[ episode 31 ][ timestamp 9 ] state=[-0.07244093 -0.04945939  0.11316474  0.19214517], action=1, reward=1.0, next_state=[-0.07343012  0.14387711  0.11700764 -0.06280516]
[ Experience replay ] starts
[ episode 31 ][ timestamp 10 ] state=[-0.07343012  0.14387711  0.11700764 -0.06280516], action=0, reward=1.0, next_state=[-0.07055257 -0.05271114  0.11575154  0.26438274]
[ Experience replay ] starts
[ ep

[ episode 33 ][ timestamp 13 ] state=[-0.05805009  0.39988486  0.05339102 -0.40480459], action=0, reward=1.0, next_state=[-0.05005239  0.204048    0.04529493 -0.09577839]
[ Experience replay ] starts
[ episode 33 ][ timestamp 14 ] state=[-0.05005239  0.204048    0.04529493 -0.09577839], action=0, reward=1.0, next_state=[-0.04597143  0.00830711  0.04337936  0.21084396]
[ Experience replay ] starts
[ episode 33 ][ timestamp 15 ] state=[-0.04597143  0.00830711  0.04337936  0.21084396], action=0, reward=1.0, next_state=[-0.04580529 -0.18740737  0.04759624  0.51688905]
[ Experience replay ] starts
[ episode 33 ][ timestamp 16 ] state=[-0.04580529 -0.18740737  0.04759624  0.51688905], action=0, reward=1.0, next_state=[-0.04955344 -0.38316607  0.05793402  0.82418256]
[ Experience replay ] starts
[ episode 33 ][ timestamp 17 ] state=[-0.04955344 -0.38316607  0.05793402  0.82418256], action=1, reward=1.0, next_state=[-0.05721676 -0.18888241  0.07441768  0.55026907]
[ Experience replay ] starts


[ episode 35 ][ timestamp 21 ] state=[ 0.13826467  0.04512091 -0.0584202  -0.01700823], action=1, reward=1.0, next_state=[ 0.13916708  0.24102991 -0.05876037 -0.32753616]
[ Experience replay ] starts
[ episode 35 ][ timestamp 22 ] state=[ 0.13916708  0.24102991 -0.05876037 -0.32753616], action=0, reward=1.0, next_state=[ 0.14398768  0.0467916  -0.06531109 -0.05394716]
[ Experience replay ] starts
[ episode 35 ][ timestamp 23 ] state=[ 0.14398768  0.0467916  -0.06531109 -0.05394716], action=1, reward=1.0, next_state=[ 0.14492351  0.24278627 -0.06639003 -0.36650019]
[ Experience replay ] starts
[ episode 35 ][ timestamp 24 ] state=[ 0.14492351  0.24278627 -0.06639003 -0.36650019], action=0, reward=1.0, next_state=[ 0.14977924  0.04866747 -0.07372004 -0.0954678 ]
[ Experience replay ] starts
[ episode 35 ][ timestamp 25 ] state=[ 0.14977924  0.04866747 -0.07372004 -0.0954678 ], action=1, reward=1.0, next_state=[ 0.15075259  0.24476436 -0.07562939 -0.41046826]
[ Experience replay ] starts


[ episode 36 ][ timestamp 26 ] state=[ 0.05202416 -0.165474   -0.12057706 -0.03127434], action=0, reward=1.0, next_state=[ 0.04871468 -0.35867898 -0.12120254  0.22106514]
[ Experience replay ] starts
[ episode 36 ][ timestamp 27 ] state=[ 0.04871468 -0.35867898 -0.12120254  0.22106514], action=0, reward=1.0, next_state=[ 0.0415411  -0.5518788  -0.11678124  0.47319251]
[ Experience replay ] starts
[ episode 36 ][ timestamp 28 ] state=[ 0.0415411  -0.5518788  -0.11678124  0.47319251], action=0, reward=1.0, next_state=[ 0.03050353 -0.74517456 -0.10731739  0.7269056 ]
[ Experience replay ] starts
[ episode 36 ][ timestamp 29 ] state=[ 0.03050353 -0.74517456 -0.10731739  0.7269056 ], action=0, reward=1.0, next_state=[ 0.01560003 -0.938662   -0.09277928  0.98397628]
[ Experience replay ] starts
[ episode 36 ][ timestamp 30 ] state=[ 0.01560003 -0.938662   -0.09277928  0.98397628], action=1, reward=1.0, next_state=[-0.00317321 -0.74242784 -0.07309975  0.66365302]
[ Experience replay ] starts


[ episode 37 ][ timestamp 17 ] state=[-0.04717662  0.02465653  0.10418103  0.17495968], action=0, reward=1.0, next_state=[-0.04668349 -0.17179019  0.10768022  0.49860591]
[ Experience replay ] starts
[ episode 37 ][ timestamp 18 ] state=[-0.04668349 -0.17179019  0.10768022  0.49860591], action=0, reward=1.0, next_state=[-0.0501193  -0.36825242  0.11765234  0.82318926]
[ Experience replay ] starts
[ episode 37 ][ timestamp 19 ] state=[-0.0501193  -0.36825242  0.11765234  0.82318926], action=1, reward=1.0, next_state=[-0.05748434 -0.17491955  0.13411612  0.56970479]
[ Experience replay ] starts
[ episode 37 ][ timestamp 20 ] state=[-0.05748434 -0.17491955  0.13411612  0.56970479], action=1, reward=1.0, next_state=[-0.06098273  0.01809179  0.14551022  0.32209971]
[ Experience replay ] starts
[ episode 37 ][ timestamp 21 ] state=[-0.06098273  0.01809179  0.14551022  0.32209971], action=0, reward=1.0, next_state=[-0.0606209  -0.17877014  0.15195221  0.65690116]
[ Experience replay ] starts


[ episode 38 ][ timestamp 38 ] state=[-0.13714348 -0.96463996  0.09247475  1.18243015], action=1, reward=1.0, next_state=[-0.15643628 -0.77083165  0.11612335  0.92010867]
[ Experience replay ] starts
[ episode 38 ][ timestamp 39 ] state=[-0.15643628 -0.77083165  0.11612335  0.92010867], action=0, reward=1.0, next_state=[-0.17185291 -0.96731536  0.13452553  1.24691291]
[ Experience replay ] starts
[ episode 38 ][ timestamp 40 ] state=[-0.17185291 -0.96731536  0.13452553  1.24691291], action=0, reward=1.0, next_state=[-0.19119922 -1.16388125  0.15946378  1.57852913]
[ Experience replay ] starts
[ episode 38 ][ timestamp 41 ] state=[-0.19119922 -1.16388125  0.15946378  1.57852913], action=1, reward=1.0, next_state=[-0.21447684 -0.97097755  0.19103437  1.33952867]
[ Experience replay ] starts
[ episode 38 ][ timestamp 42 ] state=[-0.21447684 -0.97097755  0.19103437  1.33952867], action=1, reward=-1.0, next_state=[-0.2338964  -0.77870396  0.21782494  1.11218806]
[ Experience replay ] starts

[ episode 42 ][ timestamp 2 ] state=[ 0.00988495 -0.21679854  0.00070917  0.33354043], action=0, reward=1.0, next_state=[ 0.00554898 -0.41193058  0.00737998  0.62644691]
[ Experience replay ] starts
[ episode 42 ][ timestamp 3 ] state=[ 0.00554898 -0.41193058  0.00737998  0.62644691], action=1, reward=1.0, next_state=[-0.00268963 -0.21691242  0.01990892  0.33609733]
[ Experience replay ] starts
[ episode 42 ][ timestamp 4 ] state=[-0.00268963 -0.21691242  0.01990892  0.33609733], action=0, reward=1.0, next_state=[-0.00702788 -0.41231196  0.02663086  0.63499138]
[ Experience replay ] starts
[ episode 42 ][ timestamp 5 ] state=[-0.00702788 -0.41231196  0.02663086  0.63499138], action=1, reward=1.0, next_state=[-0.01527412 -0.21757138  0.03933069  0.35081264]
[ Experience replay ] starts
[ episode 42 ][ timestamp 6 ] state=[-0.01527412 -0.21757138  0.03933069  0.35081264], action=1, reward=1.0, next_state=[-0.01962555 -0.02303019  0.04634694  0.07078678]
[ Experience replay ] starts
[ epi

[ episode 43 ][ timestamp 23 ] state=[ 0.08819861  0.36981542 -0.09773357 -0.7795698 ], action=0, reward=1.0, next_state=[ 0.09559492  0.17616326 -0.11332497 -0.51916571]
[ Experience replay ] starts
[ episode 43 ][ timestamp 24 ] state=[ 0.09559492  0.17616326 -0.11332497 -0.51916571], action=0, reward=1.0, next_state=[ 0.09911818 -0.01719614 -0.12370829 -0.26423332]
[ Experience replay ] starts
[ episode 43 ][ timestamp 25 ] state=[ 0.09911818 -0.01719614 -0.12370829 -0.26423332], action=1, reward=1.0, next_state=[ 0.09877426  0.17945444 -0.12899295 -0.59323248]
[ Experience replay ] starts
[ episode 43 ][ timestamp 26 ] state=[ 0.09877426  0.17945444 -0.12899295 -0.59323248], action=1, reward=1.0, next_state=[ 0.10236335  0.3761236  -0.1408576  -0.92360415]
[ Experience replay ] starts
[ episode 43 ][ timestamp 27 ] state=[ 0.10236335  0.3761236  -0.1408576  -0.92360415], action=0, reward=1.0, next_state=[ 0.10988582  0.18315659 -0.15932968 -0.6782957 ]
[ Experience replay ] starts


[ episode 45 ][ timestamp 8 ] state=[ 0.05822408  0.23916997 -0.01786343 -0.33135553], action=1, reward=1.0, next_state=[ 0.06300748  0.43454158 -0.02449054 -0.62961775]
[ Experience replay ] starts
[ episode 45 ][ timestamp 9 ] state=[ 0.06300748  0.43454158 -0.02449054 -0.62961775], action=0, reward=1.0, next_state=[ 0.07169831  0.23976979 -0.03708289 -0.34474718]
[ Experience replay ] starts
[ episode 45 ][ timestamp 10 ] state=[ 0.07169831  0.23976979 -0.03708289 -0.34474718], action=1, reward=1.0, next_state=[ 0.07649371  0.43539911 -0.04397784 -0.64888928]
[ Experience replay ] starts
[ episode 45 ][ timestamp 11 ] state=[ 0.07649371  0.43539911 -0.04397784 -0.64888928], action=0, reward=1.0, next_state=[ 0.08520169  0.24091649 -0.05695562 -0.37037274]
[ Experience replay ] starts
[ episode 45 ][ timestamp 12 ] state=[ 0.08520169  0.24091649 -0.05695562 -0.37037274], action=1, reward=1.0, next_state=[ 0.09002002  0.43679944 -0.06436308 -0.68045662]
[ Experience replay ] starts
[ 

[ episode 46 ][ timestamp 22 ] state=[ 0.01015011  0.150265   -0.08155398 -0.34218529], action=0, reward=1.0, next_state=[ 0.01315541 -0.04360769 -0.08839769 -0.07629311]
[ Experience replay ] starts
[ episode 46 ][ timestamp 23 ] state=[ 0.01315541 -0.04360769 -0.08839769 -0.07629311], action=0, reward=1.0, next_state=[ 0.01228326 -0.23735849 -0.08992355  0.18724325]
[ Experience replay ] starts
[ episode 46 ][ timestamp 24 ] state=[ 0.01228326 -0.23735849 -0.08992355  0.18724325], action=1, reward=1.0, next_state=[ 0.00753609 -0.04107274 -0.08617869 -0.13239768]
[ Experience replay ] starts
[ episode 46 ][ timestamp 25 ] state=[ 0.00753609 -0.04107274 -0.08617869 -0.13239768], action=1, reward=1.0, next_state=[ 0.00671463  0.15517118 -0.08882664 -0.45097632]
[ Experience replay ] starts
[ episode 46 ][ timestamp 26 ] state=[ 0.00671463  0.15517118 -0.08882664 -0.45097632], action=0, reward=1.0, next_state=[ 0.00981806 -0.03858959 -0.09784617 -0.18756172]
[ Experience replay ] starts


[ episode 47 ][ timestamp 29 ] state=[ 0.10716028  0.36421356 -0.02822647 -0.30772967], action=0, reward=1.0, next_state=[ 0.11444455  0.16950495 -0.03438107 -0.02408057]
[ Experience replay ] starts
[ episode 47 ][ timestamp 30 ] state=[ 0.11444455  0.16950495 -0.03438107 -0.02408057], action=1, reward=1.0, next_state=[ 0.11783465  0.36510265 -0.03486268 -0.32740978]
[ Experience replay ] starts
[ episode 47 ][ timestamp 31 ] state=[ 0.11783465  0.36510265 -0.03486268 -0.32740978], action=0, reward=1.0, next_state=[ 0.12513671  0.17049393 -0.04141087 -0.04592163]
[ Experience replay ] starts
[ episode 47 ][ timestamp 32 ] state=[ 0.12513671  0.17049393 -0.04141087 -0.04592163], action=1, reward=1.0, next_state=[ 0.12854659  0.36618447 -0.04232931 -0.35137711]
[ Experience replay ] starts
[ episode 47 ][ timestamp 33 ] state=[ 0.12854659  0.36618447 -0.04232931 -0.35137711], action=0, reward=1.0, next_state=[ 0.13587027  0.17168925 -0.04935685 -0.0723367 ]
[ Experience replay ] starts


[ episode 48 ][ timestamp 14 ] state=[ 0.00956424  0.54975051 -0.00827993 -0.70519902], action=0, reward=1.0, next_state=[ 0.02055925  0.35474426 -0.02238391 -0.41513394]
[ Experience replay ] starts
[ episode 48 ][ timestamp 15 ] state=[ 0.02055925  0.35474426 -0.02238391 -0.41513394], action=0, reward=1.0, next_state=[ 0.02765414  0.1599466  -0.03068659 -0.12959096]
[ Experience replay ] starts
[ episode 48 ][ timestamp 16 ] state=[ 0.02765414  0.1599466  -0.03068659 -0.12959096], action=0, reward=1.0, next_state=[ 0.03085307 -0.03472263 -0.0332784   0.15325497]
[ Experience replay ] starts
[ episode 48 ][ timestamp 17 ] state=[ 0.03085307 -0.03472263 -0.0332784   0.15325497], action=1, reward=1.0, next_state=[ 0.03015862  0.16085962 -0.03021331 -0.14973803]
[ Experience replay ] starts
[ episode 48 ][ timestamp 18 ] state=[ 0.03015862  0.16085962 -0.03021331 -0.14973803], action=1, reward=1.0, next_state=[ 0.03337581  0.3564009  -0.03320807 -0.45179743]
[ Experience replay ] starts


[ episode 49 ][ timestamp 12 ] state=[-0.03262443 -0.14841349 -0.01882084  0.19714614], action=0, reward=1.0, next_state=[-0.0355927  -0.34326125 -0.01487792  0.48383301]
[ Experience replay ] starts
[ episode 49 ][ timestamp 13 ] state=[-0.0355927  -0.34326125 -0.01487792  0.48383301], action=0, reward=1.0, next_state=[-0.04245792 -0.5381701  -0.00520126  0.77178999]
[ Experience replay ] starts
[ episode 49 ][ timestamp 14 ] state=[-0.04245792 -0.5381701  -0.00520126  0.77178999], action=1, reward=1.0, next_state=[-0.05322133 -0.34297697  0.01023454  0.47747509]
[ Experience replay ] starts
[ episode 49 ][ timestamp 15 ] state=[-0.05322133 -0.34297697  0.01023454  0.47747509], action=0, reward=1.0, next_state=[-0.06008086 -0.53824191  0.01978405  0.77336607]
[ Experience replay ] starts
[ episode 49 ][ timestamp 16 ] state=[-0.06008086 -0.53824191  0.01978405  0.77336607], action=1, reward=1.0, next_state=[-0.0708457  -0.34339766  0.03525137  0.48697301]
[ Experience replay ] starts


[ episode 50 ][ timestamp 37 ] state=[ 0.03179993 -0.07456557  0.16174914  0.80470828], action=1, reward=1.0, next_state=[0.03030862 0.11801319 0.1778433  0.56695785]
[ Experience replay ] starts
[ episode 50 ][ timestamp 38 ] state=[0.03030862 0.11801319 0.1778433  0.56695785], action=1, reward=1.0, next_state=[0.03266888 0.3102533  0.18918246 0.33515659]
[ Experience replay ] starts
[ episode 50 ][ timestamp 39 ] state=[0.03266888 0.3102533  0.18918246 0.33515659], action=1, reward=1.0, next_state=[0.03887395 0.50225016 0.19588559 0.1075881 ]
[ Experience replay ] starts
[ episode 50 ][ timestamp 40 ] state=[0.03887395 0.50225016 0.19588559 0.1075881 ], action=0, reward=1.0, next_state=[0.04891895 0.30493936 0.19803735 0.45511691]
[ Experience replay ] starts
[ episode 50 ][ timestamp 41 ] state=[0.04891895 0.30493936 0.19803735 0.45511691], action=0, reward=1.0, next_state=[0.05501774 0.10764949 0.20713969 0.8031107 ]
[ Experience replay ] starts
[ episode 50 ][ timestamp 42 ] state

[ episode 53 ][ timestamp 5 ] state=[ 0.02728506  0.75086449  0.00692191 -1.1147397 ], action=1, reward=1.0, next_state=[ 0.04230235  0.94589488 -0.01537288 -1.40524325]
[ Experience replay ] starts
[ episode 53 ][ timestamp 6 ] state=[ 0.04230235  0.94589488 -0.01537288 -1.40524325], action=1, reward=1.0, next_state=[ 0.06122025  1.14120427 -0.04347775 -1.70269217]
[ Experience replay ] starts
[ episode 53 ][ timestamp 7 ] state=[ 0.06122025  1.14120427 -0.04347775 -1.70269217], action=0, reward=1.0, next_state=[ 0.08404434  0.94660903 -0.07753159 -1.42385359]
[ Experience replay ] starts
[ episode 53 ][ timestamp 8 ] state=[ 0.08404434  0.94660903 -0.07753159 -1.42385359], action=0, reward=1.0, next_state=[ 0.10297652  0.75252655 -0.10600866 -1.15637587]
[ Experience replay ] starts
[ episode 53 ][ timestamp 9 ] state=[ 0.10297652  0.75252655 -0.10600866 -1.15637587], action=0, reward=1.0, next_state=[ 0.11802705  0.5589341  -0.12913618 -0.89872555]
[ Experience replay ] starts
[ epi

[ episode 55 ][ timestamp 7 ] state=[ 0.0449432   1.16412687 -0.13674036 -1.8725435 ], action=1, reward=1.0, next_state=[ 0.06822574  1.36045228 -0.17419123 -2.20435924]
[ Experience replay ] starts
[ episode 55 ][ timestamp 8 ] state=[ 0.06822574  1.36045228 -0.17419123 -2.20435924], action=1, reward=-1.0, next_state=[ 0.09543479  1.55676907 -0.21827841 -2.54533177]
[ Experience replay ] starts
[ Ended! ] Episode 55: Exploration_rate=0.01. Score=8.
[ episode 56 ] state=[-0.0447301   0.04940131 -0.00751866 -0.02033787]
[ episode 56 ][ timestamp 1 ] state=[-0.0447301   0.04940131 -0.00751866 -0.02033787], action=1, reward=1.0, next_state=[-0.04374207  0.24463027 -0.00792542 -0.31538351]
[ Experience replay ] starts
[ episode 56 ][ timestamp 2 ] state=[-0.04374207  0.24463027 -0.00792542 -0.31538351], action=1, reward=1.0, next_state=[-0.03884947  0.43986422 -0.01423309 -0.61055527]
[ Experience replay ] starts
[ episode 56 ][ timestamp 3 ] state=[-0.03884947  0.43986422 -0.01423309 -0.6

[ episode 57 ][ timestamp 30 ] state=[-0.01466437 -0.1474553   0.07518328  0.16520474], action=0, reward=1.0, next_state=[-0.01761348 -0.34356847  0.07848738  0.48062656]
[ Experience replay ] starts
[ episode 57 ][ timestamp 31 ] state=[-0.01761348 -0.34356847  0.07848738  0.48062656], action=1, reward=1.0, next_state=[-0.02448485 -0.14963709  0.08809991  0.21367663]
[ Experience replay ] starts
[ episode 57 ][ timestamp 32 ] state=[-0.02448485 -0.14963709  0.08809991  0.21367663], action=1, reward=1.0, next_state=[-0.02747759  0.04412207  0.09237344 -0.04996705]
[ Experience replay ] starts
[ episode 57 ][ timestamp 33 ] state=[-0.02747759  0.04412207  0.09237344 -0.04996705], action=0, reward=1.0, next_state=[-0.02659515 -0.15219468  0.0913741   0.2703718 ]
[ Experience replay ] starts
[ episode 57 ][ timestamp 34 ] state=[-0.02659515 -0.15219468  0.0913741   0.2703718 ], action=1, reward=1.0, next_state=[-0.02963904  0.04151261  0.09678154  0.00784962]
[ Experience replay ] starts


[ episode 58 ][ timestamp 14 ] state=[ 0.08887534  0.23390254 -0.15588102 -0.58686362], action=1, reward=1.0, next_state=[ 0.09355339  0.43082455 -0.16761829 -0.92430881]
[ Experience replay ] starts
[ episode 58 ][ timestamp 15 ] state=[ 0.09355339  0.43082455 -0.16761829 -0.92430881], action=0, reward=1.0, next_state=[ 0.10216988  0.23831473 -0.18610447 -0.68864048]
[ Experience replay ] starts
[ episode 58 ][ timestamp 16 ] state=[ 0.10216988  0.23831473 -0.18610447 -0.68864048], action=0, reward=1.0, next_state=[ 0.10693618  0.04619626 -0.19987728 -0.45983831]
[ Experience replay ] starts
[ episode 58 ][ timestamp 17 ] state=[ 0.10693618  0.04619626 -0.19987728 -0.45983831], action=0, reward=1.0, next_state=[ 0.1078601  -0.14562255 -0.20907405 -0.23621191]
[ Experience replay ] starts
[ episode 58 ][ timestamp 18 ] state=[ 0.1078601  -0.14562255 -0.20907405 -0.23621191], action=0, reward=-1.0, next_state=[ 0.10494765 -0.33723999 -0.21379828 -0.01606582]
[ Experience replay ] starts

[ episode 60 ][ timestamp 17 ] state=[ 0.00638157  0.00942107 -0.06217662 -0.1367069 ], action=1, reward=1.0, next_state=[ 0.00656999  0.20537593 -0.06491076 -0.44833937]
[ Experience replay ] starts
[ episode 60 ][ timestamp 18 ] state=[ 0.00656999  0.20537593 -0.06491076 -0.44833937], action=0, reward=1.0, next_state=[ 0.01067751  0.01122934 -0.07387755 -0.17680313]
[ Experience replay ] starts
[ episode 60 ][ timestamp 19 ] state=[ 0.01067751  0.01122934 -0.07387755 -0.17680313], action=1, reward=1.0, next_state=[ 0.01090209  0.20732653 -0.07741361 -0.49184682]
[ Experience replay ] starts
[ episode 60 ][ timestamp 20 ] state=[ 0.01090209  0.20732653 -0.07741361 -0.49184682], action=0, reward=1.0, next_state=[ 0.01504862  0.01337699 -0.08725055 -0.2245307 ]
[ Experience replay ] starts
[ episode 60 ][ timestamp 21 ] state=[ 0.01504862  0.01337699 -0.08725055 -0.2245307 ], action=0, reward=1.0, next_state=[ 0.01531616 -0.18039662 -0.09174116  0.03940495]
[ Experience replay ] starts


[ episode 61 ][ timestamp 22 ] state=[ 0.08899364  0.18596519 -0.06675877 -0.36335641], action=0, reward=1.0, next_state=[ 0.09271294 -0.00814757 -0.0740259  -0.09244836]
[ Experience replay ] starts
[ episode 61 ][ timestamp 23 ] state=[ 0.09271294 -0.00814757 -0.0740259  -0.09244836], action=1, reward=1.0, next_state=[ 0.09254999  0.18795304 -0.07587486 -0.40753744]
[ Experience replay ] starts
[ episode 61 ][ timestamp 24 ] state=[ 0.09254999  0.18795304 -0.07587486 -0.40753744], action=0, reward=1.0, next_state=[ 0.09630905 -0.00601566 -0.08402561 -0.13970731]
[ Experience replay ] starts
[ episode 61 ][ timestamp 25 ] state=[ 0.09630905 -0.00601566 -0.08402561 -0.13970731], action=1, reward=1.0, next_state=[ 0.09618874  0.19020295 -0.08681976 -0.45767129]
[ Experience replay ] starts
[ episode 61 ][ timestamp 26 ] state=[ 0.09618874  0.19020295 -0.08681976 -0.45767129], action=0, reward=1.0, next_state=[ 0.0999928  -0.00359123 -0.09597318 -0.19356785]
[ Experience replay ] starts


[ episode 62 ][ timestamp 33 ] state=[-0.03663173  0.02448559  0.10486388  0.28876645], action=0, reward=1.0, next_state=[-0.03614202 -0.17196326  0.11063921  0.61259454]
[ Experience replay ] starts
[ episode 62 ][ timestamp 34 ] state=[-0.03614202 -0.17196326  0.11063921  0.61259454], action=1, reward=1.0, next_state=[-0.03958128  0.02145277  0.1228911   0.356706  ]
[ Experience replay ] starts
[ episode 62 ][ timestamp 35 ] state=[-0.03958128  0.02145277  0.1228911   0.356706  ], action=0, reward=1.0, next_state=[-0.03915223 -0.17518251  0.13002522  0.68547362]
[ Experience replay ] starts
[ episode 62 ][ timestamp 36 ] state=[-0.03915223 -0.17518251  0.13002522  0.68547362], action=1, reward=1.0, next_state=[-0.04265588  0.01791753  0.14373469  0.43638838]
[ Experience replay ] starts
[ episode 62 ][ timestamp 37 ] state=[-0.04265588  0.01791753  0.14373469  0.43638838], action=1, reward=1.0, next_state=[-0.04229753  0.21074352  0.15246246  0.1922447 ]
[ Experience replay ] starts


[ episode 63 ][ timestamp 24 ] state=[ 0.08477796  0.97003643  0.11123007 -0.71278981], action=1, reward=1.0, next_state=[ 0.10417869  1.16345694  0.09697427 -0.96849341]
[ Experience replay ] starts
[ episode 63 ][ timestamp 25 ] state=[ 0.10417869  1.16345694  0.09697427 -0.96849341], action=1, reward=1.0, next_state=[ 0.12744783  1.35715258  0.07760441 -1.22920603]
[ Experience replay ] starts
[ episode 63 ][ timestamp 26 ] state=[ 0.12744783  1.35715258  0.07760441 -1.22920603], action=1, reward=1.0, next_state=[ 0.15459088  1.55119498  0.05302029 -1.49660081]
[ Experience replay ] starts
[ episode 63 ][ timestamp 27 ] state=[ 0.15459088  1.55119498  0.05302029 -1.49660081], action=1, reward=1.0, next_state=[ 0.18561478  1.74563382  0.02308827 -1.77226855]
[ Experience replay ] starts
[ episode 63 ][ timestamp 28 ] state=[ 0.18561478  1.74563382  0.02308827 -1.77226855], action=0, reward=1.0, next_state=[ 0.22052746  1.55025923 -0.0123571  -1.47249742]
[ Experience replay ] starts


[ episode 64 ][ timestamp 34 ] state=[0.10357724 0.20753184 0.11293995 0.51103135], action=1, reward=1.0, next_state=[0.10772788 0.40089681 0.12316058 0.25596557]
[ Experience replay ] starts
[ episode 64 ][ timestamp 35 ] state=[0.10772788 0.40089681 0.12316058 0.25596557], action=0, reward=1.0, next_state=[0.11574581 0.20425137 0.12827989 0.58481718]
[ Experience replay ] starts
[ episode 64 ][ timestamp 36 ] state=[0.11574581 0.20425137 0.12827989 0.58481718], action=1, reward=1.0, next_state=[0.11983084 0.39736515 0.13997624 0.33513755]
[ Experience replay ] starts
[ episode 64 ][ timestamp 37 ] state=[0.11983084 0.39736515 0.13997624 0.33513755], action=1, reward=1.0, next_state=[0.12777814 0.59024632 0.14667899 0.08966432]
[ Experience replay ] starts
[ episode 64 ][ timestamp 38 ] state=[0.12777814 0.59024632 0.14667899 0.08966432], action=1, reward=1.0, next_state=[ 0.13958307  0.78299461  0.14847227 -0.15338433]
[ Experience replay ] starts
[ episode 64 ][ timestamp 39 ] state

[ episode 65 ][ timestamp 31 ] state=[ 0.07628175  0.0194987  -0.0523442   0.14134575], action=1, reward=1.0, next_state=[ 0.07667173  0.21532973 -0.04951728 -0.16738064]
[ Experience replay ] starts
[ episode 65 ][ timestamp 32 ] state=[ 0.07667173  0.21532973 -0.04951728 -0.16738064], action=0, reward=1.0, next_state=[ 0.08097832  0.02095026 -0.05286489  0.10927905]
[ Experience replay ] starts
[ episode 65 ][ timestamp 33 ] state=[ 0.08097832  0.02095026 -0.05286489  0.10927905], action=0, reward=1.0, next_state=[ 0.08139733 -0.17337585 -0.05067931  0.38482597]
[ Experience replay ] starts
[ episode 65 ][ timestamp 34 ] state=[ 0.08139733 -0.17337585 -0.05067931  0.38482597], action=1, reward=1.0, next_state=[ 0.07792981  0.02242758 -0.04298279  0.07660456]
[ Experience replay ] starts
[ episode 65 ][ timestamp 35 ] state=[ 0.07792981  0.02242758 -0.04298279  0.07660456], action=1, reward=1.0, next_state=[ 0.07837836  0.21813853 -0.0414507  -0.22932376]
[ Experience replay ] starts


[ episode 66 ][ timestamp 5 ] state=[ 0.01558106 -0.03804556 -0.00846608 -0.03117814], action=0, reward=1.0, next_state=[ 0.01482015 -0.23304508 -0.00908964  0.25882167]
[ Experience replay ] starts
[ episode 66 ][ timestamp 6 ] state=[ 0.01482015 -0.23304508 -0.00908964  0.25882167], action=1, reward=1.0, next_state=[ 0.01015925 -0.03779456 -0.00391321 -0.03671434]
[ Experience replay ] starts
[ episode 66 ][ timestamp 7 ] state=[ 0.01015925 -0.03779456 -0.00391321 -0.03671434], action=1, reward=1.0, next_state=[ 0.00940336  0.15738329 -0.00464749 -0.33062935]
[ Experience replay ] starts
[ episode 66 ][ timestamp 8 ] state=[ 0.00940336  0.15738329 -0.00464749 -0.33062935], action=0, reward=1.0, next_state=[ 0.01255102 -0.03767219 -0.01126008 -0.03941564]
[ Experience replay ] starts
[ episode 66 ][ timestamp 9 ] state=[ 0.01255102 -0.03767219 -0.01126008 -0.03941564], action=0, reward=1.0, next_state=[ 0.01179758 -0.23263088 -0.01204839  0.24969346]
[ Experience replay ] starts
[ epi

[ episode 67 ][ timestamp 10 ] state=[ 0.09821754  1.79338903 -0.19509411 -2.71038883], action=0, reward=-1.0, next_state=[ 0.13408532  1.60013658 -0.24930189 -2.48300382]
[ Experience replay ] starts
[ Ended! ] Episode 67: Exploration_rate=0.01. Score=10.
[ episode 68 ] state=[ 0.01233761 -0.04146722 -0.01014843 -0.01164619]
[ episode 68 ][ timestamp 1 ] state=[ 0.01233761 -0.04146722 -0.01014843 -0.01164619], action=0, reward=1.0, next_state=[ 0.01150827 -0.23644217 -0.01038136  0.27781759]
[ Experience replay ] starts
[ episode 68 ][ timestamp 2 ] state=[ 0.01150827 -0.23644217 -0.01038136  0.27781759], action=0, reward=1.0, next_state=[ 0.00677943 -0.43141449 -0.004825    0.56720825]
[ Experience replay ] starts
[ episode 68 ][ timestamp 3 ] state=[ 0.00677943 -0.43141449 -0.004825    0.56720825], action=0, reward=1.0, next_state=[-0.00184886 -0.62646843  0.00651916  0.8583672 ]
[ Experience replay ] starts
[ episode 68 ][ timestamp 4 ] state=[-0.00184886 -0.62646843  0.00651916  0

[ episode 70 ][ timestamp 7 ] state=[ 0.06927512  0.73687295 -0.09832821 -1.23019226], action=0, reward=1.0, next_state=[ 0.08401258  0.54314376 -0.12293206 -0.96986408]
[ Experience replay ] starts
[ episode 70 ][ timestamp 8 ] state=[ 0.08401258  0.54314376 -0.12293206 -0.96986408], action=0, reward=1.0, next_state=[ 0.09487545  0.34986725 -0.14232934 -0.71818824]
[ Experience replay ] starts
[ episode 70 ][ timestamp 9 ] state=[ 0.09487545  0.34986725 -0.14232934 -0.71818824], action=0, reward=1.0, next_state=[ 0.1018728   0.15697167 -0.15669311 -0.47347433]
[ Experience replay ] starts
[ episode 70 ][ timestamp 10 ] state=[ 0.1018728   0.15697167 -0.15669311 -0.47347433], action=1, reward=1.0, next_state=[ 0.10501223  0.35391915 -0.16616259 -0.81115576]
[ Experience replay ] starts
[ episode 70 ][ timestamp 11 ] state=[ 0.10501223  0.35391915 -0.16616259 -0.81115576], action=0, reward=1.0, next_state=[ 0.11209061  0.16141576 -0.18238571 -0.57500509]
[ Experience replay ] starts
[ e

[ episode 71 ][ timestamp 34 ] state=[-0.03329359 -0.58700368 -0.19865822 -0.21665113], action=1, reward=1.0, next_state=[-0.04503366 -0.38967844 -0.20299125 -0.56483968]
[ Experience replay ] starts
[ episode 71 ][ timestamp 35 ] state=[-0.04503366 -0.38967844 -0.20299125 -0.56483968], action=0, reward=-1.0, next_state=[-0.05282723 -0.58146093 -0.21428804 -0.3423429 ]
[ Experience replay ] starts
[ Ended! ] Episode 71: Exploration_rate=0.01. Score=35.
[ episode 72 ] state=[ 0.02549114  0.00114762 -0.01028129 -0.00561367]
[ episode 72 ][ timestamp 1 ] state=[ 0.02549114  0.00114762 -0.01028129 -0.00561367], action=1, reward=1.0, next_state=[ 0.02551409  0.19641549 -0.01039356 -0.30152265]
[ Experience replay ] starts
[ episode 72 ][ timestamp 2 ] state=[ 0.02551409  0.19641549 -0.01039356 -0.30152265], action=0, reward=1.0, next_state=[ 0.0294424   0.00144321 -0.01642401 -0.01213568]
[ Experience replay ] starts
[ episode 72 ][ timestamp 3 ] state=[ 0.0294424   0.00144321 -0.01642401 -

[ episode 73 ][ timestamp 23 ] state=[ 0.04335205  0.00374868 -0.17177753 -0.48407197], action=0, reward=1.0, next_state=[ 0.04342703 -0.18858579 -0.18145897 -0.2500709 ]
[ Experience replay ] starts
[ episode 73 ][ timestamp 24 ] state=[ 0.04342703 -0.18858579 -0.18145897 -0.2500709 ], action=1, reward=1.0, next_state=[ 0.03965531  0.00860089 -0.18646038 -0.59405128]
[ Experience replay ] starts
[ episode 73 ][ timestamp 25 ] state=[ 0.03965531  0.00860089 -0.18646038 -0.59405128], action=0, reward=1.0, next_state=[ 0.03982733 -0.18348929 -0.19834141 -0.36541264]
[ Experience replay ] starts
[ episode 73 ][ timestamp 26 ] state=[ 0.03982733 -0.18348929 -0.19834141 -0.36541264], action=1, reward=1.0, next_state=[ 0.03615754  0.01381695 -0.20564966 -0.71350043]
[ Experience replay ] starts
[ episode 73 ][ timestamp 27 ] state=[ 0.03615754  0.01381695 -0.20564966 -0.71350043], action=1, reward=-1.0, next_state=[ 0.03643388  0.2111024  -0.21991967 -1.06322869]
[ Experience replay ] starts

[ episode 75 ][ timestamp 15 ] state=[ 0.0104382  -0.36734302 -0.036198    0.48579494], action=1, reward=1.0, next_state=[ 0.00309134 -0.17172947 -0.02648211  0.18192693]
[ Experience replay ] starts
[ episode 75 ][ timestamp 16 ] state=[ 0.00309134 -0.17172947 -0.02648211  0.18192693], action=1, reward=1.0, next_state=[-0.00034325  0.02376121 -0.02284357 -0.1189911 ]
[ Experience replay ] starts
[ episode 75 ][ timestamp 17 ] state=[-0.00034325  0.02376121 -0.02284357 -0.1189911 ], action=0, reward=1.0, next_state=[ 1.31974133e-04 -1.71026124e-01 -2.52233886e-02  1.66398244e-01]
[ Experience replay ] starts
[ episode 75 ][ timestamp 18 ] state=[ 1.31974133e-04 -1.71026124e-01 -2.52233886e-02  1.66398244e-01], action=0, reward=1.0, next_state=[-0.00328855 -0.3657781  -0.02189542  0.4510184 ]
[ Experience replay ] starts
[ episode 75 ][ timestamp 19 ] state=[-0.00328855 -0.3657781  -0.02189542  0.4510184 ], action=0, reward=1.0, next_state=[-0.01060411 -0.56058366 -0.01287506  0.7367199

[ episode 75 ][ timestamp 59 ] state=[ 0.28273935  1.1431148  -0.07091768 -0.74369136], action=1, reward=1.0, next_state=[ 0.30560165  1.33914016 -0.0857915  -1.05782263]
[ Experience replay ] starts
[ episode 75 ][ timestamp 60 ] state=[ 0.30560165  1.33914016 -0.0857915  -1.05782263], action=1, reward=1.0, next_state=[ 0.33238445  1.53528754 -0.10694796 -1.37615337]
[ Experience replay ] starts
[ episode 75 ][ timestamp 61 ] state=[ 0.33238445  1.53528754 -0.10694796 -1.37615337], action=0, reward=1.0, next_state=[ 0.3630902   1.34165192 -0.13447102 -1.11874225]
[ Experience replay ] starts
[ episode 75 ][ timestamp 62 ] state=[ 0.3630902   1.34165192 -0.13447102 -1.11874225], action=0, reward=1.0, next_state=[ 0.38992324  1.14852561 -0.15684587 -0.87108342]
[ Experience replay ] starts
[ episode 75 ][ timestamp 63 ] state=[ 0.38992324  1.14852561 -0.15684587 -0.87108342], action=0, reward=1.0, next_state=[ 0.41289375  0.95584471 -0.17426754 -0.6315337 ]
[ Experience replay ] starts


[ episode 76 ][ timestamp 32 ] state=[ 0.04253974  0.94462751 -0.09115786 -1.61481256], action=1, reward=1.0, next_state=[ 0.06143229  1.14069909 -0.12345411 -1.93446211]
[ Experience replay ] starts
[ episode 76 ][ timestamp 33 ] state=[ 0.06143229  1.14069909 -0.12345411 -1.93446211], action=1, reward=1.0, next_state=[ 0.08424627  1.33690635 -0.16214335 -2.26273645]
[ Experience replay ] starts
[ episode 76 ][ timestamp 34 ] state=[ 0.08424627  1.33690635 -0.16214335 -2.26273645], action=1, reward=1.0, next_state=[ 0.1109844   1.53313257 -0.20739808 -2.60067663]
[ Experience replay ] starts
[ episode 76 ][ timestamp 35 ] state=[ 0.1109844   1.53313257 -0.20739808 -2.60067663], action=1, reward=-1.0, next_state=[ 0.14164705  1.72917738 -0.25941162 -2.94898084]
[ Experience replay ] starts
[ Ended! ] Episode 76: Exploration_rate=0.01. Score=35.
[ episode 77 ] state=[-0.04399404  0.0191766  -0.03907563  0.01551216]
[ episode 77 ][ timestamp 1 ] state=[-0.04399404  0.0191766  -0.03907563

[ episode 79 ][ timestamp 8 ] state=[-0.08046829  0.19100006  0.08947031 -0.17941608], action=1, reward=1.0, next_state=[-0.07664829  0.38473535  0.08588199 -0.44258746]
[ Experience replay ] starts
[ episode 79 ][ timestamp 9 ] state=[-0.07664829  0.38473535  0.08588199 -0.44258746], action=0, reward=1.0, next_state=[-0.06895358  0.1885098   0.07703024 -0.12411568]
[ Experience replay ] starts
[ episode 79 ][ timestamp 10 ] state=[-0.06895358  0.1885098   0.07703024 -0.12411568], action=0, reward=1.0, next_state=[-0.06518339 -0.00762637  0.07454793  0.19184065]
[ Experience replay ] starts
[ episode 79 ][ timestamp 11 ] state=[-0.06518339 -0.00762637  0.07454793  0.19184065], action=0, reward=1.0, next_state=[-0.06533591 -0.20373122  0.07838474  0.50707773]
[ Experience replay ] starts
[ episode 79 ][ timestamp 12 ] state=[-0.06533591 -0.20373122  0.07838474  0.50707773], action=0, reward=1.0, next_state=[-0.06941054 -0.39986508  0.0885263   0.8233967 ]
[ Experience replay ] starts
[ 

[ episode 81 ][ timestamp 8 ] state=[-0.00993544 -0.14364042 -0.04350059  0.22924622], action=1, reward=1.0, next_state=[-0.01280824  0.05207529 -0.03891566 -0.07683477]
[ Experience replay ] starts
[ episode 81 ][ timestamp 9 ] state=[-0.01280824  0.05207529 -0.03891566 -0.07683477], action=1, reward=1.0, next_state=[-0.01176674  0.24773289 -0.04045236 -0.38153728]
[ Experience replay ] starts
[ episode 81 ][ timestamp 10 ] state=[-0.01176674  0.24773289 -0.04045236 -0.38153728], action=1, reward=1.0, next_state=[-0.00681208  0.4434052  -0.0480831  -0.68669537]
[ Experience replay ] starts
[ episode 81 ][ timestamp 11 ] state=[-0.00681208  0.4434052  -0.0480831  -0.68669537], action=0, reward=1.0, next_state=[ 0.00205602  0.24898252 -0.06181701 -0.4095294 ]
[ Experience replay ] starts
[ episode 81 ][ timestamp 12 ] state=[ 0.00205602  0.24898252 -0.06181701 -0.4095294 ], action=1, reward=1.0, next_state=[ 0.00703567  0.44492394 -0.0700076  -0.72104276]
[ Experience replay ] starts
[ 

[ episode 82 ][ timestamp 36 ] state=[ 0.0124602  -0.49656651 -0.14155157 -0.04907383], action=1, reward=1.0, next_state=[ 0.00252887 -0.2997285  -0.14253305 -0.38285509]
[ Experience replay ] starts
[ episode 82 ][ timestamp 37 ] state=[ 0.00252887 -0.2997285  -0.14253305 -0.38285509], action=0, reward=1.0, next_state=[-0.0034657  -0.49256948 -0.15019015 -0.13828989]
[ Experience replay ] starts
[ episode 82 ][ timestamp 38 ] state=[-0.0034657  -0.49256948 -0.15019015 -0.13828989], action=1, reward=1.0, next_state=[-0.01331709 -0.2956512  -0.15295595 -0.47433223]
[ Experience replay ] starts
[ episode 82 ][ timestamp 39 ] state=[-0.01331709 -0.2956512  -0.15295595 -0.47433223], action=0, reward=1.0, next_state=[-0.01923012 -0.48831978 -0.1624426  -0.23349737]
[ Experience replay ] starts
[ episode 82 ][ timestamp 40 ] state=[-0.01923012 -0.48831978 -0.1624426  -0.23349737], action=1, reward=1.0, next_state=[-0.02899651 -0.29129462 -0.16711254 -0.57269279]
[ Experience replay ] starts


[ episode 82 ][ timestamp 78 ] state=[-0.99894062 -1.5688614  -0.15013772 -0.4684244 ], action=0, reward=1.0, next_state=[-1.03031785 -1.76157891 -0.15950621 -0.22657495]
[ Experience replay ] starts
[ episode 82 ][ timestamp 79 ] state=[-1.03031785 -1.76157891 -0.15950621 -0.22657495], action=1, reward=1.0, next_state=[-1.06554943 -1.5645796  -0.16403771 -0.56501902]
[ Experience replay ] starts
[ episode 82 ][ timestamp 80 ] state=[-1.06554943 -1.5645796  -0.16403771 -0.56501902], action=0, reward=1.0, next_state=[-1.09684102 -1.75706625 -0.17533809 -0.32817607]
[ Experience replay ] starts
[ episode 82 ][ timestamp 81 ] state=[-1.09684102 -1.75706625 -0.17533809 -0.32817607], action=0, reward=1.0, next_state=[-1.13198234 -1.94931509 -0.18190161 -0.09550992]
[ Experience replay ] starts
[ episode 82 ][ timestamp 82 ] state=[-1.13198234 -1.94931509 -0.18190161 -0.09550992], action=1, reward=1.0, next_state=[-1.17096865 -1.75211538 -0.18381181 -0.43961388]
[ Experience replay ] starts


[ episode 86 ][ timestamp 4 ] state=[ 0.0283496   0.55219668 -0.02186111 -0.92469095], action=1, reward=1.0, next_state=[ 0.03939353  0.74760699 -0.04035492 -1.22416302]
[ Experience replay ] starts
[ episode 86 ][ timestamp 5 ] state=[ 0.03939353  0.74760699 -0.04035492 -1.22416302], action=1, reward=1.0, next_state=[ 0.05434567  0.94322478 -0.06483819 -1.52921194]
[ Experience replay ] starts
[ episode 86 ][ timestamp 6 ] state=[ 0.05434567  0.94322478 -0.06483819 -1.52921194], action=1, reward=1.0, next_state=[ 0.07321017  1.13906602 -0.09542242 -1.84140561]
[ Experience replay ] starts
[ episode 86 ][ timestamp 7 ] state=[ 0.07321017  1.13906602 -0.09542242 -1.84140561], action=0, reward=1.0, next_state=[ 0.09599149  0.94511792 -0.13225054 -1.57981857]
[ Experience replay ] starts
[ episode 86 ][ timestamp 8 ] state=[ 0.09599149  0.94511792 -0.13225054 -1.57981857], action=1, reward=1.0, next_state=[ 0.11489385  1.14154349 -0.16384691 -1.91065246]
[ Experience replay ] starts
[ epi

[ episode 90 ][ timestamp 7 ] state=[ 0.07482647  1.1533872  -0.10371996 -1.78856674], action=1, reward=1.0, next_state=[ 0.09789421  1.34950905 -0.1394913  -2.11160758]
[ Experience replay ] starts
[ episode 90 ][ timestamp 8 ] state=[ 0.09789421  1.34950905 -0.1394913  -2.11160758], action=1, reward=1.0, next_state=[ 0.12488439  1.54572327 -0.18172345 -2.44394769]
[ Experience replay ] starts
[ episode 90 ][ timestamp 9 ] state=[ 0.12488439  1.54572327 -0.18172345 -2.44394769], action=1, reward=-1.0, next_state=[ 0.15579886  1.74187251 -0.23060241 -2.7864599 ]
[ Experience replay ] starts
[ Ended! ] Episode 90: Exploration_rate=0.01. Score=9.
[ episode 91 ] state=[ 0.01318458 -0.0299232  -0.00424924 -0.02295416]
[ episode 91 ][ timestamp 1 ] state=[ 0.01318458 -0.0299232  -0.00424924 -0.02295416], action=1, reward=1.0, next_state=[ 0.01258611  0.16525943 -0.00470832 -0.31697473]
[ Experience replay ] starts
[ episode 91 ][ timestamp 2 ] state=[ 0.01258611  0.16525943 -0.00470832 -0.3

[ episode 94 ][ timestamp 10 ] state=[-0.05640022 -0.22629727  0.02583822  0.20971818], action=1, reward=1.0, next_state=[-0.06092617 -0.03155411  0.03003259 -0.07470345]
[ Experience replay ] starts
[ episode 94 ][ timestamp 11 ] state=[-0.06092617 -0.03155411  0.03003259 -0.07470345], action=1, reward=1.0, next_state=[-0.06155725  0.1631247   0.02853852 -0.35776174]
[ Experience replay ] starts
[ episode 94 ][ timestamp 12 ] state=[-0.06155725  0.1631247   0.02853852 -0.35776174], action=1, reward=1.0, next_state=[-0.05829476  0.35782956  0.02138328 -0.64131091]
[ Experience replay ] starts
[ episode 94 ][ timestamp 13 ] state=[-0.05829476  0.35782956  0.02138328 -0.64131091], action=1, reward=1.0, next_state=[-0.05113817  0.552647    0.00855706 -0.92718406]
[ Experience replay ] starts
[ episode 94 ][ timestamp 14 ] state=[-0.05113817  0.552647    0.00855706 -0.92718406], action=0, reward=1.0, next_state=[-0.04008523  0.35741055 -0.00998662 -0.63182437]
[ Experience replay ] starts


[ episode 97 ][ timestamp 9 ] state=[ 0.04956728 -0.01025529 -0.01834648 -0.00767536], action=1, reward=1.0, next_state=[ 0.04936218  0.18512491 -0.01849999 -0.30608989]
[ Experience replay ] starts
[ episode 97 ][ timestamp 10 ] state=[ 0.04936218  0.18512491 -0.01849999 -0.30608989], action=1, reward=1.0, next_state=[ 0.05306467  0.38050553 -0.02462179 -0.60454936]
[ Experience replay ] starts
[ episode 97 ][ timestamp 11 ] state=[ 0.05306467  0.38050553 -0.02462179 -0.60454936], action=1, reward=1.0, next_state=[ 0.06067479  0.57596301 -0.03671277 -0.90488478]
[ Experience replay ] starts
[ episode 97 ][ timestamp 12 ] state=[ 0.06067479  0.57596301 -0.03671277 -0.90488478], action=0, reward=1.0, next_state=[ 0.07219405  0.38135696 -0.05481047 -0.62396354]
[ Experience replay ] starts
[ episode 97 ][ timestamp 13 ] state=[ 0.07219405  0.38135696 -0.05481047 -0.62396354], action=0, reward=1.0, next_state=[ 0.07982118  0.18704136 -0.06728974 -0.34903406]
[ Experience replay ] starts
[

[ episode 97 ][ timestamp 54 ] state=[-0.07601954  0.58068025 -0.02585509 -1.00870859], action=0, reward=1.0, next_state=[-0.06440594  0.3859128  -0.04602926 -0.7242556 ]
[ Experience replay ] starts
[ episode 97 ][ timestamp 55 ] state=[-0.06440594  0.3859128  -0.04602926 -0.7242556 ], action=0, reward=1.0, next_state=[-0.05668768  0.19145662 -0.06051437 -0.44640809]
[ Experience replay ] starts
[ episode 97 ][ timestamp 56 ] state=[-0.05668768  0.19145662 -0.06051437 -0.44640809], action=1, reward=1.0, next_state=[-0.05285855  0.38738013 -0.06944253 -0.75753578]
[ Experience replay ] starts
[ episode 97 ][ timestamp 57 ] state=[-0.05285855  0.38738013 -0.06944253 -0.75753578], action=1, reward=1.0, next_state=[-0.04511095  0.58338689 -0.08459325 -1.07123702]
[ Experience replay ] starts
[ episode 97 ][ timestamp 58 ] state=[-0.04511095  0.58338689 -0.08459325 -1.07123702], action=0, reward=1.0, next_state=[-0.03344321  0.38947905 -0.10601799 -0.8062561 ]
[ Experience replay ] starts


[ episode 98 ][ timestamp 30 ] state=[ 0.09065909  0.22859164 -0.01246322 -0.31613971], action=1, reward=1.0, next_state=[ 0.09523092  0.42388888 -0.01878601 -0.61272691]
[ Experience replay ] starts
[ episode 98 ][ timestamp 31 ] state=[ 0.09523092  0.42388888 -0.01878601 -0.61272691], action=0, reward=1.0, next_state=[ 0.1037087   0.22903443 -0.03104055 -0.32601958]
[ Experience replay ] starts
[ episode 98 ][ timestamp 32 ] state=[ 0.1037087   0.22903443 -0.03104055 -0.32601958], action=0, reward=1.0, next_state=[ 0.10828939  0.03436786 -0.03756094 -0.04328484]
[ Experience replay ] starts
[ episode 98 ][ timestamp 33 ] state=[ 0.10828939  0.03436786 -0.03756094 -0.04328484], action=1, reward=1.0, next_state=[ 0.10897675  0.23000773 -0.03842664 -0.34757798]
[ Experience replay ] starts
[ episode 98 ][ timestamp 34 ] state=[ 0.10897675  0.23000773 -0.03842664 -0.34757798], action=1, reward=1.0, next_state=[ 0.1135769   0.42565458 -0.0453782  -0.65212626]
[ Experience replay ] starts


[ episode 98 ][ timestamp 72 ] state=[ 0.0061023  -0.496944   -0.19534229 -0.36415152], action=1, reward=1.0, next_state=[-0.00383658 -0.29966026 -0.20262532 -0.7115151 ]
[ Experience replay ] starts
[ episode 98 ][ timestamp 73 ] state=[-0.00383658 -0.29966026 -0.20262532 -0.7115151 ], action=0, reward=-1.0, next_state=[-0.00982979 -0.49148611 -0.21685562 -0.48882802]
[ Experience replay ] starts
[ Ended! ] Episode 98: Exploration_rate=0.01. Score=73.
[ episode 99 ] state=[-0.00680511 -0.01631518  0.0374257   0.02566912]
[ episode 99 ][ timestamp 1 ] state=[-0.00680511 -0.01631518  0.0374257   0.02566912], action=0, reward=1.0, next_state=[-0.00713141 -0.2119533   0.03793908  0.3299214 ]
[ Experience replay ] starts
[ episode 99 ][ timestamp 2 ] state=[-0.00713141 -0.2119533   0.03793908  0.3299214 ], action=1, reward=1.0, next_state=[-0.01137048 -0.01739138  0.04453751  0.04943995]
[ Experience replay ] starts
[ episode 99 ][ timestamp 3 ] state=[-0.01137048 -0.01739138  0.04453751  

[ episode 99 ][ timestamp 42 ] state=[ 0.0121541  -0.55700303 -0.17423687 -0.08296492], action=0, reward=1.0, next_state=[ 0.00101404 -0.74925485 -0.17589616  0.15007966]
[ Experience replay ] starts
[ episode 99 ][ timestamp 43 ] state=[ 0.00101404 -0.74925485 -0.17589616  0.15007966], action=1, reward=1.0, next_state=[-0.01397106 -0.55210757 -0.17289457 -0.19252552]
[ Experience replay ] starts
[ episode 99 ][ timestamp 44 ] state=[-0.01397106 -0.55210757 -0.17289457 -0.19252552], action=1, reward=1.0, next_state=[-0.02501321 -0.35498827 -0.17674508 -0.53437431]
[ Experience replay ] starts
[ episode 99 ][ timestamp 45 ] state=[-0.02501321 -0.35498827 -0.17674508 -0.53437431], action=0, reward=1.0, next_state=[-0.03211298 -0.547242   -0.18743257 -0.30217929]
[ Experience replay ] starts
[ episode 99 ][ timestamp 46 ] state=[-0.03211298 -0.547242   -0.18743257 -0.30217929], action=0, reward=1.0, next_state=[-0.04305782 -0.73926715 -0.19347615 -0.07396938]
[ Experience replay ] starts


[ episode 100 ][ timestamp 36 ] state=[-0.07159252 -0.20819213 -0.1078967  -0.39419647], action=1, reward=1.0, next_state=[-0.07575636 -0.01171781 -0.11578063 -0.71885427]
[ Experience replay ] starts
[ episode 100 ][ timestamp 37 ] state=[-0.07575636 -0.01171781 -0.11578063 -0.71885427], action=0, reward=1.0, next_state=[-0.07599072 -0.20506354 -0.13015771 -0.46474089]
[ Experience replay ] starts
[ episode 100 ][ timestamp 38 ] state=[-0.07599072 -0.20506354 -0.13015771 -0.46474089], action=0, reward=1.0, next_state=[-0.08009199 -0.39812922 -0.13945253 -0.21575036]
[ Experience replay ] starts
[ episode 100 ][ timestamp 39 ] state=[-0.08009199 -0.39812922 -0.13945253 -0.21575036], action=0, reward=1.0, next_state=[-0.08805457 -0.59101067 -0.14376754  0.02989686]
[ Experience replay ] starts
[ episode 100 ][ timestamp 40 ] state=[-0.08805457 -0.59101067 -0.14376754  0.02989686], action=0, reward=1.0, next_state=[-0.09987478 -0.78380963 -0.1431696   0.27398949]
[ Experience replay ] st

[ episode 102 ][ timestamp 12 ] state=[-0.04725056 -0.17919863  0.02750954  0.3009758 ], action=1, reward=1.0, next_state=[-0.05083453  0.01552065  0.03352906  0.01709419]
[ Experience replay ] starts
[ episode 102 ][ timestamp 13 ] state=[-0.05083453  0.01552065  0.03352906  0.01709419], action=1, reward=1.0, next_state=[-0.05052412  0.21014611  0.03387094 -0.26482422]
[ Experience replay ] starts
[ episode 102 ][ timestamp 14 ] state=[-0.05052412  0.21014611  0.03387094 -0.26482422], action=0, reward=1.0, next_state=[-0.04632119  0.0145575   0.02857446  0.03834657]
[ Experience replay ] starts
[ episode 102 ][ timestamp 15 ] state=[-0.04632119  0.0145575   0.02857446  0.03834657], action=1, reward=1.0, next_state=[-0.04603004  0.20925829  0.02934139 -0.24518564]
[ Experience replay ] starts
[ episode 102 ][ timestamp 16 ] state=[-0.04603004  0.20925829  0.02934139 -0.24518564], action=1, reward=1.0, next_state=[-0.04184488  0.40394914  0.02443768 -0.52847109]
[ Experience replay ] st

[ episode 102 ][ timestamp 56 ] state=[-0.26427525 -1.29140109 -0.00293847  0.76642321], action=0, reward=1.0, next_state=[-0.29010327 -1.48648246  0.01239     1.05818009]
[ Experience replay ] starts
[ episode 102 ][ timestamp 57 ] state=[-0.29010327 -1.48648246  0.01239     1.05818009], action=1, reward=1.0, next_state=[-0.31983292 -1.29152683  0.0335536   0.76941167]
[ Experience replay ] starts
[ episode 102 ][ timestamp 58 ] state=[-0.31983292 -1.29152683  0.0335536   0.76941167], action=0, reward=1.0, next_state=[-0.34566346 -1.48709415  0.04894183  1.07246043]
[ Experience replay ] starts
[ episode 102 ][ timestamp 59 ] state=[-0.34566346 -1.48709415  0.04894183  1.07246043], action=0, reward=1.0, next_state=[-0.37540534 -1.6828277   0.07039104  1.38009236]
[ Experience replay ] starts
[ episode 102 ][ timestamp 60 ] state=[-0.37540534 -1.6828277   0.07039104  1.38009236], action=0, reward=1.0, next_state=[-0.4090619  -1.87875423  0.09799289  1.69393222]
[ Experience replay ] st

[ episode 103 ][ timestamp 36 ] state=[ 0.03487988 -0.18378853  0.16246009  0.90418344], action=1, reward=1.0, next_state=[0.03120411 0.00880461 0.18054376 0.66665115]
[ Experience replay ] starts
[ episode 103 ][ timestamp 37 ] state=[0.03120411 0.00880461 0.18054376 0.66665115], action=0, reward=1.0, next_state=[ 0.0313802  -0.1883078   0.19387678  1.010306  ]
[ Experience replay ] starts
[ episode 103 ][ timestamp 38 ] state=[ 0.0313802  -0.1883078   0.19387678  1.010306  ], action=1, reward=-1.0, next_state=[0.02761404 0.00377298 0.2140829  0.78422623]
[ Experience replay ] starts
[ Ended! ] Episode 103: Exploration_rate=0.01. Score=38.
[ episode 104 ] state=[ 0.04166393  0.01334501 -0.01966819  0.0349914 ]
[ episode 104 ][ timestamp 1 ] state=[ 0.04166393  0.01334501 -0.01966819  0.0349914 ], action=1, reward=1.0, next_state=[ 0.04193083  0.2087434  -0.01896836 -0.26383158]
[ Experience replay ] starts
[ episode 104 ][ timestamp 2 ] state=[ 0.04193083  0.2087434  -0.01896836 -0.26

[ episode 105 ][ timestamp 10 ] state=[-0.016645    0.18828302 -0.0015243  -0.27178116], action=0, reward=1.0, next_state=[-0.01287934 -0.00681715 -0.00695992  0.0204206 ]
[ Experience replay ] starts
[ episode 105 ][ timestamp 11 ] state=[-0.01287934 -0.00681715 -0.00695992  0.0204206 ], action=0, reward=1.0, next_state=[-0.01301569 -0.2018386  -0.00655151  0.31089949]
[ Experience replay ] starts
[ episode 105 ][ timestamp 12 ] state=[-0.01301569 -0.2018386  -0.00655151  0.31089949], action=0, reward=1.0, next_state=[-1.70524588e-02 -3.96866600e-01 -3.33517542e-04  6.01509088e-01]
[ Experience replay ] starts
[ episode 105 ][ timestamp 13 ] state=[-1.70524588e-02 -3.96866600e-01 -3.33517542e-04  6.01509088e-01], action=1, reward=1.0, next_state=[-0.02498979 -0.20173998  0.01169666  0.30872113]
[ Experience replay ] starts
[ episode 105 ][ timestamp 14 ] state=[-0.02498979 -0.20173998  0.01169666  0.30872113], action=0, reward=1.0, next_state=[-0.02902459 -0.39702663  0.01787109  0.60

[ episode 107 ][ timestamp 9 ] state=[-0.04915217 -0.81330621  0.06759487  1.16781098], action=0, reward=1.0, next_state=[-0.0654183  -1.00923935  0.09095108  1.48089728]
[ Experience replay ] starts
[ episode 107 ][ timestamp 10 ] state=[-0.0654183  -1.00923935  0.09095108  1.48089728], action=0, reward=1.0, next_state=[-0.08560309 -1.20534579  0.12056903  1.80054389]
[ Experience replay ] starts
[ episode 107 ][ timestamp 11 ] state=[-0.08560309 -1.20534579  0.12056903  1.80054389], action=0, reward=1.0, next_state=[-0.10971    -1.40159201  0.15657991  2.12813769]
[ Experience replay ] starts
[ episode 107 ][ timestamp 12 ] state=[-0.10971    -1.40159201  0.15657991  2.12813769], action=0, reward=1.0, next_state=[-0.13774184 -1.59788473  0.19914266  2.46482132]
[ Experience replay ] starts
[ episode 107 ][ timestamp 13 ] state=[-0.13774184 -1.59788473  0.19914266  2.46482132], action=0, reward=-1.0, next_state=[-0.16969954 -1.79405344  0.24843909  2.81142067]
[ Experience replay ] st

[ episode 109 ][ timestamp 28 ] state=[ 0.05915337  0.21800923  0.01351593 -0.2591664 ], action=1, reward=1.0, next_state=[ 0.06351355  0.41293565  0.0083326  -0.54755576]
[ Experience replay ] starts
[ episode 109 ][ timestamp 29 ] state=[ 0.06351355  0.41293565  0.0083326  -0.54755576], action=0, reward=1.0, next_state=[ 0.07177226  0.21769763 -0.00261852 -0.25225914]
[ Experience replay ] starts
[ episode 109 ][ timestamp 30 ] state=[ 0.07177226  0.21769763 -0.00261852 -0.25225914], action=0, reward=1.0, next_state=[ 0.07612622  0.02261317 -0.0076637   0.03959671]
[ Experience replay ] starts
[ episode 109 ][ timestamp 31 ] state=[ 0.07612622  0.02261317 -0.0076637   0.03959671], action=1, reward=1.0, next_state=[ 0.07657848  0.21784417 -0.00687177 -0.25549431]
[ Experience replay ] starts
[ episode 109 ][ timestamp 32 ] state=[ 0.07657848  0.21784417 -0.00687177 -0.25549431], action=1, reward=1.0, next_state=[ 0.08093536  0.41306356 -0.01198165 -0.55033676]
[ Experience replay ] st

[ episode 109 ][ timestamp 70 ] state=[-0.05280685 -0.52757374 -0.03278972  0.14183301], action=0, reward=1.0, next_state=[-0.06335833 -0.72221111 -0.02995306  0.42399368]
[ Experience replay ] starts
[ episode 109 ][ timestamp 71 ] state=[-0.06335833 -0.72221111 -0.02995306  0.42399368], action=0, reward=1.0, next_state=[-0.07780255 -0.91689622 -0.02147319  0.70708547]
[ Experience replay ] starts
[ episode 109 ][ timestamp 72 ] state=[-0.07780255 -0.91689622 -0.02147319  0.70708547], action=1, reward=1.0, next_state=[-0.09614047 -0.72148347 -0.00733148  0.4077213 ]
[ Experience replay ] starts
[ episode 109 ][ timestamp 73 ] state=[-0.09614047 -0.72148347 -0.00733148  0.4077213 ], action=0, reward=1.0, next_state=[-1.10570141e-01 -9.16500707e-01  8.22947335e-04  6.98083848e-01]
[ Experience replay ] starts
[ episode 109 ][ timestamp 74 ] state=[-1.10570141e-01 -9.16500707e-01  8.22947335e-04  6.98083848e-01], action=0, reward=1.0, next_state=[-0.12890016 -1.11163406  0.01478462  0.99

[ episode 109 ][ timestamp 113 ] state=[-0.24135809  0.42374948 -0.06392285 -0.78725914], action=0, reward=1.0, next_state=[-0.2328831   0.22956114 -0.07966803 -0.51535205]
[ Experience replay ] starts
[ episode 109 ][ timestamp 114 ] state=[-0.2328831   0.22956114 -0.07966803 -0.51535205], action=0, reward=1.0, next_state=[-0.22829188  0.03564616 -0.08997507 -0.24879981]
[ Experience replay ] starts
[ episode 109 ][ timestamp 115 ] state=[-0.22829188  0.03564616 -0.08997507 -0.24879981], action=0, reward=1.0, next_state=[-0.22757895 -0.15808335 -0.09495107  0.014202  ]
[ Experience replay ] starts
[ episode 109 ][ timestamp 116 ] state=[-0.22757895 -0.15808335 -0.09495107  0.014202  ], action=1, reward=1.0, next_state=[-0.23074062  0.03826301 -0.09466703 -0.30686458]
[ Experience replay ] starts
[ episode 109 ][ timestamp 117 ] state=[-0.23074062  0.03826301 -0.09466703 -0.30686458], action=0, reward=1.0, next_state=[-0.22997536 -0.15539145 -0.10080432 -0.0454741 ]
[ Experience replay

[ episode 110 ][ timestamp 5 ] state=[-0.06752146 -0.81992179  0.0083302   1.12488452], action=0, reward=1.0, next_state=[-0.0839199  -1.01515193  0.03082789  1.42016862]
[ Experience replay ] starts
[ episode 110 ][ timestamp 6 ] state=[-0.0839199  -1.01515193  0.03082789  1.42016862], action=1, reward=1.0, next_state=[-0.10422294 -0.8204247   0.05923126  1.13727852]
[ Experience replay ] starts
[ episode 110 ][ timestamp 7 ] state=[-0.10422294 -0.8204247   0.05923126  1.13727852], action=0, reward=1.0, next_state=[-0.12063143 -1.01626922  0.08197683  1.44793394]
[ Experience replay ] starts
[ episode 110 ][ timestamp 8 ] state=[-0.12063143 -1.01626922  0.08197683  1.44793394], action=1, reward=1.0, next_state=[-0.14095682 -0.82224538  0.11093551  1.18194975]
[ Experience replay ] starts
[ episode 110 ][ timestamp 9 ] state=[-0.14095682 -0.82224538  0.11093551  1.18194975], action=1, reward=1.0, next_state=[-0.15740172 -0.62872405  0.1345745   0.92600031]
[ Experience replay ] starts


[ episode 111 ][ timestamp 4 ] state=[-0.04132662  0.19032089  0.05284659 -0.25620736], action=0, reward=1.0, next_state=[-0.0375202  -0.00551419  0.04772244  0.05266482]
[ Experience replay ] starts
[ episode 111 ][ timestamp 5 ] state=[-0.0375202  -0.00551419  0.04772244  0.05266482], action=0, reward=1.0, next_state=[-0.03763049 -0.20128678  0.04877574  0.36001446]
[ Experience replay ] starts
[ episode 111 ][ timestamp 6 ] state=[-0.03763049 -0.20128678  0.04877574  0.36001446], action=1, reward=1.0, next_state=[-0.04165622 -0.00689088  0.05597603  0.08310179]
[ Experience replay ] starts
[ episode 111 ][ timestamp 7 ] state=[-0.04165622 -0.00689088  0.05597603  0.08310179], action=1, reward=1.0, next_state=[-0.04179404  0.18738585  0.05763806 -0.19140852]
[ Experience replay ] starts
[ episode 111 ][ timestamp 8 ] state=[-0.04179404  0.18738585  0.05763806 -0.19140852], action=1, reward=1.0, next_state=[-0.03804632  0.3816379   0.05380989 -0.46536653]
[ Experience replay ] starts


[ episode 112 ][ timestamp 4 ] state=[-0.04760779 -0.1649084   0.02065481  0.31800394], action=1, reward=1.0, next_state=[-0.05090596  0.02991338  0.02701489  0.0319057 ]
[ Experience replay ] starts
[ episode 112 ][ timestamp 5 ] state=[-0.05090596  0.02991338  0.02701489  0.0319057 ], action=1, reward=1.0, next_state=[-0.05030769  0.22463771  0.027653   -0.25213282]
[ Experience replay ] starts
[ episode 112 ][ timestamp 6 ] state=[-0.05030769  0.22463771  0.027653   -0.25213282], action=1, reward=1.0, next_state=[-0.04581493  0.4193541   0.02261034 -0.5359668 ]
[ Experience replay ] starts
[ episode 112 ][ timestamp 7 ] state=[-0.04581493  0.4193541   0.02261034 -0.5359668 ], action=1, reward=1.0, next_state=[-0.03742785  0.61415095  0.01189101 -0.82144051]
[ Experience replay ] starts
[ episode 112 ][ timestamp 8 ] state=[-0.03742785  0.61415095  0.01189101 -0.82144051], action=0, reward=1.0, next_state=[-0.02514483  0.41886833 -0.0045378  -0.52504141]
[ Experience replay ] starts


[ episode 113 ][ timestamp 21 ] state=[-0.12445961  0.34724229  0.20619126 -0.01618193], action=0, reward=1.0, next_state=[-0.11751476  0.14985181  0.20586763  0.33382364]
[ Experience replay ] starts
[ episode 113 ][ timestamp 22 ] state=[-0.11751476  0.14985181  0.20586763  0.33382364], action=0, reward=-1.0, next_state=[-0.11451773 -0.04751419  0.2125441   0.68371974]
[ Experience replay ] starts
[ Ended! ] Episode 113: Exploration_rate=0.01. Score=22.
[ episode 114 ] state=[-0.00087059 -0.01127649  0.01616434 -0.01784859]
[ episode 114 ][ timestamp 1 ] state=[-0.00087059 -0.01127649  0.01616434 -0.01784859], action=1, reward=1.0, next_state=[-0.00109612  0.18360996  0.01580736 -0.30538797]
[ Experience replay ] starts
[ episode 114 ][ timestamp 2 ] state=[-0.00109612  0.18360996  0.01580736 -0.30538797], action=1, reward=1.0, next_state=[ 0.00257608  0.37850312  0.00969961 -0.59304402]
[ Experience replay ] starts
[ episode 114 ][ timestamp 3 ] state=[ 0.00257608  0.37850312  0.009

[ episode 114 ][ timestamp 43 ] state=[ 0.00957081 -0.74030771 -0.19838875  0.01857082], action=0, reward=1.0, next_state=[-0.00523534 -0.93211345 -0.19801734  0.2426917 ]
[ Experience replay ] starts
[ episode 114 ][ timestamp 44 ] state=[-0.00523534 -0.93211345 -0.19801734  0.2426917 ], action=0, reward=1.0, next_state=[-0.02387761 -1.1239373  -0.1931635   0.46696731]
[ Experience replay ] starts
[ episode 114 ][ timestamp 45 ] state=[-0.02387761 -1.1239373  -0.1931635   0.46696731], action=1, reward=1.0, next_state=[-0.04635635 -0.9266862  -0.18382416  0.12015584]
[ Experience replay ] starts
[ episode 114 ][ timestamp 46 ] state=[-0.04635635 -0.9266862  -0.18382416  0.12015584], action=0, reward=1.0, next_state=[-0.06489008 -1.11876381 -0.18142104  0.3496776 ]
[ Experience replay ] starts
[ episode 114 ][ timestamp 47 ] state=[-0.06489008 -1.11876381 -0.18142104  0.3496776 ], action=0, reward=1.0, next_state=[-0.08726535 -1.31090448 -0.17442749  0.58011289]
[ Experience replay ] st

[ episode 114 ][ timestamp 84 ] state=[-1.10799196 -1.63280132 -0.15296295 -0.35536729], action=1, reward=1.0, next_state=[-1.14064798 -1.43587301 -0.1600703  -0.69210668]
[ Experience replay ] starts
[ episode 114 ][ timestamp 85 ] state=[-1.14064798 -1.43587301 -0.1600703  -0.69210668], action=0, reward=1.0, next_state=[-1.16936544 -1.62845484 -0.17391243 -0.45378682]
[ Experience replay ] starts
[ episode 114 ][ timestamp 86 ] state=[-1.16936544 -1.62845484 -0.17391243 -0.45378682], action=1, reward=1.0, next_state=[-1.20193454 -1.4313553  -0.18298817 -0.79584925]
[ Experience replay ] starts
[ episode 114 ][ timestamp 87 ] state=[-1.20193454 -1.4313553  -0.18298817 -0.79584925], action=0, reward=1.0, next_state=[-1.23056165 -1.62355788 -0.19890515 -0.56585759]
[ Experience replay ] starts
[ episode 114 ][ timestamp 88 ] state=[-1.23056165 -1.62355788 -0.19890515 -0.56585759], action=0, reward=-1.0, next_state=[-1.26303281 -1.8154155  -0.21022231 -0.34183859]
[ Experience replay ] s

[ episode 115 ][ timestamp 40 ] state=[-0.22907936 -0.53266839  0.08894456  0.22813782], action=1, reward=1.0, next_state=[-0.23973272 -0.33892267  0.09350732 -0.03521673]
[ Experience replay ] starts
[ episode 115 ][ timestamp 41 ] state=[-0.23973272 -0.33892267  0.09350732 -0.03521673], action=1, reward=1.0, next_state=[-0.24651118 -0.14525737  0.09280298 -0.29699449]
[ Experience replay ] starts
[ episode 115 ][ timestamp 42 ] state=[-0.24651118 -0.14525737  0.09280298 -0.29699449], action=0, reward=1.0, next_state=[-0.24941633 -0.34157129  0.08686309  0.02345418]
[ Experience replay ] starts
[ episode 115 ][ timestamp 43 ] state=[-0.24941633 -0.34157129  0.08686309  0.02345418], action=1, reward=1.0, next_state=[-0.25624775 -0.14779549  0.08733218 -0.240608  ]
[ Experience replay ] starts
[ episode 115 ][ timestamp 44 ] state=[-0.25624775 -0.14779549  0.08733218 -0.240608  ], action=1, reward=1.0, next_state=[-0.25920366  0.04597741  0.08252002 -0.5045166 ]
[ Experience replay ] st

[ episode 115 ][ timestamp 84 ] state=[-0.55637214 -1.26772728 -0.10346238  0.39856435], action=1, reward=1.0, next_state=[-0.58172668 -1.0713014  -0.09549109  0.07513741]
[ Experience replay ] starts
[ episode 115 ][ timestamp 85 ] state=[-0.58172668 -1.0713014  -0.09549109  0.07513741], action=0, reward=1.0, next_state=[-0.60315271 -1.2649339  -0.09398834  0.3362312 ]
[ Experience replay ] starts
[ episode 115 ][ timestamp 86 ] state=[-0.60315271 -1.2649339  -0.09398834  0.3362312 ], action=0, reward=1.0, next_state=[-0.62845139 -1.45860138 -0.08726372  0.59785834]
[ Experience replay ] starts
[ episode 115 ][ timestamp 87 ] state=[-0.62845139 -1.45860138 -0.08726372  0.59785834], action=1, reward=1.0, next_state=[-0.65762342 -1.26237374 -0.07530655  0.27901387]
[ Experience replay ] starts
[ episode 115 ][ timestamp 88 ] state=[-0.65762342 -1.26237374 -0.07530655  0.27901387], action=0, reward=1.0, next_state=[-0.68287089 -1.45634514 -0.06972628  0.54702714]
[ Experience replay ] st

[ episode 115 ][ timestamp 125 ] state=[-1.52199614 -0.68813225  0.03540814 -0.35475771], action=0, reward=1.0, next_state=[-1.53575879 -0.8837393   0.02831299 -0.05112323]
[ Experience replay ] starts
[ episode 115 ][ timestamp 126 ] state=[-1.53575879 -0.8837393   0.02831299 -0.05112323], action=1, reward=1.0, next_state=[-1.55343357 -0.68903452  0.02729052 -0.33474043]
[ Experience replay ] starts
[ episode 115 ][ timestamp 127 ] state=[-1.55343357 -0.68903452  0.02729052 -0.33474043], action=0, reward=1.0, next_state=[-1.56721426 -0.88453403  0.02059572 -0.03357794]
[ Experience replay ] starts
[ episode 115 ][ timestamp 128 ] state=[-1.56721426 -0.88453403  0.02059572 -0.03357794], action=1, reward=1.0, next_state=[-1.58490495 -0.68971339  0.01992416 -0.31969221]
[ Experience replay ] starts
[ episode 115 ][ timestamp 129 ] state=[-1.58490495 -0.68971339  0.01992416 -0.31969221], action=1, reward=1.0, next_state=[-1.59869921 -0.49488078  0.01353031 -0.60602581]
[ Experience replay

[ episode 116 ][ timestamp 11 ] state=[ 0.00651182 -0.02953847  0.01558676 -0.02315013], action=0, reward=1.0, next_state=[ 0.00592105 -0.22488044  0.01512376  0.27440955]
[ Experience replay ] starts
[ episode 116 ][ timestamp 12 ] state=[ 0.00592105 -0.22488044  0.01512376  0.27440955], action=1, reward=1.0, next_state=[ 0.00142345 -0.02997751  0.02061195 -0.0134652 ]
[ Experience replay ] starts
[ episode 116 ][ timestamp 13 ] state=[ 0.00142345 -0.02997751  0.02061195 -0.0134652 ], action=1, reward=1.0, next_state=[ 0.0008239   0.16484287  0.02034264 -0.29957421]
[ Experience replay ] starts
[ episode 116 ][ timestamp 14 ] state=[ 0.0008239   0.16484287  0.02034264 -0.29957421], action=1, reward=1.0, next_state=[ 0.00412075  0.35966904  0.01435116 -0.58577268]
[ Experience replay ] starts
[ episode 116 ][ timestamp 15 ] state=[ 0.00412075  0.35966904  0.01435116 -0.58577268], action=0, reward=1.0, next_state=[ 0.01131413  0.16434905  0.0026357  -0.28860376]
[ Experience replay ] st

[ episode 116 ][ timestamp 54 ] state=[-0.05094893 -0.22578575 -0.01917004  0.294454  ], action=1, reward=1.0, next_state=[-0.05546464 -0.03039581 -0.01328096 -0.0042127 ]
[ Experience replay ] starts
[ episode 116 ][ timestamp 55 ] state=[-0.05546464 -0.03039581 -0.01328096 -0.0042127 ], action=0, reward=1.0, next_state=[-0.05607256 -0.2253248  -0.01336521  0.28425051]
[ Experience replay ] starts
[ episode 116 ][ timestamp 56 ] state=[-0.05607256 -0.2253248  -0.01336521  0.28425051], action=1, reward=1.0, next_state=[-0.06057905 -0.0300148  -0.0076802  -0.01261758]
[ Experience replay ] starts
[ episode 116 ][ timestamp 57 ] state=[-0.06057905 -0.0300148  -0.0076802  -0.01261758], action=0, reward=1.0, next_state=[-0.06117935 -0.22502577 -0.00793256  0.27763229]
[ Experience replay ] starts
[ episode 116 ][ timestamp 58 ] state=[-0.06117935 -0.22502577 -0.00793256  0.27763229], action=1, reward=1.0, next_state=[-0.06567986 -0.02979156 -0.00237991 -0.01754197]
[ Experience replay ] st

[ episode 116 ][ timestamp 95 ] state=[ 0.02159158  0.7291257  -0.00657338 -0.71255066], action=0, reward=1.0, next_state=[ 0.0361741   0.53409538 -0.02082439 -0.42194406]
[ Experience replay ] starts
[ episode 116 ][ timestamp 96 ] state=[ 0.0361741   0.53409538 -0.02082439 -0.42194406], action=0, reward=1.0, next_state=[ 0.04685601  0.33927455 -0.02926327 -0.13589812]
[ Experience replay ] starts
[ episode 116 ][ timestamp 97 ] state=[ 0.04685601  0.33927455 -0.02926327 -0.13589812], action=0, reward=1.0, next_state=[ 0.0536415   0.14458371 -0.03198123  0.14741094]
[ Experience replay ] starts
[ episode 116 ][ timestamp 98 ] state=[ 0.0536415   0.14458371 -0.03198123  0.14741094], action=1, reward=1.0, next_state=[ 0.05653317  0.3401487  -0.02903301 -0.15518741]
[ Experience replay ] starts
[ episode 116 ][ timestamp 99 ] state=[ 0.05653317  0.3401487  -0.02903301 -0.15518741], action=1, reward=1.0, next_state=[ 0.06333615  0.53567405 -0.03213676 -0.45688635]
[ Experience replay ] st

[ episode 117 ][ timestamp 31 ] state=[-0.11524991  0.05256215  0.10667135 -0.09573074], action=1, reward=1.0, next_state=[-0.11419866  0.24600636  0.10475674 -0.35294581]
[ Experience replay ] starts
[ episode 117 ][ timestamp 32 ] state=[-0.11419866  0.24600636  0.10475674 -0.35294581], action=1, reward=1.0, next_state=[-0.10927854  0.43949482  0.09769782 -0.61084528]
[ Experience replay ] starts
[ episode 117 ][ timestamp 33 ] state=[-0.10927854  0.43949482  0.09769782 -0.61084528], action=0, reward=1.0, next_state=[-0.10048864  0.24315284  0.08548091 -0.28905924]
[ Experience replay ] starts
[ episode 117 ][ timestamp 34 ] state=[-0.10048864  0.24315284  0.08548091 -0.28905924], action=1, reward=1.0, next_state=[-0.09562558  0.43695842  0.07969973 -0.55360537]
[ Experience replay ] starts
[ episode 117 ][ timestamp 35 ] state=[-0.09562558  0.43695842  0.07969973 -0.55360537], action=0, reward=1.0, next_state=[-0.08688642  0.24081307  0.06862762 -0.23691437]
[ Experience replay ] st

[ episode 118 ][ timestamp 10 ] state=[-0.08142557 -1.00565678  0.17600307  1.68373134], action=0, reward=-1.0, next_state=[-0.10153871 -1.2023256   0.20967769  2.02565534]
[ Experience replay ] starts
[ Ended! ] Episode 118: Exploration_rate=0.01. Score=10.
[ episode 119 ] state=[ 0.04896425 -0.01667025 -0.02636561 -0.02444538]
[ episode 119 ][ timestamp 1 ] state=[ 0.04896425 -0.01667025 -0.02636561 -0.02444538], action=0, reward=1.0, next_state=[ 0.04863085 -0.21140437 -0.02685452  0.25980369]
[ Experience replay ] starts
[ episode 119 ][ timestamp 2 ] state=[ 0.04863085 -0.21140437 -0.02685452  0.25980369], action=0, reward=1.0, next_state=[ 0.04440276 -0.40613287 -0.02165844  0.54389684]
[ Experience replay ] starts
[ episode 119 ][ timestamp 3 ] state=[ 0.04440276 -0.40613287 -0.02165844  0.54389684], action=0, reward=1.0, next_state=[ 0.0362801  -0.60094387 -0.01078051  0.82967772]
[ Experience replay ] starts
[ episode 119 ][ timestamp 4 ] state=[ 0.0362801  -0.60094387 -0.0107

[ episode 119 ][ timestamp 47 ] state=[ 0.10170079 -0.02318296 -0.02622501  0.11920445], action=0, reward=1.0, next_state=[ 0.10123714 -0.21791955 -0.02384092  0.40349962]
[ Experience replay ] starts
[ episode 119 ][ timestamp 48 ] state=[ 0.10123714 -0.21791955 -0.02384092  0.40349962], action=1, reward=1.0, next_state=[ 0.09687874 -0.02246772 -0.01577092  0.10339663]
[ Experience replay ] starts
[ episode 119 ][ timestamp 49 ] state=[ 0.09687874 -0.02246772 -0.01577092  0.10339663], action=0, reward=1.0, next_state=[ 0.09642939 -0.21736015 -0.01370299  0.39106246]
[ Experience replay ] starts
[ episode 119 ][ timestamp 50 ] state=[ 0.09642939 -0.21736015 -0.01370299  0.39106246], action=1, reward=1.0, next_state=[ 0.09208219 -0.02204643 -0.00588174  0.09409084]
[ Experience replay ] starts
[ episode 119 ][ timestamp 51 ] state=[ 0.09208219 -0.02204643 -0.00588174  0.09409084], action=1, reward=1.0, next_state=[ 0.09164126  0.17315932 -0.00399993 -0.20044196]
[ Experience replay ] st

[ episode 119 ][ timestamp 96 ] state=[ 0.01375931  0.16749324  0.08523522 -0.07575547], action=0, reward=1.0, next_state=[ 0.01710918 -0.02874066  0.08372011  0.24255561]
[ Experience replay ] starts
[ episode 119 ][ timestamp 97 ] state=[ 0.01710918 -0.02874066  0.08372011  0.24255561], action=1, reward=1.0, next_state=[ 0.01653436  0.16509185  0.08857123 -0.02258984]
[ Experience replay ] starts
[ episode 119 ][ timestamp 98 ] state=[ 0.01653436  0.16509185  0.08857123 -0.02258984], action=1, reward=1.0, next_state=[ 0.0198362   0.35883932  0.08811943 -0.28606593]
[ Experience replay ] starts
[ episode 119 ][ timestamp 99 ] state=[ 0.0198362   0.35883932  0.08811943 -0.28606593], action=0, reward=1.0, next_state=[0.02701299 0.16257838 0.08239811 0.03305683]
[ Experience replay ] starts
[ episode 119 ][ timestamp 100 ] state=[0.02701299 0.16257838 0.08239811 0.03305683], action=1, reward=1.0, next_state=[ 0.03026455  0.35642797  0.08305925 -0.23253337]
[ Experience replay ] starts
[ 

[ episode 119 ][ timestamp 141 ] state=[ 0.53386788  0.33736041 -0.10795819  0.19002068], action=0, reward=1.0, next_state=[ 0.54061509  0.14393525 -0.10415778  0.4467912 ]
[ Experience replay ] starts
[ episode 119 ][ timestamp 142 ] state=[ 0.54061509  0.14393525 -0.10415778  0.4467912 ], action=1, reward=1.0, next_state=[ 0.54349379  0.34036459 -0.09522195  0.12317698]
[ Experience replay ] starts
[ episode 119 ][ timestamp 143 ] state=[ 0.54349379  0.34036459 -0.09522195  0.12317698], action=1, reward=1.0, next_state=[ 0.55030108  0.53671264 -0.09275841 -0.19796382]
[ Experience replay ] starts
[ episode 119 ][ timestamp 144 ] state=[ 0.55030108  0.53671264 -0.09275841 -0.19796382], action=0, reward=1.0, next_state=[ 0.56103534  0.3430314  -0.09671769  0.0640772 ]
[ Experience replay ] starts
[ episode 119 ][ timestamp 145 ] state=[ 0.56103534  0.3430314  -0.09671769  0.0640772 ], action=0, reward=1.0, next_state=[ 0.56789596  0.14941961 -0.09543615  0.32474692]
[ Experience replay

[ episode 119 ][ timestamp 187 ] state=[ 0.38619567 -1.53362617 -0.0187017   1.34696813], action=0, reward=1.0, next_state=[ 0.35552315 -1.72850809  0.00823767  1.63374191]
[ Experience replay ] starts
[ episode 119 ][ timestamp 188 ] state=[ 0.35552315 -1.72850809  0.00823767  1.63374191], action=1, reward=1.0, next_state=[ 0.32095299 -1.53348379  0.0409125   1.34363723]
[ Experience replay ] starts
[ episode 119 ][ timestamp 189 ] state=[ 0.32095299 -1.53348379  0.0409125   1.34363723], action=1, reward=1.0, next_state=[ 0.29028331 -1.33889971  0.06778525  1.06403027]
[ Experience replay ] starts
[ episode 119 ][ timestamp 190 ] state=[ 0.29028331 -1.33889971  0.06778525  1.06403027], action=1, reward=1.0, next_state=[ 0.26350532 -1.14473728  0.08906585  0.79336909]
[ Experience replay ] starts
[ episode 119 ][ timestamp 191 ] state=[ 0.26350532 -1.14473728  0.08906585  0.79336909], action=1, reward=1.0, next_state=[ 0.24061057 -0.95094349  0.10493324  0.52998138]
[ Experience replay

[ episode 120 ][ timestamp 33 ] state=[ 0.11291755 -0.00594191 -0.05377114  0.15798688], action=1, reward=1.0, next_state=[ 0.11279872  0.18990701 -0.0506114  -0.151163  ]
[ Experience replay ] starts
[ episode 120 ][ timestamp 34 ] state=[ 0.11279872  0.18990701 -0.0506114  -0.151163  ], action=1, reward=1.0, next_state=[ 0.11659686  0.38571577 -0.05363466 -0.45937344]
[ Experience replay ] starts
[ episode 120 ][ timestamp 35 ] state=[ 0.11659686  0.38571577 -0.05363466 -0.45937344], action=0, reward=1.0, next_state=[ 0.12431117  0.19139137 -0.06282213 -0.18406703]
[ Experience replay ] starts
[ episode 120 ][ timestamp 36 ] state=[ 0.12431117  0.19139137 -0.06282213 -0.18406703], action=0, reward=1.0, next_state=[ 0.128139   -0.00277806 -0.06650347  0.08815502]
[ Experience replay ] starts
[ episode 120 ][ timestamp 37 ] state=[ 0.128139   -0.00277806 -0.06650347  0.08815502], action=1, reward=1.0, next_state=[ 0.12808344  0.19323099 -0.06474037 -0.22474625]
[ Experience replay ] st

[ Ended! ] Episode 121: Exploration_rate=0.01. Score=9.
[ episode 122 ] state=[ 0.00185129 -0.04566346  0.04989536  0.00481088]
[ episode 122 ][ timestamp 1 ] state=[ 0.00185129 -0.04566346  0.04989536  0.00481088], action=1, reward=1.0, next_state=[ 0.00093802  0.14870873  0.04999157 -0.27172141]
[ Experience replay ] starts
[ episode 122 ][ timestamp 2 ] state=[ 0.00093802  0.14870873  0.04999157 -0.27172141], action=0, reward=1.0, next_state=[ 0.0039122  -0.04708961  0.04455715  0.03630057]
[ Experience replay ] starts
[ episode 122 ][ timestamp 3 ] state=[ 0.0039122  -0.04708961  0.04455715  0.03630057], action=1, reward=1.0, next_state=[ 0.0029704   0.147366    0.04528316 -0.24199788]
[ Experience replay ] starts
[ episode 122 ][ timestamp 4 ] state=[ 0.0029704   0.147366    0.04528316 -0.24199788], action=1, reward=1.0, next_state=[ 0.00591772  0.34181284  0.0404432  -0.52006045]
[ Experience replay ] starts
[ episode 122 ][ timestamp 5 ] state=[ 0.00591772  0.34181284  0.0404432

[ episode 122 ][ timestamp 42 ] state=[ 0.09772074  0.13668312  0.0010134  -0.00626389], action=0, reward=1.0, next_state=[ 0.1004544  -0.05845335  0.00088813  0.2867386 ]
[ Experience replay ] starts
[ episode 122 ][ timestamp 43 ] state=[ 0.1004544  -0.05845335  0.00088813  0.2867386 ], action=1, reward=1.0, next_state=[ 0.09928534  0.13665592  0.0066229  -0.00566408]
[ Experience replay ] starts
[ episode 122 ][ timestamp 44 ] state=[ 0.09928534  0.13665592  0.0066229  -0.00566408], action=1, reward=1.0, next_state=[ 0.10201846  0.33168227  0.00650962 -0.29625007]
[ Experience replay ] starts
[ episode 122 ][ timestamp 45 ] state=[ 0.10201846  0.33168227  0.00650962 -0.29625007], action=1, reward=1.0, next_state=[ 1.08652101e-01  5.26710818e-01  5.84614748e-04 -5.86872881e-01]
[ Experience replay ] starts
[ episode 122 ][ timestamp 46 ] state=[ 1.08652101e-01  5.26710818e-01  5.84614748e-04 -5.86872881e-01], action=0, reward=1.0, next_state=[ 0.11918632  0.33158068 -0.01115284 -0.29

[ episode 122 ][ timestamp 86 ] state=[ 0.03409609 -0.23574487  0.0410654   0.18640204], action=1, reward=1.0, next_state=[ 0.0293812  -0.04123379  0.04479344 -0.09304877]
[ Experience replay ] starts
[ episode 122 ][ timestamp 87 ] state=[ 0.0293812  -0.04123379  0.04479344 -0.09304877], action=1, reward=1.0, next_state=[ 0.02855652  0.15321847  0.04293246 -0.37126972]
[ Experience replay ] starts
[ episode 122 ][ timestamp 88 ] state=[ 0.02855652  0.15321847  0.04293246 -0.37126972], action=1, reward=1.0, next_state=[ 0.03162089  0.34770502  0.03550707 -0.65011246]
[ Experience replay ] starts
[ episode 122 ][ timestamp 89 ] state=[ 0.03162089  0.34770502  0.03550707 -0.65011246], action=0, reward=1.0, next_state=[ 0.03857499  0.15210695  0.02250482 -0.3464634 ]
[ Experience replay ] starts
[ episode 122 ][ timestamp 90 ] state=[ 0.03857499  0.15210695  0.02250482 -0.3464634 ], action=0, reward=1.0, next_state=[ 0.04161713 -0.04332777  0.01557555 -0.0467697 ]
[ Experience replay ] st

[ episode 122 ][ timestamp 128 ] state=[-0.02701299 -0.2258826  -0.01465432 -0.03129217], action=1, reward=1.0, next_state=[-0.03153064 -0.03055359 -0.01528016 -0.32856243]
[ Experience replay ] starts
[ episode 122 ][ timestamp 129 ] state=[-0.03153064 -0.03055359 -0.01528016 -0.32856243], action=1, reward=1.0, next_state=[-0.03214171  0.16478252 -0.02185141 -0.62602458]
[ Experience replay ] starts
[ episode 122 ][ timestamp 130 ] state=[-0.03214171  0.16478252 -0.02185141 -0.62602458], action=0, reward=1.0, next_state=[-0.02884606 -0.0300277  -0.0343719  -0.34030282]
[ Experience replay ] starts
[ episode 122 ][ timestamp 131 ] state=[-0.02884606 -0.0300277  -0.0343719  -0.34030282], action=1, reward=1.0, next_state=[-0.02944661  0.16556602 -0.04117796 -0.64362346]
[ Experience replay ] starts
[ episode 122 ][ timestamp 132 ] state=[-0.02944661  0.16556602 -0.04117796 -0.64362346], action=0, reward=1.0, next_state=[-0.02613529 -0.02895856 -0.05405043 -0.36418683]
[ Experience replay

[ episode 122 ][ timestamp 174 ] state=[-0.51175794 -0.59423107  0.04827806  0.07121203], action=0, reward=1.0, next_state=[-0.52364256 -0.79001072  0.0497023   0.37872757]
[ Experience replay ] starts
[ episode 122 ][ timestamp 175 ] state=[-0.52364256 -0.79001072  0.0497023   0.37872757], action=1, reward=1.0, next_state=[-0.53944277 -0.59562855  0.05727685  0.10212085]
[ Experience replay ] starts
[ episode 122 ][ timestamp 176 ] state=[-0.53944277 -0.59562855  0.05727685  0.10212085], action=0, reward=1.0, next_state=[-0.55135535 -0.79152259  0.05931927  0.41231023]
[ Experience replay ] starts
[ episode 122 ][ timestamp 177 ] state=[-0.55135535 -0.79152259  0.05931927  0.41231023], action=0, reward=1.0, next_state=[-0.5671858  -0.98743306  0.06756547  0.7230887 ]
[ Experience replay ] starts
[ episode 122 ][ timestamp 178 ] state=[-0.5671858  -0.98743306  0.06756547  0.7230887 ], action=1, reward=1.0, next_state=[-0.58693446 -0.79330745  0.08202725  0.45241383]
[ Experience replay

[ episode 122 ][ timestamp 215 ] state=[-0.83802371 -0.80243142  0.08495254  0.65419173], action=0, reward=1.0, next_state=[-0.85407234 -0.99862708  0.09803637  0.97236992]
[ Experience replay ] starts
[ episode 122 ][ timestamp 216 ] state=[-0.85407234 -0.99862708  0.09803637  0.97236992], action=1, reward=1.0, next_state=[-0.87404488 -0.8049476   0.11748377  0.71202224]
[ Experience replay ] starts
[ episode 122 ][ timestamp 217 ] state=[-0.87404488 -0.8049476   0.11748377  0.71202224], action=1, reward=1.0, next_state=[-0.89014383 -0.61163145  0.13172422  0.45850771]
[ Experience replay ] starts
[ episode 122 ][ timestamp 218 ] state=[-0.89014383 -0.61163145  0.13172422  0.45850771], action=1, reward=1.0, next_state=[-0.90237646 -0.41859357  0.14089437  0.21007437]
[ Experience replay ] starts
[ episode 122 ][ timestamp 219 ] state=[-0.90237646 -0.41859357  0.14089437  0.21007437], action=0, reward=1.0, next_state=[-0.91074833 -0.61541946  0.14509586  0.54367366]
[ Experience replay

[ episode 123 ][ timestamp 35 ] state=[ 0.04580955 -0.71745063 -0.09315263  0.35617565], action=1, reward=1.0, next_state=[ 0.03146054 -0.52113625 -0.08602911  0.03563349]
[ Experience replay ] starts
[ episode 123 ][ timestamp 36 ] state=[ 0.03146054 -0.52113625 -0.08602911  0.03563349], action=0, reward=1.0, next_state=[ 0.02103781 -0.71492591 -0.08531644  0.29998161]
[ Experience replay ] starts
[ episode 123 ][ timestamp 37 ] state=[ 0.02103781 -0.71492591 -0.08531644  0.29998161], action=0, reward=1.0, next_state=[ 0.00673929 -0.90873474 -0.07931681  0.56458483]
[ Experience replay ] starts
[ episode 123 ][ timestamp 38 ] state=[ 0.00673929 -0.90873474 -0.07931681  0.56458483], action=0, reward=1.0, next_state=[-0.0114354  -1.1026595  -0.06802511  0.83126274]
[ Experience replay ] starts
[ episode 123 ][ timestamp 39 ] state=[-0.0114354  -1.1026595  -0.06802511  0.83126274], action=0, reward=1.0, next_state=[-0.03348859 -1.29678907 -0.05139986  1.10179965]
[ Experience replay ] st

[ episode 124 ][ timestamp 29 ] state=[-0.03625367 -0.40237746 -0.03632114  0.4347415 ], action=0, reward=1.0, next_state=[-0.04430121 -0.59696689 -0.02762631  0.71575707]
[ Experience replay ] starts
[ episode 124 ][ timestamp 30 ] state=[-0.04430121 -0.59696689 -0.02762631  0.71575707], action=0, reward=1.0, next_state=[-0.05624055 -0.79169578 -0.01331117  0.99961784]
[ Experience replay ] starts
[ episode 124 ][ timestamp 31 ] state=[-0.05624055 -0.79169578 -0.01331117  0.99961784], action=1, reward=1.0, next_state=[-0.07207447 -0.59639846  0.00668119  0.70278444]
[ Experience replay ] starts
[ episode 124 ][ timestamp 32 ] state=[-0.07207447 -0.59639846  0.00668119  0.70278444], action=0, reward=1.0, next_state=[-0.08400244 -0.79161237  0.02073688  0.99756303]
[ Experience replay ] starts
[ episode 124 ][ timestamp 33 ] state=[-0.08400244 -0.79161237  0.02073688  0.99756303], action=1, reward=1.0, next_state=[-0.09983468 -0.59677373  0.04068814  0.7114641 ]
[ Experience replay ] st

[ episode 125 ][ timestamp 33 ] state=[ 0.09636764  1.48996538 -0.02219895 -1.81650356], action=0, reward=1.0, next_state=[ 0.12616695  1.29509725 -0.05852902 -1.53079936]
[ Experience replay ] starts
[ episode 125 ][ timestamp 34 ] state=[ 0.12616695  1.29509725 -0.05852902 -1.53079936], action=0, reward=1.0, next_state=[ 0.15206889  1.10072771 -0.08914501 -1.25694199]
[ Experience replay ] starts
[ episode 125 ][ timestamp 35 ] state=[ 0.15206889  1.10072771 -0.08914501 -1.25694199], action=0, reward=1.0, next_state=[ 0.17408344  0.90685268 -0.11428385 -0.99345813]
[ Experience replay ] starts
[ episode 125 ][ timestamp 36 ] state=[ 0.17408344  0.90685268 -0.11428385 -0.99345813], action=1, reward=1.0, next_state=[ 0.1922205   1.10330267 -0.13415301 -1.31973722]
[ Experience replay ] starts
[ episode 125 ][ timestamp 37 ] state=[ 0.1922205   1.10330267 -0.13415301 -1.31973722], action=1, reward=1.0, next_state=[ 0.21428655  1.29984103 -0.16054775 -1.6512187 ]
[ Experience replay ] st

[ episode 126 ][ timestamp 38 ] state=[-0.07271356 -0.57468194 -0.04097573  0.35362955], action=0, reward=1.0, next_state=[-0.0842072  -0.76919801 -0.03390314  0.63311525]
[ Experience replay ] starts
[ episode 126 ][ timestamp 39 ] state=[-0.0842072  -0.76919801 -0.03390314  0.63311525], action=0, reward=1.0, next_state=[-0.09959116 -0.96383101 -0.02124083  0.91493136]
[ Experience replay ] starts
[ episode 126 ][ timestamp 40 ] state=[-0.09959116 -0.96383101 -0.02124083  0.91493136], action=0, reward=1.0, next_state=[-0.11886778 -1.15865934 -0.00294221  1.2008636 ]
[ Experience replay ] starts
[ episode 126 ][ timestamp 41 ] state=[-0.11886778 -1.15865934 -0.00294221  1.2008636 ], action=1, reward=1.0, next_state=[-0.14204096 -0.96349946  0.02107507  0.90726004]
[ Experience replay ] starts
[ episode 126 ][ timestamp 42 ] state=[-0.14204096 -0.96349946  0.02107507  0.90726004], action=0, reward=1.0, next_state=[-0.16131095 -1.15890029  0.03922027  1.20649181]
[ Experience replay ] st

[ episode 127 ][ timestamp 17 ] state=[ 0.05810842 -0.3405599  -0.03698841  0.42102063], action=1, reward=1.0, next_state=[ 0.05129722 -0.14493394 -0.028568    0.11691028]
[ Experience replay ] starts
[ episode 127 ][ timestamp 18 ] state=[ 0.05129722 -0.14493394 -0.028568    0.11691028], action=1, reward=1.0, next_state=[ 0.04839854  0.05058544 -0.02622979 -0.18464697]
[ Experience replay ] starts
[ episode 127 ][ timestamp 19 ] state=[ 0.04839854  0.05058544 -0.02622979 -0.18464697], action=0, reward=1.0, next_state=[ 0.04941025 -0.14415158 -0.02992273  0.0996474 ]
[ Experience replay ] starts
[ episode 127 ][ timestamp 20 ] state=[ 0.04941025 -0.14415158 -0.02992273  0.0996474 ], action=0, reward=1.0, next_state=[ 0.04652722 -0.33883219 -0.02792979  0.38274163]
[ Experience replay ] starts
[ episode 127 ][ timestamp 21 ] state=[ 0.04652722 -0.33883219 -0.02792979  0.38274163], action=1, reward=1.0, next_state=[ 0.03975058 -0.14332504 -0.02027495  0.08138499]
[ Experience replay ] st

[ episode 128 ][ timestamp 27 ] state=[-0.0300541  -0.35629779  0.05150134  0.48035276], action=0, reward=1.0, next_state=[-0.03718006 -0.55210749  0.0611084   0.78881256]
[ Experience replay ] starts
[ episode 128 ][ timestamp 28 ] state=[-0.03718006 -0.55210749  0.0611084   0.78881256], action=1, reward=1.0, next_state=[-0.04822221 -0.35787567  0.07688465  0.51596334]
[ Experience replay ] starts
[ episode 128 ][ timestamp 29 ] state=[-0.04822221 -0.35787567  0.07688465  0.51596334], action=1, reward=1.0, next_state=[-0.05537972 -0.1639158   0.08720392  0.24846484]
[ Experience replay ] starts
[ episode 128 ][ timestamp 30 ] state=[-0.05537972 -0.1639158   0.08720392  0.24846484], action=1, reward=1.0, next_state=[-0.05865804  0.02985955  0.09217321 -0.01548823]
[ Experience replay ] starts
[ episode 128 ][ timestamp 31 ] state=[-0.05865804  0.02985955  0.09217321 -0.01548823], action=1, reward=1.0, next_state=[-0.05806085  0.22354703  0.09186345 -0.2777256 ]
[ Experience replay ] st

[ episode 129 ][ timestamp 15 ] state=[-0.07338491  0.01353733  0.0387223   0.02014103], action=0, reward=1.0, next_state=[-0.07311417 -0.18211793  0.03912512  0.32478543]
[ Experience replay ] starts
[ episode 129 ][ timestamp 16 ] state=[-0.07311417 -0.18211793  0.03912512  0.32478543], action=1, reward=1.0, next_state=[-0.07675652  0.01242573  0.04562083  0.04469312]
[ Experience replay ] starts
[ episode 129 ][ timestamp 17 ] state=[-0.07675652  0.01242573  0.04562083  0.04469312], action=0, reward=1.0, next_state=[-0.07650801 -0.18331971  0.04651469  0.35141365]
[ Experience replay ] starts
[ episode 129 ][ timestamp 18 ] state=[-0.07650801 -0.18331971  0.04651469  0.35141365], action=0, reward=1.0, next_state=[-0.0801744  -0.37907122  0.05354297  0.65839371]
[ Experience replay ] starts
[ episode 129 ][ timestamp 19 ] state=[-0.0801744  -0.37907122  0.05354297  0.65839371], action=1, reward=1.0, next_state=[-0.08775583 -0.18473378  0.06671084  0.38303941]
[ Experience replay ] st

[ episode 129 ][ timestamp 60 ] state=[ 0.16886651 -0.19243038 -0.04412663  0.55267371], action=1, reward=1.0, next_state=[ 0.1650179   0.00328257 -0.03307316  0.24642102]
[ Experience replay ] starts
[ episode 129 ][ timestamp 61 ] state=[ 0.1650179   0.00328257 -0.03307316  0.24642102], action=0, reward=1.0, next_state=[ 0.16508355 -0.19135179 -0.02814474  0.52849117]
[ Experience replay ] starts
[ episode 129 ][ timestamp 62 ] state=[ 0.16508355 -0.19135179 -0.02814474  0.52849117], action=1, reward=1.0, next_state=[ 0.16125651  0.00415459 -0.01757492  0.22707429]
[ Experience replay ] starts
[ episode 129 ][ timestamp 63 ] state=[ 0.16125651  0.00415459 -0.01757492  0.22707429], action=0, reward=1.0, next_state=[ 0.16133961 -0.19071185 -0.01303343  0.51416204]
[ Experience replay ] starts
[ episode 129 ][ timestamp 64 ] state=[ 0.16133961 -0.19071185 -0.01303343  0.51416204], action=1, reward=1.0, next_state=[ 0.15752537  0.00459121 -0.00275019  0.21740061]
[ Experience replay ] st

[ episode 131 ][ timestamp 10 ] state=[-0.00683106 -0.59343867 -0.03358633  0.67872399], action=1, reward=1.0, next_state=[-0.01869983 -0.39786663 -0.02001185  0.37565886]
[ Experience replay ] starts
[ episode 131 ][ timestamp 11 ] state=[-0.01869983 -0.39786663 -0.02001185  0.37565886], action=1, reward=1.0, next_state=[-0.02665716 -0.20246624 -0.01249867  0.07673387]
[ Experience replay ] starts
[ episode 131 ][ timestamp 12 ] state=[-0.02665716 -0.20246624 -0.01249867  0.07673387], action=1, reward=1.0, next_state=[-0.03070649 -0.00716736 -0.01096399 -0.21986608]
[ Experience replay ] starts
[ episode 131 ][ timestamp 13 ] state=[-0.03070649 -0.00716736 -0.01096399 -0.21986608], action=0, reward=1.0, next_state=[-0.03084983 -0.20213089 -0.01536131  0.06933828]
[ Experience replay ] starts
[ episode 131 ][ timestamp 14 ] state=[-0.03084983 -0.20213089 -0.01536131  0.06933828], action=0, reward=1.0, next_state=[-0.03489245 -0.39702928 -0.01397455  0.35713532]
[ Experience replay ] st

[ episode 131 ][ timestamp 54 ] state=[-0.19860663  0.18656654  0.01310755 -0.48202689], action=0, reward=1.0, next_state=[-0.1948753  -0.00873794  0.00346701 -0.18524182]
[ Experience replay ] starts
[ episode 131 ][ timestamp 55 ] state=[-0.1948753  -0.00873794  0.00346701 -0.18524182], action=1, reward=1.0, next_state=[-1.95050059e-01  1.86334234e-01 -2.37828689e-04 -4.76829020e-01]
[ Experience replay ] starts
[ episode 131 ][ timestamp 56 ] state=[-1.95050059e-01  1.86334234e-01 -2.37828689e-04 -4.76829020e-01], action=0, reward=1.0, next_state=[-0.19132337 -0.00878436 -0.00977441 -0.18422106]
[ Experience replay ] starts
[ episode 131 ][ timestamp 57 ] state=[-0.19132337 -0.00878436 -0.00977441 -0.18422106], action=0, reward=1.0, next_state=[-0.19149906 -0.2037651  -0.01345883  0.10536245]
[ Experience replay ] starts
[ episode 131 ][ timestamp 58 ] state=[-0.19149906 -0.2037651  -0.01345883  0.10536245], action=1, reward=1.0, next_state=[-0.19557436 -0.00845289 -0.01135158 -0.19

[ Ended! ] Episode 131: Exploration_rate=0.01. Score=95.
[ episode 132 ] state=[ 0.00565933 -0.03695251 -0.03243902 -0.02869907]
[ episode 132 ][ timestamp 1 ] state=[ 0.00565933 -0.03695251 -0.03243902 -0.02869907], action=1, reward=1.0, next_state=[ 0.00492028  0.15861926 -0.03301301 -0.33143779]
[ Experience replay ] starts
[ episode 132 ][ timestamp 2 ] state=[ 0.00492028  0.15861926 -0.03301301 -0.33143779], action=0, reward=1.0, next_state=[ 0.00809267 -0.03601761 -0.03964176 -0.04934564]
[ Experience replay ] starts
[ episode 132 ][ timestamp 3 ] state=[ 0.00809267 -0.03601761 -0.03964176 -0.04934564], action=1, reward=1.0, next_state=[ 0.00737232  0.15964969 -0.04062867 -0.35426762]
[ Experience replay ] starts
[ episode 132 ][ timestamp 4 ] state=[ 0.00737232  0.15964969 -0.04062867 -0.35426762], action=1, reward=1.0, next_state=[ 0.01056531  0.35532508 -0.04771403 -0.65948003]
[ Experience replay ] starts
[ episode 132 ][ timestamp 5 ] state=[ 0.01056531  0.35532508 -0.047714

[ episode 132 ][ timestamp 47 ] state=[-0.26224911 -0.80350544  0.04659517  0.83529395], action=0, reward=1.0, next_state=[-0.27831922 -0.99923189  0.06330104  1.142259  ]
[ Experience replay ] starts
[ episode 132 ][ timestamp 48 ] state=[-0.27831922 -0.99923189  0.06330104  1.142259  ], action=1, reward=1.0, next_state=[-0.29830386 -0.8049917   0.08614622  0.87008033]
[ Experience replay ] starts
[ episode 132 ][ timestamp 49 ] state=[-0.29830386 -0.8049917   0.08614622  0.87008033], action=1, reward=1.0, next_state=[-0.31440369 -0.61114056  0.10354783  0.60567759]
[ Experience replay ] starts
[ episode 132 ][ timestamp 50 ] state=[-0.31440369 -0.61114056  0.10354783  0.60567759], action=1, reward=1.0, next_state=[-0.3266265  -0.41760735  0.11566138  0.34732139]
[ Experience replay ] starts
[ episode 132 ][ timestamp 51 ] state=[-0.3266265  -0.41760735  0.11566138  0.34732139], action=1, reward=1.0, next_state=[-0.33497865 -0.22430419  0.12260781  0.09323261]
[ Experience replay ] st

[ episode 132 ][ timestamp 90 ] state=[-0.20802854  0.15081671 -0.03031165 -0.15946989], action=0, reward=1.0, next_state=[-0.20501221 -0.04385847 -0.03350104  0.12349847]
[ Experience replay ] starts
[ episode 132 ][ timestamp 91 ] state=[-0.20501221 -0.04385847 -0.03350104  0.12349847], action=1, reward=1.0, next_state=[-0.20588938  0.15172703 -0.03103107 -0.17956261]
[ Experience replay ] starts
[ episode 132 ][ timestamp 92 ] state=[-0.20588938  0.15172703 -0.03103107 -0.17956261], action=0, reward=1.0, next_state=[-0.20285484 -0.04293744 -0.03462233  0.10317185]
[ Experience replay ] starts
[ episode 132 ][ timestamp 93 ] state=[-0.20285484 -0.04293744 -0.03462233  0.10317185], action=0, reward=1.0, next_state=[-0.20371358 -0.23754655 -0.03255889  0.38473364]
[ Experience replay ] starts
[ episode 132 ][ timestamp 94 ] state=[-0.20371358 -0.23754655 -0.03255889  0.38473364], action=1, reward=1.0, next_state=[-0.20846452 -0.04197786 -0.02486422  0.08196545]
[ Experience replay ] st

[ episode 132 ][ timestamp 137 ] state=[-0.39079782 -0.40511765 -0.00111472  0.06945175], action=1, reward=1.0, next_state=[-3.98900176e-01 -2.09979737e-01  2.74316956e-04 -2.23582668e-01]
[ Experience replay ] starts
[ episode 132 ][ timestamp 138 ] state=[-3.98900176e-01 -2.09979737e-01  2.74316956e-04 -2.23582668e-01], action=0, reward=1.0, next_state=[-0.40309977 -0.40510561 -0.00419734  0.06918678]
[ Experience replay ] starts
[ episode 132 ][ timestamp 139 ] state=[-0.40309977 -0.40510561 -0.00419734  0.06918678], action=0, reward=1.0, next_state=[-0.41120188 -0.60016713 -0.0028136   0.36054247]
[ Experience replay ] starts
[ episode 132 ][ timestamp 140 ] state=[-0.41120188 -0.60016713 -0.0028136   0.36054247], action=0, reward=1.0, next_state=[-0.42320523 -0.79524898  0.00439725  0.65233688]
[ Experience replay ] starts
[ episode 132 ][ timestamp 141 ] state=[-0.42320523 -0.79524898  0.00439725  0.65233688], action=1, reward=1.0, next_state=[-0.43911021 -0.60018854  0.01744399 

[ episode 133 ][ timestamp 27 ] state=[ 0.07985897  0.44261238 -0.14102743 -0.82602039], action=0, reward=1.0, next_state=[ 0.08871121  0.24967161 -0.15754783 -0.58080726]
[ Experience replay ] starts
[ episode 133 ][ timestamp 28 ] state=[ 0.08871121  0.24967161 -0.15754783 -0.58080726], action=0, reward=1.0, next_state=[ 0.09370464  0.05706721 -0.16916398 -0.34160645]
[ Experience replay ] starts
[ episode 133 ][ timestamp 29 ] state=[ 0.09370464  0.05706721 -0.16916398 -0.34160645], action=0, reward=1.0, next_state=[ 0.09484599 -0.13529491 -0.17599611 -0.10667932]
[ Experience replay ] starts
[ episode 133 ][ timestamp 30 ] state=[ 0.09484599 -0.13529491 -0.17599611 -0.10667932], action=0, reward=1.0, next_state=[ 0.09214009 -0.32751539 -0.1781297   0.12572129]
[ Experience replay ] starts
[ episode 133 ][ timestamp 31 ] state=[ 0.09214009 -0.32751539 -0.1781297   0.12572129], action=0, reward=1.0, next_state=[ 0.08558978 -0.51969763 -0.17561527  0.35733964]
[ Experience replay ] st

[ episode 133 ][ timestamp 70 ] state=[-0.42627827 -0.50823249  0.02387784  0.09885543], action=0, reward=1.0, next_state=[-0.43644292 -0.70368837  0.02585495  0.39897509]
[ Experience replay ] starts
[ episode 133 ][ timestamp 71 ] state=[-0.43644292 -0.70368837  0.02585495  0.39897509], action=0, reward=1.0, next_state=[-0.45051669 -0.89916738  0.03383445  0.69969612]
[ Experience replay ] starts
[ episode 133 ][ timestamp 72 ] state=[-0.45051669 -0.89916738  0.03383445  0.69969612], action=0, reward=1.0, next_state=[-0.46850003 -1.09474167  0.04782837  1.00283507]
[ Experience replay ] starts
[ episode 133 ][ timestamp 73 ] state=[-0.46850003 -1.09474167  0.04782837  1.00283507], action=0, reward=1.0, next_state=[-0.49039487 -1.29046885  0.06788507  1.3101463 ]
[ Experience replay ] starts
[ episode 133 ][ timestamp 74 ] state=[-0.49039487 -1.29046885  0.06788507  1.3101463 ], action=0, reward=1.0, next_state=[-0.51620424 -1.48638183  0.094088    1.62328178]
[ Experience replay ] st

[ episode 135 ][ timestamp 16 ] state=[ 0.05872527 -0.59605618 -0.12254952  0.44955303], action=0, reward=1.0, next_state=[ 0.04680415 -0.78925099 -0.11355846  0.70123243]
[ Experience replay ] starts
[ episode 135 ][ timestamp 17 ] state=[ 0.04680415 -0.78925099 -0.11355846  0.70123243], action=0, reward=1.0, next_state=[ 0.03101913 -0.98263099 -0.09953381  0.95611965]
[ Experience replay ] starts
[ episode 135 ][ timestamp 18 ] state=[ 0.03101913 -0.98263099 -0.09953381  0.95611965], action=0, reward=1.0, next_state=[ 0.01136651 -1.1762836  -0.08041142  1.21594622]
[ Experience replay ] starts
[ episode 135 ][ timestamp 19 ] state=[ 0.01136651 -1.1762836  -0.08041142  1.21594622], action=1, reward=1.0, next_state=[-0.01215917 -0.98022182 -0.0560925   0.89918835]
[ Experience replay ] starts
[ episode 135 ][ timestamp 20 ] state=[-0.01215917 -0.98022182 -0.0560925   0.89918835], action=0, reward=1.0, next_state=[-0.0317636  -1.17454054 -0.03810873  1.17372546]
[ Experience replay ] st

[ episode 136 ][ timestamp 35 ] state=[-0.04335054  0.04827863 -0.02200382 -0.30163701], action=0, reward=1.0, next_state=[-0.04238497 -0.14652291 -0.02803656 -0.01597403]
[ Experience replay ] starts
[ episode 136 ][ timestamp 36 ] state=[-0.04238497 -0.14652291 -0.02803656 -0.01597403], action=1, reward=1.0, next_state=[-0.04531543  0.04898967 -0.02835604 -0.31736931]
[ Experience replay ] starts
[ episode 136 ][ timestamp 37 ] state=[-0.04531543  0.04898967 -0.02835604 -0.31736931], action=1, reward=1.0, next_state=[-0.04433564  0.24450378 -0.03470342 -0.61885814]
[ Experience replay ] starts
[ episode 136 ][ timestamp 38 ] state=[-0.04433564  0.24450378 -0.03470342 -0.61885814], action=0, reward=1.0, next_state=[-0.03944556  0.04988331 -0.04708059 -0.33730397]
[ Experience replay ] starts
[ episode 136 ][ timestamp 39 ] state=[-0.03944556  0.04988331 -0.04708059 -0.33730397], action=0, reward=1.0, next_state=[-0.03844789 -0.14453815 -0.05382667 -0.05983151]
[ Experience replay ] st

[ episode 136 ][ timestamp 82 ] state=[-0.50348504 -1.6892796   0.17556089  1.94747567], action=1, reward=-1.0, next_state=[-0.53727063 -1.49640829  0.2145104   1.71396588]
[ Experience replay ] starts
[ Ended! ] Episode 136: Exploration_rate=0.01. Score=82.
[ episode 137 ] state=[-0.04011504  0.00272797  0.02955492 -0.00266256]
[ episode 137 ][ timestamp 1 ] state=[-0.04011504  0.00272797  0.02955492 -0.00266256], action=1, reward=1.0, next_state=[-0.04006048  0.19741386  0.02950167 -0.285876  ]
[ Experience replay ] starts
[ episode 137 ][ timestamp 2 ] state=[-0.04006048  0.19741386  0.02950167 -0.285876  ], action=0, reward=1.0, next_state=[-0.0361122   0.00188386  0.02378415  0.01596362]
[ Experience replay ] starts
[ episode 137 ][ timestamp 3 ] state=[-0.0361122   0.00188386  0.02378415  0.01596362], action=1, reward=1.0, next_state=[-0.03607452  0.19665679  0.02410343 -0.26912126]
[ Experience replay ] starts
[ episode 137 ][ timestamp 4 ] state=[-0.03607452  0.19665679  0.0241

[ episode 137 ][ timestamp 46 ] state=[ 0.01723465 -0.18337118 -0.06055768  0.09116472], action=1, reward=1.0, next_state=[ 0.01356723  0.01256413 -0.05873438 -0.21999259]
[ Experience replay ] starts
[ episode 137 ][ timestamp 47 ] state=[ 0.01356723  0.01256413 -0.05873438 -0.21999259], action=1, reward=1.0, next_state=[ 0.01381851  0.20847432 -0.06313423 -0.53060912]
[ Experience replay ] starts
[ episode 137 ][ timestamp 48 ] state=[ 0.01381851  0.20847432 -0.06313423 -0.53060912], action=0, reward=1.0, next_state=[ 0.017988    0.01429463 -0.07374642 -0.25846902]
[ Experience replay ] starts
[ episode 137 ][ timestamp 49 ] state=[ 0.017988    0.01429463 -0.07374642 -0.25846902], action=0, reward=1.0, next_state=[ 0.01827389 -0.17970126 -0.0789158   0.01007209]
[ Experience replay ] starts
[ episode 137 ][ timestamp 50 ] state=[ 0.01827389 -0.17970126 -0.0789158   0.01007209], action=1, reward=1.0, next_state=[ 0.01467986  0.01645855 -0.07871436 -0.30642906]
[ Experience replay ] st

[ episode 138 ][ timestamp 23 ] state=[-0.06011075 -0.40225461 -0.01980176  0.24893769], action=1, reward=1.0, next_state=[-0.06815584 -0.20685555 -0.01482301 -0.04992478]
[ Experience replay ] starts
[ episode 138 ][ timestamp 24 ] state=[-0.06815584 -0.20685555 -0.01482301 -0.04992478], action=0, reward=1.0, next_state=[-0.07229296 -0.40176185 -0.01582151  0.23804475]
[ Experience replay ] starts
[ episode 138 ][ timestamp 25 ] state=[-0.07229296 -0.40176185 -0.01582151  0.23804475], action=1, reward=1.0, next_state=[-0.08032819 -0.20641749 -0.01106061 -0.05958646]
[ Experience replay ] starts
[ episode 138 ][ timestamp 26 ] state=[-0.08032819 -0.20641749 -0.01106061 -0.05958646], action=0, reward=1.0, next_state=[-0.08445654 -0.40137912 -0.01225234  0.22958635]
[ Experience replay ] starts
[ episode 138 ][ timestamp 27 ] state=[-0.08445654 -0.40137912 -0.01225234  0.22958635], action=1, reward=1.0, next_state=[-0.09248412 -0.20608424 -0.00766061 -0.06693607]
[ Experience replay ] st

[ episode 138 ][ timestamp 65 ] state=[-0.36090732 -0.40924301  0.09310184  0.40464145], action=1, reward=1.0, next_state=[-0.36909218 -0.21555621  0.10119467  0.14270191]
[ Experience replay ] starts
[ episode 138 ][ timestamp 66 ] state=[-0.36909218 -0.21555621  0.10119467  0.14270191], action=0, reward=1.0, next_state=[-0.3734033  -0.41197087  0.10404871  0.46551716]
[ Experience replay ] starts
[ episode 138 ][ timestamp 67 ] state=[-0.3734033  -0.41197087  0.10404871  0.46551716], action=1, reward=1.0, next_state=[-0.38164272 -0.21846117  0.11335905  0.20735756]
[ Experience replay ] starts
[ episode 138 ][ timestamp 68 ] state=[-0.38164272 -0.21846117  0.11335905  0.20735756], action=1, reward=1.0, next_state=[-0.38601194 -0.02512732  0.11750621 -0.04752568]
[ Experience replay ] starts
[ episode 138 ][ timestamp 69 ] state=[-0.38601194 -0.02512732  0.11750621 -0.04752568], action=1, reward=1.0, next_state=[-0.38651449  0.16813079  0.11655569 -0.30094643]
[ Experience replay ] st

[ episode 139 ][ timestamp 21 ] state=[ 0.01322159 -0.05602021  0.04779257  0.2406455 ], action=1, reward=1.0, next_state=[ 0.01210118  0.1383876   0.05260548 -0.03658758]
[ Experience replay ] starts
[ episode 139 ][ timestamp 22 ] state=[ 0.01210118  0.1383876   0.05260548 -0.03658758], action=1, reward=1.0, next_state=[ 0.01486893  0.33271726  0.05187373 -0.31221994]
[ Experience replay ] starts
[ episode 139 ][ timestamp 23 ] state=[ 0.01486893  0.33271726  0.05187373 -0.31221994], action=0, reward=1.0, next_state=[ 0.02152328  0.13689615  0.04562933 -0.00363935]
[ Experience replay ] starts
[ episode 139 ][ timestamp 24 ] state=[ 0.02152328  0.13689615  0.04562933 -0.00363935], action=1, reward=1.0, next_state=[ 0.0242612   0.33133501  0.04555655 -0.28158371]
[ Experience replay ] starts
[ episode 139 ][ timestamp 25 ] state=[ 0.0242612   0.33133501  0.04555655 -0.28158371], action=1, reward=1.0, next_state=[ 0.0308879   0.52577854  0.03992487 -0.5595574 ]
[ Experience replay ] st

[ episode 139 ][ timestamp 68 ] state=[ 0.18716511  0.14010204  0.00870979 -0.07435277], action=0, reward=1.0, next_state=[ 0.18996715 -0.05514369  0.00722273  0.22106535]
[ Experience replay ] starts
[ episode 139 ][ timestamp 69 ] state=[ 0.18996715 -0.05514369  0.00722273  0.22106535], action=1, reward=1.0, next_state=[ 0.18886427  0.13987428  0.01164404 -0.06933051]
[ Experience replay ] starts
[ episode 139 ][ timestamp 70 ] state=[ 0.18886427  0.13987428  0.01164404 -0.06933051], action=0, reward=1.0, next_state=[ 0.19166176 -0.05541265  0.01025743  0.22700331]
[ Experience replay ] starts
[ episode 139 ][ timestamp 71 ] state=[ 0.19166176 -0.05541265  0.01025743  0.22700331], action=0, reward=1.0, next_state=[ 0.1905535  -0.25067968  0.01479749  0.52290407]
[ Experience replay ] starts
[ episode 139 ][ timestamp 72 ] state=[ 0.1905535  -0.25067968  0.01479749  0.52290407], action=1, reward=1.0, next_state=[ 0.18553991 -0.05576909  0.02525557  0.2349205 ]
[ Experience replay ] st

[ episode 142 ][ timestamp 5 ] state=[ 0.05608092  0.81047343 -0.0080179  -1.13720183], action=1, reward=1.0, next_state=[ 0.07229039  1.00569933 -0.03076193 -1.4323885 ]
[ Experience replay ] starts
[ episode 142 ][ timestamp 6 ] state=[ 0.07229039  1.00569933 -0.03076193 -1.4323885 ], action=1, reward=1.0, next_state=[ 0.09240438  1.20118708 -0.0594097  -1.73452397]
[ Experience replay ] starts
[ episode 142 ][ timestamp 7 ] state=[ 0.09240438  1.20118708 -0.0594097  -1.73452397], action=1, reward=1.0, next_state=[ 0.11642812  1.39693428 -0.09410018 -2.04508294]
[ Experience replay ] starts
[ episode 142 ][ timestamp 8 ] state=[ 0.11642812  1.39693428 -0.09410018 -2.04508294], action=1, reward=1.0, next_state=[ 0.14436681  1.59288783 -0.13500184 -2.36533751]
[ Experience replay ] starts
[ episode 142 ][ timestamp 9 ] state=[ 0.14436681  1.59288783 -0.13500184 -2.36533751], action=1, reward=1.0, next_state=[ 0.17622456  1.78892796 -0.18230859 -2.69629217]
[ Experience replay ] starts


[ episode 146 ][ timestamp 9 ] state=[ 0.07638647  1.58591508 -0.14837223 -2.40542786], action=1, reward=1.0, next_state=[ 0.10810477  1.78198581 -0.19648078 -2.73976417]
[ Experience replay ] starts
[ episode 146 ][ timestamp 10 ] state=[ 0.10810477  1.78198581 -0.19648078 -2.73976417], action=1, reward=-1.0, next_state=[ 0.14374449  1.97787773 -0.25127607 -3.08534291]
[ Experience replay ] starts
[ Ended! ] Episode 146: Exploration_rate=0.01. Score=10.
[ episode 147 ] state=[ 0.00292609 -0.02939866  0.02254782  0.0265651 ]
[ episode 147 ][ timestamp 1 ] state=[ 0.00292609 -0.02939866  0.02254782  0.0265651 ], action=1, reward=1.0, next_state=[ 0.00233811  0.1653928   0.02307912 -0.25891933]
[ Experience replay ] starts
[ episode 147 ][ timestamp 2 ] state=[ 0.00233811  0.1653928   0.02307912 -0.25891933], action=1, reward=1.0, next_state=[ 0.00564597  0.3601778   0.01790074 -0.54423436]
[ Experience replay ] starts
[ episode 147 ][ timestamp 3 ] state=[ 0.00564597  0.3601778   0.0179

[ episode 151 ][ timestamp 3 ] state=[ 0.00198926  0.42639944  0.03678995 -0.51373858], action=1, reward=1.0, next_state=[ 0.01051725  0.62098445  0.02651518 -0.79460479]
[ Experience replay ] starts
[ episode 151 ][ timestamp 4 ] state=[ 0.01051725  0.62098445  0.02651518 -0.79460479], action=1, reward=1.0, next_state=[ 0.02293694  0.81573263  0.01062308 -1.07882982]
[ Experience replay ] starts
[ episode 151 ][ timestamp 5 ] state=[ 0.02293694  0.81573263  0.01062308 -1.07882982], action=1, reward=1.0, next_state=[ 0.03925159  1.01071269 -0.01095352 -1.36816029]
[ Experience replay ] starts
[ episode 151 ][ timestamp 6 ] state=[ 0.03925159  1.01071269 -0.01095352 -1.36816029], action=1, reward=1.0, next_state=[ 0.05946585  1.20597001 -0.03831672 -1.66424895]
[ Experience replay ] starts
[ episode 151 ][ timestamp 7 ] state=[ 0.05946585  1.20597001 -0.03831672 -1.66424895], action=1, reward=1.0, next_state=[ 0.08358525  1.40151643 -0.0716017  -1.96861565]
[ Experience replay ] starts


[ episode 155 ][ timestamp 9 ] state=[ 0.13805116  1.52847163 -0.157448   -2.39676649], action=1, reward=1.0, next_state=[ 0.16862059  1.72458293 -0.20538333 -2.73339348]
[ Experience replay ] starts
[ episode 155 ][ timestamp 10 ] state=[ 0.16862059  1.72458293 -0.20538333 -2.73339348], action=1, reward=-1.0, next_state=[ 0.20311225  1.92048547 -0.2600512  -3.0810304 ]
[ Experience replay ] starts
[ Ended! ] Episode 155: Exploration_rate=0.01. Score=10.
[ episode 156 ] state=[-0.01576573  0.0240623  -0.01023444  0.04085849]
[ episode 156 ][ timestamp 1 ] state=[-0.01576573  0.0240623  -0.01023444  0.04085849], action=1, reward=1.0, next_state=[-0.01528448  0.21932951 -0.00941727 -0.25503585]
[ Experience replay ] starts
[ episode 156 ][ timestamp 2 ] state=[-0.01528448  0.21932951 -0.00941727 -0.25503585], action=1, reward=1.0, next_state=[-0.01089789  0.41458464 -0.01451798 -0.5506742 ]
[ Experience replay ] starts
[ episode 156 ][ timestamp 3 ] state=[-0.01089789  0.41458464 -0.0145

[ episode 159 ][ timestamp 10 ] state=[ 0.10545587  1.75202425 -0.1686442  -2.6590394 ], action=1, reward=-1.0, next_state=[ 0.14049635  1.9479575  -0.22182498 -2.9981165 ]
[ Experience replay ] starts
[ Ended! ] Episode 159: Exploration_rate=0.01. Score=10.
[ episode 160 ] state=[ 0.02788666 -0.01375458  0.0263237  -0.02025658]
[ episode 160 ][ timestamp 1 ] state=[ 0.02788666 -0.01375458  0.0263237  -0.02025658], action=1, reward=1.0, next_state=[ 0.02761157  0.18098016  0.02591857 -0.30451922]
[ Experience replay ] starts
[ episode 160 ][ timestamp 2 ] state=[ 0.02761157  0.18098016  0.02591857 -0.30451922], action=1, reward=1.0, next_state=[ 0.03123117  0.37572334  0.01982818 -0.58891668]
[ Experience replay ] starts
[ episode 160 ][ timestamp 3 ] state=[ 0.03123117  0.37572334  0.01982818 -0.58891668], action=1, reward=1.0, next_state=[ 0.03874564  0.57056211  0.00804985 -0.87528827]
[ Experience replay ] starts
[ episode 160 ][ timestamp 4 ] state=[ 0.03874564  0.57056211  0.0080

[ episode 163 ][ timestamp 15 ] state=[ 0.04333244  1.20910419 -0.1053056  -1.79854693], action=0, reward=1.0, next_state=[ 0.06751452  1.01530629 -0.14127654 -1.54036305]
[ Experience replay ] starts
[ episode 163 ][ timestamp 16 ] state=[ 0.06751452  1.01530629 -0.14127654 -1.54036305], action=0, reward=1.0, next_state=[ 0.08782065  0.82213789 -0.1720838  -1.29489451]
[ Experience replay ] starts
[ episode 163 ][ timestamp 17 ] state=[ 0.08782065  0.82213789 -0.1720838  -1.29489451], action=0, reward=1.0, next_state=[ 0.10426341  0.62956847 -0.19798169 -1.06065005]
[ Experience replay ] starts
[ episode 163 ][ timestamp 18 ] state=[ 0.10426341  0.62956847 -0.19798169 -1.06065005], action=0, reward=-1.0, next_state=[ 0.11685478  0.43753997 -0.21919469 -0.83606116]
[ Experience replay ] starts
[ Ended! ] Episode 163: Exploration_rate=0.01. Score=18.
[ episode 164 ] state=[ 0.03794026  0.00385104  0.00030561 -0.00850178]
[ episode 164 ][ timestamp 1 ] state=[ 0.03794026  0.00385104  0.0

[ Ended! ] Episode 167: Exploration_rate=0.01. Score=10.
[ episode 168 ] state=[ 0.00313351 -0.00710415  0.03041037  0.03271747]
[ episode 168 ][ timestamp 1 ] state=[ 0.00313351 -0.00710415  0.03041037  0.03271747], action=0, reward=1.0, next_state=[ 0.00299143 -0.2026487   0.03106472  0.33483795]
[ Experience replay ] starts
[ episode 168 ][ timestamp 2 ] state=[ 0.00299143 -0.2026487   0.03106472  0.33483795], action=0, reward=1.0, next_state=[-0.00106154 -0.39819868  0.03776148  0.63715296]
[ Experience replay ] starts
[ episode 168 ][ timestamp 3 ] state=[-0.00106154 -0.39819868  0.03776148  0.63715296], action=0, reward=1.0, next_state=[-0.00902552 -0.59382632  0.05050453  0.94148446]
[ Experience replay ] starts
[ episode 168 ][ timestamp 4 ] state=[-0.00902552 -0.59382632  0.05050453  0.94148446], action=0, reward=1.0, next_state=[-0.02090204 -0.78959118  0.06933422  1.24959936]
[ Experience replay ] starts
[ episode 168 ][ timestamp 5 ] state=[-0.02090204 -0.78959118  0.069334

[ episode 172 ][ timestamp 7 ] state=[-0.07485892 -1.133979    0.07677881  1.79596163], action=0, reward=1.0, next_state=[-0.0975385  -1.32987205  0.11269804  2.11148634]
[ Experience replay ] starts
[ episode 172 ][ timestamp 8 ] state=[-0.0975385  -1.32987205  0.11269804  2.11148634], action=0, reward=1.0, next_state=[-0.12413594 -1.52592601  0.15492777  2.43676486]
[ Experience replay ] starts
[ episode 172 ][ timestamp 9 ] state=[-0.12413594 -1.52592601  0.15492777  2.43676486], action=0, reward=1.0, next_state=[-0.15465446 -1.72199895  0.20366307  2.77271839]
[ Experience replay ] starts
[ episode 172 ][ timestamp 10 ] state=[-0.15465446 -1.72199895  0.20366307  2.77271839], action=0, reward=-1.0, next_state=[-0.18909444 -1.91785833  0.25911744  3.11989938]
[ Experience replay ] starts
[ Ended! ] Episode 172: Exploration_rate=0.01. Score=10.
[ episode 173 ] state=[-0.04622466 -0.01658417  0.04646375  0.03353385]
[ episode 173 ][ timestamp 1 ] state=[-0.04622466 -0.01658417  0.0464

[ episode 176 ][ timestamp 9 ] state=[-0.13693486 -1.5829181   0.12882085  2.31803625], action=0, reward=1.0, next_state=[-0.16859322 -1.77895699  0.17518158  2.6474267 ]
[ Experience replay ] starts
[ episode 176 ][ timestamp 10 ] state=[-0.16859322 -1.77895699  0.17518158  2.6474267 ], action=0, reward=-1.0, next_state=[-0.20417236 -1.97491292  0.22813011  2.98810227]
[ Experience replay ] starts
[ Ended! ] Episode 176: Exploration_rate=0.01. Score=10.
[ episode 177 ] state=[-0.00364855  0.04819682  0.02226791 -0.03699246]
[ episode 177 ][ timestamp 1 ] state=[-0.00364855  0.04819682  0.02226791 -0.03699246], action=0, reward=1.0, next_state=[-0.00268462 -0.14723726  0.02152806  0.26263221]
[ Experience replay ] starts
[ episode 177 ][ timestamp 2 ] state=[-0.00268462 -0.14723726  0.02152806  0.26263221], action=0, reward=1.0, next_state=[-0.00562936 -0.34265978  0.02678071  0.56202684]
[ Experience replay ] starts
[ episode 177 ][ timestamp 3 ] state=[-0.00562936 -0.34265978  0.0267

[ episode 181 ][ timestamp 4 ] state=[-0.00223257 -0.6134982   0.04529154  0.87469429], action=0, reward=1.0, next_state=[-0.01450253 -0.80920566  0.06278543  1.1812656 ]
[ Experience replay ] starts
[ episode 181 ][ timestamp 5 ] state=[-0.01450253 -0.80920566  0.06278543  1.1812656 ], action=0, reward=1.0, next_state=[-0.03068664 -1.00508384  0.08641074  1.49295074]
[ Experience replay ] starts
[ episode 181 ][ timestamp 6 ] state=[-0.03068664 -1.00508384  0.08641074  1.49295074], action=0, reward=1.0, next_state=[-0.05078832 -1.20114437  0.11626975  1.8113174 ]
[ Experience replay ] starts
[ episode 181 ][ timestamp 7 ] state=[-0.05078832 -1.20114437  0.11626975  1.8113174 ], action=0, reward=1.0, next_state=[-0.07481121 -1.39735427  0.1524961   2.13775147]
[ Experience replay ] starts
[ episode 181 ][ timestamp 8 ] state=[-0.07481121 -1.39735427  0.1524961   2.13775147], action=0, reward=1.0, next_state=[-0.10275829 -1.59362083  0.19525113  2.47339509]
[ Experience replay ] starts


[ episode 185 ][ timestamp 7 ] state=[-0.00707392 -1.12734105  0.0463326   1.73795442], action=0, reward=1.0, next_state=[-0.02962074 -1.32295934  0.08109169  2.04468387]
[ Experience replay ] starts
[ episode 185 ][ timestamp 8 ] state=[-0.02962074 -1.32295934  0.08109169  2.04468387], action=0, reward=1.0, next_state=[-0.05607992 -1.51881475  0.12198536  2.36131641]
[ Experience replay ] starts
[ episode 185 ][ timestamp 9 ] state=[-0.05607992 -1.51881475  0.12198536  2.36131641], action=0, reward=1.0, next_state=[-0.08645622 -1.71479458  0.16921169  2.68887649]
[ Experience replay ] starts
[ episode 185 ][ timestamp 10 ] state=[-0.08645622 -1.71479458  0.16921169  2.68887649], action=0, reward=-1.0, next_state=[-0.12075211 -1.9107028   0.22298922  3.028053  ]
[ Experience replay ] starts
[ Ended! ] Episode 185: Exploration_rate=0.01. Score=10.
[ episode 186 ] state=[-0.04080568  0.00208479 -0.02259522 -0.00050439]
[ episode 186 ][ timestamp 1 ] state=[-0.04080568  0.00208479 -0.0225

[ Ended! ] Episode 188: Exploration_rate=0.01. Score=13.
[ episode 189 ] state=[-0.01301399 -0.03763022  0.04787367 -0.00685203]
[ episode 189 ][ timestamp 1 ] state=[-0.01301399 -0.03763022  0.04787367 -0.00685203], action=0, reward=1.0, next_state=[-0.01376659 -0.2334049   0.04773663  0.30054301]
[ Experience replay ] starts
[ episode 189 ][ timestamp 2 ] state=[-0.01376659 -0.2334049   0.04773663  0.30054301], action=0, reward=1.0, next_state=[-0.01843469 -0.4291736   0.05374749  0.60789078]
[ Experience replay ] starts
[ episode 189 ][ timestamp 3 ] state=[-0.01843469 -0.4291736   0.05374749  0.60789078], action=0, reward=1.0, next_state=[-0.02701816 -0.62500416  0.06590531  0.9170066 ]
[ Experience replay ] starts
[ episode 189 ][ timestamp 4 ] state=[-0.02701816 -0.62500416  0.06590531  0.9170066 ], action=0, reward=1.0, next_state=[-0.03951824 -0.82095234  0.08424544  1.22965291]
[ Experience replay ] starts
[ episode 189 ][ timestamp 5 ] state=[-0.03951824 -0.82095234  0.084245

[ episode 191 ][ timestamp 18 ] state=[ 0.00090936 -0.1851509   0.00406523  0.27845842], action=1, reward=1.0, next_state=[-0.00279366  0.00991282  0.0096344  -0.01293957]
[ Experience replay ] starts
[ episode 191 ][ timestamp 19 ] state=[-0.00279366  0.00991282  0.0096344  -0.01293957], action=1, reward=1.0, next_state=[-0.0025954   0.20489529  0.00937561 -0.30256722]
[ Experience replay ] starts
[ episode 191 ][ timestamp 20 ] state=[-0.0025954   0.20489529  0.00937561 -0.30256722], action=0, reward=1.0, next_state=[ 0.0015025   0.00964098  0.00332426 -0.00694224]
[ Experience replay ] starts
[ episode 191 ][ timestamp 21 ] state=[ 0.0015025   0.00964098  0.00332426 -0.00694224], action=1, reward=1.0, next_state=[ 0.00169532  0.2047151   0.00318542 -0.29857447]
[ Experience replay ] starts
[ episode 191 ][ timestamp 22 ] state=[ 0.00169532  0.2047151   0.00318542 -0.29857447], action=1, reward=1.0, next_state=[ 0.00578962  0.3997915  -0.00278607 -0.59025107]
[ Experience replay ] st

[ episode 192 ][ timestamp 11 ] state=[ 0.02933402 -0.37511833  0.05639554  0.65555148], action=0, reward=1.0, next_state=[ 0.02183165 -0.5709782   0.06950657  0.96544571]
[ Experience replay ] starts
[ episode 192 ][ timestamp 12 ] state=[ 0.02183165 -0.5709782   0.06950657  0.96544571], action=1, reward=1.0, next_state=[ 0.01041208 -0.37685523  0.08881548  0.69538282]
[ Experience replay ] starts
[ episode 192 ][ timestamp 13 ] state=[ 0.01041208 -0.37685523  0.08881548  0.69538282], action=0, reward=1.0, next_state=[ 0.00287498 -0.57308942  0.10272314  1.01465137]
[ Experience replay ] starts
[ episode 192 ][ timestamp 14 ] state=[ 0.00287498 -0.57308942  0.10272314  1.01465137], action=0, reward=1.0, next_state=[-0.00858681 -0.76942018  0.12301616  1.33774262]
[ Experience replay ] starts
[ episode 192 ][ timestamp 15 ] state=[-0.00858681 -0.76942018  0.12301616  1.33774262], action=1, reward=1.0, next_state=[-0.02397521 -0.57604353  0.14977102  1.08594525]
[ Experience replay ] st

[ episode 194 ][ timestamp 9 ] state=[-0.01699001  0.0060698  -0.01689316 -0.01550323], action=1, reward=1.0, next_state=[-0.01686861  0.20142989 -0.01720323 -0.31346791]
[ Experience replay ] starts
[ episode 194 ][ timestamp 10 ] state=[-0.01686861  0.20142989 -0.01720323 -0.31346791], action=0, reward=1.0, next_state=[-0.01284001  0.00655718 -0.02347259 -0.0262596 ]
[ Experience replay ] starts
[ episode 194 ][ timestamp 11 ] state=[-0.01284001  0.00655718 -0.02347259 -0.0262596 ], action=1, reward=1.0, next_state=[-0.01270887  0.20200775 -0.02399778 -0.326255  ]
[ Experience replay ] starts
[ episode 194 ][ timestamp 12 ] state=[-0.01270887  0.20200775 -0.02399778 -0.326255  ], action=0, reward=1.0, next_state=[-0.00866872  0.00723554 -0.03052288 -0.04123548]
[ Experience replay ] starts
[ episode 194 ][ timestamp 13 ] state=[-0.00866872  0.00723554 -0.03052288 -0.04123548], action=0, reward=1.0, next_state=[-0.008524   -0.18743572 -0.03134759  0.24166307]
[ Experience replay ] sta

[ episode 194 ][ timestamp 52 ] state=[-0.00918999 -0.15574313 -0.16796008 -0.4636902 ], action=0, reward=1.0, next_state=[-0.01230485 -0.34814298 -0.17723389 -0.22830008]
[ Experience replay ] starts
[ episode 194 ][ timestamp 53 ] state=[-0.01230485 -0.34814298 -0.17723389 -0.22830008], action=1, reward=1.0, next_state=[-0.01926771 -0.15098951 -0.18179989 -0.5712321 ]
[ Experience replay ] starts
[ episode 194 ][ timestamp 54 ] state=[-0.01926771 -0.15098951 -0.18179989 -0.5712321 ], action=1, reward=1.0, next_state=[-0.0222875   0.04615355 -0.19322453 -0.91522851]
[ Experience replay ] starts
[ episode 194 ][ timestamp 55 ] state=[-0.0222875   0.04615355 -0.19322453 -0.91522851], action=0, reward=-1.0, next_state=[-0.02136443 -0.14590448 -0.2115291  -0.68895789]
[ Experience replay ] starts
[ Ended! ] Episode 194: Exploration_rate=0.01. Score=55.
[ episode 195 ] state=[-0.02503787  0.00643576  0.03536584  0.00891365]
[ episode 195 ][ timestamp 1 ] state=[-0.02503787  0.00643576  0.0

[ episode 196 ][ timestamp 9 ] state=[-0.05334946  0.01084294  0.05995442  0.0757598 ], action=0, reward=1.0, next_state=[-0.0531326  -0.18508499  0.06146962  0.38673968]
[ Experience replay ] starts
[ episode 196 ][ timestamp 10 ] state=[-0.0531326  -0.18508499  0.06146962  0.38673968], action=0, reward=1.0, next_state=[-0.0568343  -0.38102321  0.06920441  0.69815262]
[ Experience replay ] starts
[ episode 196 ][ timestamp 11 ] state=[-0.0568343  -0.38102321  0.06920441  0.69815262], action=1, reward=1.0, next_state=[-0.06445477 -0.18692562  0.08316746  0.42803299]
[ Experience replay ] starts
[ episode 196 ][ timestamp 12 ] state=[-0.06445477 -0.18692562  0.08316746  0.42803299], action=1, reward=1.0, next_state=[-0.06819328  0.00692605  0.09172812  0.1626836 ]
[ Experience replay ] starts
[ episode 196 ][ timestamp 13 ] state=[-0.06819328  0.00692605  0.09172812  0.1626836 ], action=1, reward=1.0, next_state=[-0.06805476  0.2006233   0.09498179 -0.09971054]
[ Experience replay ] sta

[ episode 197 ][ timestamp 26 ] state=[-0.01068126 -0.17524914  0.04611311  0.36503865], action=0, reward=1.0, next_state=[-0.01418625 -0.37099505  0.05341389  0.67189785]
[ Experience replay ] starts
[ episode 197 ][ timestamp 27 ] state=[-0.01418625 -0.37099505  0.05341389  0.67189785], action=0, reward=1.0, next_state=[-0.02160615 -0.56681722  0.06685184  0.98090841]
[ Experience replay ] starts
[ episode 197 ][ timestamp 28 ] state=[-0.02160615 -0.56681722  0.06685184  0.98090841], action=0, reward=1.0, next_state=[-0.03294249 -0.76276838  0.08647001  1.29381839]
[ Experience replay ] starts
[ episode 197 ][ timestamp 29 ] state=[-0.03294249 -0.76276838  0.08647001  1.29381839], action=1, reward=1.0, next_state=[-0.04819786 -0.56884516  0.11234638  1.02941087]
[ Experience replay ] starts
[ episode 197 ][ timestamp 30 ] state=[-0.04819786 -0.56884516  0.11234638  1.02941087], action=0, reward=1.0, next_state=[-0.05957476 -0.76526825  0.1329346   1.35514847]
[ Experience replay ] st

[ episode 198 ][ timestamp 34 ] state=[-0.02203629 -0.18220369  0.0254614   0.23795754], action=0, reward=1.0, next_state=[-0.02568037 -0.37767996  0.03022055  0.53856175]
[ Experience replay ] starts
[ episode 198 ][ timestamp 35 ] state=[-0.02568037 -0.37767996  0.03022055  0.53856175], action=1, reward=1.0, next_state=[-0.03323397 -0.1829956   0.04099179  0.25555206]
[ Experience replay ] starts
[ episode 198 ][ timestamp 36 ] state=[-0.03323397 -0.1829956   0.04099179  0.25555206], action=0, reward=1.0, next_state=[-0.03689388 -0.37867812  0.04610283  0.56087747]
[ Experience replay ] starts
[ episode 198 ][ timestamp 37 ] state=[-0.03689388 -0.37867812  0.04610283  0.56087747], action=0, reward=1.0, next_state=[-0.04446744 -0.57441575  0.05732038  0.86772138]
[ Experience replay ] starts
[ episode 198 ][ timestamp 38 ] state=[-0.04446744 -0.57441575  0.05732038  0.86772138], action=0, reward=1.0, next_state=[-0.05595576 -0.77026885  0.0746748   1.17786149]
[ Experience replay ] st

[ episode 200 ][ timestamp 6 ] state=[-0.00914199 -0.16163937  0.01012198  0.27328494], action=1, reward=1.0, next_state=[-0.01237477  0.0333367   0.01558768 -0.01618838]
[ Experience replay ] starts
[ episode 200 ][ timestamp 7 ] state=[-0.01237477  0.0333367   0.01558768 -0.01618838], action=0, reward=1.0, next_state=[-0.01170804 -0.16200528  0.01526392  0.2813716 ]
[ Experience replay ] starts
[ episode 200 ][ timestamp 8 ] state=[-0.01170804 -0.16200528  0.01526392  0.2813716 ], action=1, reward=1.0, next_state=[-0.01494815  0.03289565  0.02089135 -0.00645833]
[ Experience replay ] starts
[ episode 200 ][ timestamp 9 ] state=[-0.01494815  0.03289565  0.02089135 -0.00645833], action=1, reward=1.0, next_state=[-0.01429023  0.22771186  0.02076218 -0.29247726]
[ Experience replay ] starts
[ episode 200 ][ timestamp 10 ] state=[-0.01429023  0.22771186  0.02076218 -0.29247726], action=0, reward=1.0, next_state=[-0.009736    0.03230013  0.01491264  0.0066808 ]
[ Experience replay ] starts

[ episode 201 ][ timestamp 25 ] state=[-0.07646127 -0.79849816 -0.00759464  1.00711529], action=1, reward=1.0, next_state=[-0.09243123 -0.60327563  0.01254766  0.71205714]
[ Experience replay ] starts
[ episode 201 ][ timestamp 26 ] state=[-0.09243123 -0.60327563  0.01254766  0.71205714], action=0, reward=1.0, next_state=[-0.10449675 -0.79856906  0.0267888   1.00866314]
[ Experience replay ] starts
[ episode 201 ][ timestamp 27 ] state=[-0.10449675 -0.79856906  0.0267888   1.00866314], action=1, reward=1.0, next_state=[-0.12046813 -0.60381475  0.04696207  0.72451146]
[ Experience replay ] starts
[ episode 201 ][ timestamp 28 ] state=[-0.12046813 -0.60381475  0.04696207  0.72451146], action=0, reward=1.0, next_state=[-0.13254442 -0.79955362  0.0614523   1.03159782]
[ Experience replay ] starts
[ episode 201 ][ timestamp 29 ] state=[-0.13254442 -0.79955362  0.0614523   1.03159782], action=0, reward=1.0, next_state=[-0.14853549 -0.99543685  0.08208425  1.34292364]
[ Experience replay ] st

[ episode 202 ][ timestamp 30 ] state=[-0.14105671 -0.57697029 -0.1852488  -0.36610808], action=1, reward=1.0, next_state=[-0.15259611 -0.37976559 -0.19257096 -0.71100617]
[ Experience replay ] starts
[ episode 202 ][ timestamp 31 ] state=[-0.15259611 -0.37976559 -0.19257096 -0.71100617], action=0, reward=1.0, next_state=[-0.16019143 -0.57177337 -0.20679109 -0.48458487]
[ Experience replay ] starts
[ episode 202 ][ timestamp 32 ] state=[-0.16019143 -0.57177337 -0.20679109 -0.48458487], action=0, reward=-1.0, next_state=[-0.17162689 -0.76346919 -0.21648279 -0.26353152]
[ Experience replay ] starts
[ Ended! ] Episode 202: Exploration_rate=0.01. Score=32.
[ episode 203 ] state=[-0.00711852 -0.04237722 -0.02606771  0.04374384]
[ episode 203 ][ timestamp 1 ] state=[-0.00711852 -0.04237722 -0.02606771  0.04374384], action=0, reward=1.0, next_state=[-0.00796607 -0.23711586 -0.02519283  0.32808952]
[ Experience replay ] starts
[ episode 203 ][ timestamp 2 ] state=[-0.00796607 -0.23711586 -0.02

[ episode 205 ][ timestamp 2 ] state=[-0.03121622 -0.23794799  0.00366968  0.34117454], action=1, reward=1.0, next_state=[-0.03597518 -0.04287844  0.01049318  0.04965107]
[ Experience replay ] starts
[ episode 205 ][ timestamp 3 ] state=[-0.03597518 -0.04287844  0.01049318  0.04965107], action=1, reward=1.0, next_state=[-0.03683275  0.15209149  0.0114862  -0.23970278]
[ Experience replay ] starts
[ episode 205 ][ timestamp 4 ] state=[-0.03683275  0.15209149  0.0114862  -0.23970278], action=1, reward=1.0, next_state=[-0.03379092  0.34704748  0.00669214 -0.52874062]
[ Experience replay ] starts
[ episode 205 ][ timestamp 5 ] state=[-0.03379092  0.34704748  0.00669214 -0.52874062], action=1, reward=1.0, next_state=[-0.02684997  0.54207465 -0.00388267 -0.81930734]
[ Experience replay ] starts
[ episode 205 ][ timestamp 6 ] state=[-0.02684997  0.54207465 -0.00388267 -0.81930734], action=0, reward=1.0, next_state=[-0.01600848  0.34700605 -0.02026882 -0.52784816]
[ Experience replay ] starts


[ episode 207 ][ timestamp 5 ] state=[-0.02456684  0.05123973 -0.02020414  0.00267539], action=0, reward=1.0, next_state=[-0.02354204 -0.14358673 -0.02015063  0.28891581]
[ Experience replay ] starts
[ episode 207 ][ timestamp 6 ] state=[-0.02354204 -0.14358673 -0.02015063  0.28891581], action=1, reward=1.0, next_state=[-0.02641378  0.05181669 -0.01437231 -0.01005369]
[ Experience replay ] starts
[ episode 207 ][ timestamp 7 ] state=[-0.02641378  0.05181669 -0.01437231 -0.01005369], action=0, reward=1.0, next_state=[-0.02537744 -0.14309623 -0.01457339  0.27806017]
[ Experience replay ] starts
[ episode 207 ][ timestamp 8 ] state=[-0.02537744 -0.14309623 -0.01457339  0.27806017], action=0, reward=1.0, next_state=[-0.02823937 -0.33800727 -0.00901218  0.56611127]
[ Experience replay ] starts
[ episode 207 ][ timestamp 9 ] state=[-0.02823937 -0.33800727 -0.00901218  0.56611127], action=1, reward=1.0, next_state=[-0.03499951 -0.14276006  0.00231004  0.2706028 ]
[ Experience replay ] starts


[ episode 209 ][ timestamp 4 ] state=[ 0.01107766  0.22805932 -0.05282033 -0.36881005], action=1, reward=1.0, next_state=[ 0.01563884  0.42389044 -0.06019653 -0.67766901]
[ Experience replay ] starts
[ episode 209 ][ timestamp 5 ] state=[ 0.01563884  0.42389044 -0.06019653 -0.67766901], action=0, reward=1.0, next_state=[ 0.02411665  0.2296542  -0.07374991 -0.40452945]
[ Experience replay ] starts
[ episode 209 ][ timestamp 6 ] state=[ 0.02411665  0.2296542  -0.07374991 -0.40452945], action=0, reward=1.0, next_state=[ 0.02870974  0.0356514  -0.0818405  -0.13597912]
[ Experience replay ] starts
[ episode 209 ][ timestamp 7 ] state=[ 0.02870974  0.0356514  -0.0818405  -0.13597912], action=1, reward=1.0, next_state=[ 0.02942276  0.23184442 -0.08456008 -0.4533179 ]
[ Experience replay ] starts
[ episode 209 ][ timestamp 8 ] state=[ 0.02942276  0.23184442 -0.08456008 -0.4533179 ], action=0, reward=1.0, next_state=[ 0.03405965  0.03801365 -0.09362644 -0.18844164]
[ Experience replay ] starts


[ episode 210 ][ timestamp 31 ] state=[-0.05132607 -0.32518895 -0.06857887  0.04032143], action=1, reward=1.0, next_state=[-0.05782985 -0.12915402 -0.06777244 -0.27318615]
[ Experience replay ] starts
[ episode 210 ][ timestamp 32 ] state=[-0.05782985 -0.12915402 -0.06777244 -0.27318615], action=0, reward=1.0, next_state=[-0.06041293 -0.32324679 -0.07323616 -0.0026252 ]
[ Experience replay ] starts
[ episode 210 ][ timestamp 33 ] state=[-0.06041293 -0.32324679 -0.07323616 -0.0026252 ], action=1, reward=1.0, next_state=[-0.06687787 -0.1271551  -0.07328867 -0.31748648]
[ Experience replay ] starts
[ episode 210 ][ timestamp 34 ] state=[-0.06687787 -0.1271551  -0.07328867 -0.31748648], action=1, reward=1.0, next_state=[-0.06942097  0.06893003 -0.0796384  -0.6323522 ]
[ Experience replay ] starts
[ episode 210 ][ timestamp 35 ] state=[-0.06942097  0.06893003 -0.0796384  -0.6323522 ], action=1, reward=1.0, next_state=[-0.06804237  0.26506743 -0.09228544 -0.94901476]
[ Experience replay ] st

[ episode 212 ][ timestamp 6 ] state=[-0.03752525 -0.57605861 -0.00121679  0.88063928], action=0, reward=1.0, next_state=[-0.04904642 -0.77116401  0.016396    1.17293943]
[ Experience replay ] starts
[ episode 212 ][ timestamp 7 ] state=[-0.04904642 -0.77116401  0.016396    1.17293943], action=0, reward=1.0, next_state=[-0.0644697  -0.96649522  0.03985479  1.47071706]
[ Experience replay ] starts
[ episode 212 ][ timestamp 8 ] state=[-0.0644697  -0.96649522  0.03985479  1.47071706], action=0, reward=1.0, next_state=[-0.08379961 -1.16208134  0.06926913  1.77557747]
[ Experience replay ] starts
[ episode 212 ][ timestamp 9 ] state=[-0.08379961 -1.16208134  0.06926913  1.77557747], action=1, reward=1.0, next_state=[-0.10704123 -0.96780484  0.10478068  1.50521043]
[ Experience replay ] starts
[ episode 212 ][ timestamp 10 ] state=[-0.10704123 -0.96780484  0.10478068  1.50521043], action=1, reward=1.0, next_state=[-0.12639733 -0.77409849  0.13488489  1.24699365]
[ Experience replay ] starts

[ episode 214 ][ timestamp 15 ] state=[ 0.10429579 -0.39310934 -0.04332495  0.53055886], action=1, reward=1.0, next_state=[ 0.0964336  -0.19740558 -0.03271378  0.22454513]
[ Experience replay ] starts
[ episode 214 ][ timestamp 16 ] state=[ 0.0964336  -0.19740558 -0.03271378  0.22454513], action=1, reward=1.0, next_state=[ 0.09248549 -0.00183172 -0.02822287 -0.07827483]
[ Experience replay ] starts
[ episode 214 ][ timestamp 17 ] state=[ 0.09248549 -0.00183172 -0.02822287 -0.07827483], action=1, reward=1.0, next_state=[ 0.09244885  0.19368322 -0.02978837 -0.37972686]
[ Experience replay ] starts
[ episode 214 ][ timestamp 18 ] state=[ 0.09244885  0.19368322 -0.02978837 -0.37972686], action=1, reward=1.0, next_state=[ 0.09632252  0.38921524 -0.03738291 -0.68165127]
[ Experience replay ] starts
[ episode 214 ][ timestamp 19 ] state=[ 0.09632252  0.38921524 -0.03738291 -0.68165127], action=1, reward=1.0, next_state=[ 0.10410682  0.58483588 -0.05101593 -0.98586523]
[ Experience replay ] st

[ episode 214 ][ timestamp 59 ] state=[-0.27764172 -0.36504426  0.0942225  -0.08951961], action=0, reward=1.0, next_state=[-0.2849426  -0.56138162  0.09243211  0.23134054]
[ Experience replay ] starts
[ episode 214 ][ timestamp 60 ] state=[-0.2849426  -0.56138162  0.09243211  0.23134054], action=0, reward=1.0, next_state=[-0.29617023 -0.75769445  0.09705892  0.55168912]
[ Experience replay ] starts
[ episode 214 ][ timestamp 61 ] state=[-0.29617023 -0.75769445  0.09705892  0.55168912], action=1, reward=1.0, next_state=[-0.31132412 -0.56406002  0.1080927   0.29109503]
[ Experience replay ] starts
[ episode 214 ][ timestamp 62 ] state=[-0.31132412 -0.56406002  0.1080927   0.29109503], action=1, reward=1.0, next_state=[-0.32260532 -0.37063203  0.1139146   0.03436382]
[ Experience replay ] starts
[ episode 214 ][ timestamp 63 ] state=[-0.32260532 -0.37063203  0.1139146   0.03436382], action=1, reward=1.0, next_state=[-0.33001796 -0.17731232  0.11460188 -0.22031781]
[ Experience replay ] st

[ episode 215 ][ timestamp 19 ] state=[-0.0125696   0.05233257  0.01604537 -0.10374546], action=1, reward=1.0, next_state=[-0.01152294  0.24722094  0.01397047 -0.39132325]
[ Experience replay ] starts
[ episode 215 ][ timestamp 20 ] state=[-0.01152294  0.24722094  0.01397047 -0.39132325], action=0, reward=1.0, next_state=[-0.00657853  0.05190354  0.006144   -0.09426855]
[ Experience replay ] starts
[ episode 215 ][ timestamp 21 ] state=[-0.00657853  0.05190354  0.006144   -0.09426855], action=0, reward=1.0, next_state=[-0.00554045 -0.14330593  0.00425863  0.20034645]
[ Experience replay ] starts
[ episode 215 ][ timestamp 22 ] state=[-0.00554045 -0.14330593  0.00425863  0.20034645], action=1, reward=1.0, next_state=[-0.00840657  0.05175485  0.00826556 -0.09099004]
[ Experience replay ] starts
[ episode 215 ][ timestamp 23 ] state=[-0.00840657  0.05175485  0.00826556 -0.09099004], action=0, reward=1.0, next_state=[-0.00737148 -0.14348459  0.00644576  0.20428917]
[ Experience replay ] st

[ episode 216 ][ timestamp 12 ] state=[-0.02276156 -0.18015607  0.05294966  0.23789878], action=1, reward=1.0, next_state=[-0.02636468  0.01417102  0.05770764 -0.0376234 ]
[ Experience replay ] starts
[ episode 216 ][ timestamp 13 ] state=[-0.02636468  0.01417102  0.05770764 -0.0376234 ], action=1, reward=1.0, next_state=[-0.02608126  0.20841999  0.05695517 -0.31155521]
[ Experience replay ] starts
[ episode 216 ][ timestamp 14 ] state=[-0.02608126  0.20841999  0.05695517 -0.31155521], action=0, reward=1.0, next_state=[-0.02191286  0.01253482  0.05072407 -0.00146812]
[ Experience replay ] starts
[ episode 216 ][ timestamp 15 ] state=[-0.02191286  0.01253482  0.05072407 -0.00146812], action=1, reward=1.0, next_state=[-0.02166217  0.206894    0.05069471 -0.27772544]
[ Experience replay ] starts
[ episode 216 ][ timestamp 16 ] state=[-0.02166217  0.206894    0.05069471 -0.27772544], action=1, reward=1.0, next_state=[-0.01752429  0.40125746  0.0451402  -0.55399822]
[ Experience replay ] st

[ episode 217 ][ timestamp 25 ] state=[ 0.07081007  0.43191522 -0.17904199 -1.00465531], action=0, reward=1.0, next_state=[ 0.07944837  0.23957745 -0.19913509 -0.77311806]
[ Experience replay ] starts
[ episode 217 ][ timestamp 26 ] state=[ 0.07944837  0.23957745 -0.19913509 -0.77311806], action=1, reward=-1.0, next_state=[ 0.08423992  0.4368003  -0.21459745 -1.12126561]
[ Experience replay ] starts
[ Ended! ] Episode 217: Exploration_rate=0.01. Score=26.
[ episode 218 ] state=[-0.02431798 -0.00383046 -0.01255144  0.03690203]
[ episode 218 ][ timestamp 1 ] state=[-0.02431798 -0.00383046 -0.01255144  0.03690203], action=1, reward=1.0, next_state=[-0.02439459  0.19146922 -0.0118134  -0.25971443]
[ Experience replay ] starts
[ episode 218 ][ timestamp 2 ] state=[-0.02439459  0.19146922 -0.0118134  -0.25971443], action=1, reward=1.0, next_state=[-0.02056521  0.3867578  -0.01700769 -0.55609993]
[ Experience replay ] starts
[ episode 218 ][ timestamp 3 ] state=[-0.02056521  0.3867578  -0.017

[ episode 218 ][ timestamp 42 ] state=[-0.02131305  0.17779964  0.04251044  0.04181625], action=1, reward=1.0, next_state=[-0.01775706  0.37228704  0.04334676 -0.23715698]
[ Experience replay ] starts
[ episode 218 ][ timestamp 43 ] state=[-0.01775706  0.37228704  0.04334676 -0.23715698], action=1, reward=1.0, next_state=[-0.01031132  0.56676377  0.03860362 -0.5158581 ]
[ Experience replay ] starts
[ episode 218 ][ timestamp 44 ] state=[-0.01031132  0.56676377  0.03860362 -0.5158581 ], action=0, reward=1.0, next_state=[ 0.00102396  0.37112008  0.02828646 -0.21126456]
[ Experience replay ] starts
[ episode 218 ][ timestamp 45 ] state=[ 0.00102396  0.37112008  0.02828646 -0.21126456], action=1, reward=1.0, next_state=[ 0.00844636  0.56582641  0.02406117 -0.49489212]
[ Experience replay ] starts
[ episode 218 ][ timestamp 46 ] state=[ 0.00844636  0.56582641  0.02406117 -0.49489212], action=0, reward=1.0, next_state=[ 0.01976289  0.37037355  0.01416333 -0.19472438]
[ Experience replay ] st

[ episode 219 ][ timestamp 27 ] state=[-0.08401096  0.73501558  0.05140626 -1.12592888], action=1, reward=1.0, next_state=[-0.06931065  0.92942758  0.02888769 -1.40205486]
[ Experience replay ] starts
[ episode 219 ][ timestamp 28 ] state=[-0.06931065  0.92942758  0.02888769 -1.40205486], action=1, reward=1.0, next_state=[-5.07220951e-02  1.12417898e+00  8.46588413e-04 -1.68556828e+00]
[ Experience replay ] starts
[ episode 219 ][ timestamp 29 ] state=[-5.07220951e-02  1.12417898e+00  8.46588413e-04 -1.68556828e+00], action=1, reward=1.0, next_state=[-0.02823852  1.31929112 -0.03286478 -1.97798749]
[ Experience replay ] starts
[ episode 219 ][ timestamp 30 ] state=[-0.02823852  1.31929112 -0.03286478 -1.97798749], action=1, reward=1.0, next_state=[-1.85269305e-03  1.51474320e+00 -7.24245270e-02 -2.28066780e+00]
[ Experience replay ] starts
[ episode 219 ][ timestamp 31 ] state=[-1.85269305e-03  1.51474320e+00 -7.24245270e-02 -2.28066780e+00], action=1, reward=1.0, next_state=[ 0.028442

[ episode 222 ][ timestamp 2 ] state=[ 0.01656639 -0.21276465 -0.030592    0.28342765], action=1, reward=1.0, next_state=[ 0.01231109 -0.01722002 -0.02492345 -0.0187447 ]
[ Experience replay ] starts
[ episode 222 ][ timestamp 3 ] state=[ 0.01231109 -0.01722002 -0.02492345 -0.0187447 ], action=0, reward=1.0, next_state=[ 0.01196669 -0.21197584 -0.02529834  0.26597157]
[ Experience replay ] starts
[ episode 222 ][ timestamp 4 ] state=[ 0.01196669 -0.21197584 -0.02529834  0.26597157], action=1, reward=1.0, next_state=[ 0.00772718 -0.01650212 -0.01997891 -0.0345821 ]
[ Experience replay ] starts
[ episode 222 ][ timestamp 5 ] state=[ 0.00772718 -0.01650212 -0.01997891 -0.0345821 ], action=0, reward=1.0, next_state=[ 0.00739713 -0.21133196 -0.02067055  0.25173092]
[ Experience replay ] starts
[ episode 222 ][ timestamp 6 ] state=[ 0.00739713 -0.21133196 -0.02067055  0.25173092], action=1, reward=1.0, next_state=[ 0.00317049 -0.01592103 -0.01563593 -0.04739956]
[ Experience replay ] starts


[ episode 222 ][ timestamp 47 ] state=[-0.03227916 -0.36759031 -0.19531605 -0.31865612], action=1, reward=1.0, next_state=[-0.03963096 -0.1703009  -0.20168917 -0.66602197]
[ Experience replay ] starts
[ episode 222 ][ timestamp 48 ] state=[-0.03963096 -0.1703009  -0.20168917 -0.66602197], action=0, reward=-1.0, next_state=[-0.04303698 -0.36213155 -0.21500961 -0.44300413]
[ Experience replay ] starts
[ Ended! ] Episode 222: Exploration_rate=0.01. Score=48.
[ episode 223 ] state=[ 0.00203269 -0.0416713   0.00070437 -0.01157174]
[ episode 223 ][ timestamp 1 ] state=[ 0.00203269 -0.0416713   0.00070437 -0.01157174], action=1, reward=1.0, next_state=[ 0.00119926  0.15344054  0.00047294 -0.30403235]
[ Experience replay ] starts
[ episode 223 ][ timestamp 2 ] state=[ 0.00119926  0.15344054  0.00047294 -0.30403235], action=1, reward=1.0, next_state=[ 0.00426807  0.34855575 -0.00560771 -0.59656608]
[ Experience replay ] starts
[ episode 223 ][ timestamp 3 ] state=[ 0.00426807  0.34855575 -0.005

[ episode 223 ][ timestamp 42 ] state=[-0.19688769 -0.1892413  -0.06487682 -0.76888424], action=1, reward=1.0, next_state=[-0.20067251  0.00671081 -0.0802545  -1.08125447]
[ Experience replay ] starts
[ episode 223 ][ timestamp 43 ] state=[-0.20067251  0.00671081 -0.0802545  -1.08125447], action=1, reward=1.0, next_state=[-0.2005383   0.20279514 -0.10187959 -1.39800377]
[ Experience replay ] starts
[ episode 223 ][ timestamp 44 ] state=[-0.2005383   0.20279514 -0.10187959 -1.39800377], action=0, reward=1.0, next_state=[-0.19648239  0.0090769  -0.12983967 -1.13883393]
[ Experience replay ] starts
[ episode 223 ][ timestamp 45 ] state=[-0.19648239  0.0090769  -0.12983967 -1.13883393], action=0, reward=1.0, next_state=[-0.19630086 -0.18413065 -0.15261635 -0.88952774]
[ Experience replay ] starts
[ episode 223 ][ timestamp 46 ] state=[-0.19630086 -0.18413065 -0.15261635 -0.88952774], action=0, reward=1.0, next_state=[-0.19998347 -0.37688908 -0.1704069  -0.64844606]
[ Experience replay ] st

[ episode 226 ][ timestamp 10 ] state=[ 0.18345329  1.72983943 -0.19120604 -2.69075908], action=1, reward=-1.0, next_state=[ 0.21805008  1.92577703 -0.24502123 -3.03518192]
[ Experience replay ] starts
[ Ended! ] Episode 226: Exploration_rate=0.01. Score=10.
[ episode 227 ] state=[-0.00799127 -0.04159598 -0.01770179 -0.00688262]
[ episode 227 ][ timestamp 1 ] state=[-0.00799127 -0.04159598 -0.01770179 -0.00688262], action=1, reward=1.0, next_state=[-0.00882319  0.15377531 -0.01783944 -0.30509769]
[ Experience replay ] starts
[ episode 227 ][ timestamp 2 ] state=[-0.00882319  0.15377531 -0.01783944 -0.30509769], action=1, reward=1.0, next_state=[-0.00574769  0.34914688 -0.0239414  -0.60335294]
[ Experience replay ] starts
[ episode 227 ][ timestamp 3 ] state=[-0.00574769  0.34914688 -0.0239414  -0.60335294], action=1, reward=1.0, next_state=[ 0.00123525  0.54459536 -0.03600846 -0.90347973]
[ Experience replay ] starts
[ episode 227 ][ timestamp 4 ] state=[ 0.00123525  0.54459536 -0.0360

[ episode 231 ][ timestamp 4 ] state=[ 0.05434832  0.57446003  0.02345239 -0.80396527], action=1, reward=1.0, next_state=[ 0.06583752  0.76925271  0.00737309 -1.08917957]
[ Experience replay ] starts
[ episode 231 ][ timestamp 5 ] state=[ 0.06583752  0.76925271  0.00737309 -1.08917957], action=1, reward=1.0, next_state=[ 0.08122258  0.96427668 -0.01441051 -1.37953991]
[ Experience replay ] starts
[ episode 231 ][ timestamp 6 ] state=[ 0.08122258  0.96427668 -0.01441051 -1.37953991], action=0, reward=1.0, next_state=[ 0.10050811  0.76933758 -0.0420013  -1.09139815]
[ Experience replay ] starts
[ episode 231 ][ timestamp 7 ] state=[ 0.10050811  0.76933758 -0.0420013  -1.09139815], action=0, reward=1.0, next_state=[ 0.11589486  0.57479358 -0.06382927 -0.81218427]
[ Experience replay ] starts
[ episode 231 ][ timestamp 8 ] state=[ 0.11589486  0.57479358 -0.06382927 -0.81218427], action=0, reward=1.0, next_state=[ 0.12739073  0.38060131 -0.08007295 -0.54024211]
[ Experience replay ] starts


[ episode 232 ][ timestamp 25 ] state=[-0.04888299 -0.75997769 -0.04238129  0.48213105], action=0, reward=1.0, next_state=[-0.06408254 -0.95447662 -0.03273867  0.7611611 ]
[ Experience replay ] starts
[ episode 232 ][ timestamp 26 ] state=[-0.06408254 -0.95447662 -0.03273867  0.7611611 ], action=0, reward=1.0, next_state=[-0.08317207 -1.14913263 -0.01751545  1.0433652 ]
[ Experience replay ] starts
[ episode 232 ][ timestamp 27 ] state=[-0.08317207 -1.14913263 -0.01751545  1.0433652 ], action=1, reward=1.0, next_state=[-0.10615473 -0.95378252  0.00335186  0.7452357 ]
[ Experience replay ] starts
[ episode 232 ][ timestamp 28 ] state=[-0.10615473 -0.95378252  0.00335186  0.7452357 ], action=1, reward=1.0, next_state=[-0.12523038 -0.75870698  0.01825657  0.45360949]
[ Experience replay ] starts
[ episode 232 ][ timestamp 29 ] state=[-0.12523038 -0.75870698  0.01825657  0.45360949], action=1, reward=1.0, next_state=[-0.14040452 -0.56384788  0.02732876  0.16673668]
[ Experience replay ] st

[ episode 233 ][ timestamp 9 ] state=[-0.02852376  0.00664325  0.03340199  0.05037578], action=1, reward=1.0, next_state=[-0.02839089  0.20127072  0.03440951 -0.23158422]
[ Experience replay ] starts
[ episode 233 ][ timestamp 10 ] state=[-0.02839089  0.20127072  0.03440951 -0.23158422], action=0, reward=1.0, next_state=[-0.02436548  0.00567442  0.02977782  0.07175096]
[ Experience replay ] starts
[ episode 233 ][ timestamp 11 ] state=[-0.02436548  0.00567442  0.02977782  0.07175096], action=1, reward=1.0, next_state=[-0.02425199  0.20035709  0.03121284 -0.21139019]
[ Experience replay ] starts
[ episode 233 ][ timestamp 12 ] state=[-0.02425199  0.20035709  0.03121284 -0.21139019], action=0, reward=1.0, next_state=[-0.02024485  0.00480308  0.02698504  0.09097302]
[ Experience replay ] starts
[ episode 233 ][ timestamp 13 ] state=[-0.02024485  0.00480308  0.02698504  0.09097302], action=0, reward=1.0, next_state=[-0.02014879 -0.19069505  0.0288045   0.3920461 ]
[ Experience replay ] sta

[ episode 233 ][ timestamp 55 ] state=[-0.0093754   0.01078801  0.03285136 -0.0410992 ], action=1, reward=1.0, next_state=[-0.00915964  0.20542385  0.03202938 -0.32323887]
[ Experience replay ] starts
[ episode 233 ][ timestamp 56 ] state=[-0.00915964  0.20542385  0.03202938 -0.32323887], action=0, reward=1.0, next_state=[-0.00505116  0.00986081  0.0255646  -0.02062973]
[ Experience replay ] starts
[ episode 233 ][ timestamp 57 ] state=[-0.00505116  0.00986081  0.0255646  -0.02062973], action=1, reward=1.0, next_state=[-0.00485395  0.20460699  0.02515201 -0.30513837]
[ Experience replay ] starts
[ episode 233 ][ timestamp 58 ] state=[-0.00485395  0.20460699  0.02515201 -0.30513837], action=0, reward=1.0, next_state=[-0.00076181  0.0091358   0.01904924 -0.00463042]
[ Experience replay ] starts
[ episode 233 ][ timestamp 59 ] state=[-0.00076181  0.0091358   0.01904924 -0.00463042], action=1, reward=1.0, next_state=[-0.00057909  0.20397945  0.01895663 -0.29124273]
[ Experience replay ] st

[ episode 233 ][ timestamp 97 ] state=[-0.11174583 -0.7293662  -0.03040813  0.24029012], action=0, reward=1.0, next_state=[-0.12633315 -0.92404086 -0.02560232  0.52322851]
[ Experience replay ] starts
[ episode 233 ][ timestamp 98 ] state=[-0.12633315 -0.92404086 -0.02560232  0.52322851], action=1, reward=1.0, next_state=[-0.14481397 -0.72856811 -0.01513775  0.22258921]
[ Experience replay ] starts
[ episode 233 ][ timestamp 99 ] state=[-0.14481397 -0.72856811 -0.01513775  0.22258921], action=0, reward=1.0, next_state=[-0.15938533 -0.92347046 -0.01068597  0.51045891]
[ Experience replay ] starts
[ episode 233 ][ timestamp 100 ] state=[-0.15938533 -0.92347046 -0.01068597  0.51045891], action=0, reward=1.0, next_state=[-1.77854743e-01 -1.11844026e+00 -4.76790286e-04  7.99755298e-01]
[ Experience replay ] starts
[ episode 233 ][ timestamp 101 ] state=[-1.77854743e-01 -1.11844026e+00 -4.76790286e-04  7.99755298e-01], action=1, reward=1.0, next_state=[-0.20022355 -0.92331177  0.01551832  0.

[ episode 234 ][ timestamp 11 ] state=[ 0.00179467  0.032847   -0.0323036  -0.16959966], action=1, reward=1.0, next_state=[ 0.00245161  0.22841608 -0.03569559 -0.47229583]
[ Experience replay ] starts
[ episode 234 ][ timestamp 12 ] state=[ 0.00245161  0.22841608 -0.03569559 -0.47229583], action=0, reward=1.0, next_state=[ 0.00701993  0.03381599 -0.04514151 -0.19107392]
[ Experience replay ] starts
[ episode 234 ][ timestamp 13 ] state=[ 0.00701993  0.03381599 -0.04514151 -0.19107392], action=0, reward=1.0, next_state=[ 0.00769625 -0.16063208 -0.04896299  0.08703396]
[ Experience replay ] starts
[ episode 234 ][ timestamp 14 ] state=[ 0.00769625 -0.16063208 -0.04896299  0.08703396], action=1, reward=1.0, next_state=[ 0.00448361  0.03515627 -0.04722231 -0.22068597]
[ Experience replay ] starts
[ episode 234 ][ timestamp 15 ] state=[ 0.00448361  0.03515627 -0.04722231 -0.22068597], action=0, reward=1.0, next_state=[ 0.00518674 -0.15925999 -0.05163603  0.05673514]
[ Experience replay ] st

[ episode 234 ][ timestamp 56 ] state=[-0.04872799  1.02006042 -0.18393158 -1.91509004], action=1, reward=-1.0, next_state=[-0.02832678  1.21662558 -0.22223338 -2.25873585]
[ Experience replay ] starts
[ Ended! ] Episode 234: Exploration_rate=0.01. Score=56.
[ episode 235 ] state=[-0.00834208  0.02232874  0.0339267   0.04692574]
[ episode 235 ][ timestamp 1 ] state=[-0.00834208  0.02232874  0.0339267   0.04692574], action=1, reward=1.0, next_state=[-0.0078955   0.2169482   0.03486521 -0.23486292]
[ Experience replay ] starts
[ episode 235 ][ timestamp 2 ] state=[-0.0078955   0.2169482   0.03486521 -0.23486292], action=1, reward=1.0, next_state=[-0.00355654  0.41155511  0.03016796 -0.51634759]
[ Experience replay ] starts
[ episode 235 ][ timestamp 3 ] state=[-0.00355654  0.41155511  0.03016796 -0.51634759], action=1, reward=1.0, next_state=[ 0.00467456  0.60623955  0.019841   -0.79937334]
[ Experience replay ] starts
[ episode 235 ][ timestamp 4 ] state=[ 0.00467456  0.60623955  0.0198

[ episode 235 ][ timestamp 42 ] state=[-0.07004817 -1.67671334 -0.08772807  1.43049433], action=0, reward=1.0, next_state=[-0.10358244 -1.87064957 -0.05911818  1.69452099]
[ Experience replay ] starts
[ episode 235 ][ timestamp 43 ] state=[-0.10358244 -1.87064957 -0.05911818  1.69452099], action=0, reward=1.0, next_state=[-0.14099543 -2.06504152 -0.02522776  1.96822888]
[ Experience replay ] starts
[ episode 235 ][ timestamp 44 ] state=[-0.14099543 -2.06504152 -0.02522776  1.96822888], action=0, reward=1.0, next_state=[-0.18229626 -2.25988808  0.01413681  2.25298956]
[ Experience replay ] starts
[ episode 235 ][ timestamp 45 ] state=[-0.18229626 -2.25988808  0.01413681  2.25298956], action=1, reward=1.0, next_state=[-0.22749402 -2.06490169  0.05919661  1.96469528]
[ Experience replay ] starts
[ episode 235 ][ timestamp 46 ] state=[-0.22749402 -2.06490169  0.05919661  1.96469528], action=1, reward=1.0, next_state=[-0.26879205 -1.87045373  0.09849051  1.69092787]
[ Experience replay ] st

[ episode 236 ][ timestamp 32 ] state=[-0.0153213   0.23262683 -0.10635571 -0.63722432], action=0, reward=1.0, next_state=[-0.01066877  0.03913619 -0.11910019 -0.37983798]
[ Experience replay ] starts
[ episode 236 ][ timestamp 33 ] state=[-0.01066877  0.03913619 -0.11910019 -0.37983798], action=1, reward=1.0, next_state=[-0.00988604  0.23573029 -0.12669695 -0.70757285]
[ Experience replay ] starts
[ episode 236 ][ timestamp 34 ] state=[-0.00988604  0.23573029 -0.12669695 -0.70757285], action=1, reward=1.0, next_state=[-0.00517144  0.43235845 -0.14084841 -1.03730035]
[ Experience replay ] starts
[ episode 236 ][ timestamp 35 ] state=[-0.00517144  0.43235845 -0.14084841 -1.03730035], action=1, reward=1.0, next_state=[ 0.00347573  0.62904277 -0.16159442 -1.37067792]
[ Experience replay ] starts
[ episode 236 ][ timestamp 36 ] state=[ 0.00347573  0.62904277 -0.16159442 -1.37067792], action=1, reward=1.0, next_state=[ 0.01605659  0.82577455 -0.18900797 -1.70923332]
[ Experience replay ] st

[ episode 237 ][ timestamp 44 ] state=[ 0.05002368 -0.91217414 -0.09867981  0.72902429], action=0, reward=1.0, next_state=[ 0.03178019 -1.10580361 -0.08409932  0.98909072]
[ Experience replay ] starts
[ episode 237 ][ timestamp 45 ] state=[ 0.03178019 -1.10580361 -0.08409932  0.98909072], action=1, reward=1.0, next_state=[ 0.00966412 -0.90966267 -0.06431751  0.67122306]
[ Experience replay ] starts
[ episode 237 ][ timestamp 46 ] state=[ 0.00966412 -0.90966267 -0.06431751  0.67122306], action=1, reward=1.0, next_state=[-0.00852913 -0.71370834 -0.05089305  0.359003  ]
[ Experience replay ] starts
[ episode 237 ][ timestamp 47 ] state=[-0.00852913 -0.71370834 -0.05089305  0.359003  ], action=0, reward=1.0, next_state=[-0.0228033  -0.90807126 -0.04371299  0.6352138 ]
[ Experience replay ] starts
[ episode 237 ][ timestamp 48 ] state=[-0.0228033  -0.90807126 -0.04371299  0.6352138 ], action=1, reward=1.0, next_state=[-0.04096472 -0.71236775 -0.03100871  0.32909143]
[ Experience replay ] st

[ episode 237 ][ timestamp 85 ] state=[-0.45734815  0.07179208  0.00216968 -0.92290663], action=0, reward=1.0, next_state=[-0.45591231 -0.12335912 -0.01628845 -0.62954264]
[ Experience replay ] starts
[ episode 237 ][ timestamp 86 ] state=[-0.45591231 -0.12335912 -0.01628845 -0.62954264], action=1, reward=1.0, next_state=[-0.45837949  0.0719863  -0.0288793  -0.92731049]
[ Experience replay ] starts
[ episode 237 ][ timestamp 87 ] state=[-0.45837949  0.0719863  -0.0288793  -0.92731049], action=1, reward=1.0, next_state=[-0.45693976  0.26748604 -0.04742551 -1.22892716]
[ Experience replay ] starts
[ episode 237 ][ timestamp 88 ] state=[-0.45693976  0.26748604 -0.04742551 -1.22892716], action=0, reward=1.0, next_state=[-0.45159004  0.07300535 -0.07200406 -0.951472  ]
[ Experience replay ] starts
[ episode 237 ][ timestamp 89 ] state=[-0.45159004  0.07300535 -0.07200406 -0.951472  ], action=1, reward=1.0, next_state=[-0.45012993  0.26901861 -0.0910335  -1.26588094]
[ Experience replay ] st

[ episode 238 ][ timestamp 32 ] state=[-0.03631999 -0.16017281  0.07029007  0.3298366 ], action=1, reward=1.0, next_state=[-0.03952344  0.03388181  0.0768868   0.06012171]
[ Experience replay ] starts
[ episode 238 ][ timestamp 33 ] state=[-0.03952344  0.03388181  0.0768868   0.06012171], action=0, reward=1.0, next_state=[-0.03884581 -0.16225352  0.07808924  0.376038  ]
[ Experience replay ] starts
[ episode 238 ][ timestamp 34 ] state=[-0.03884581 -0.16225352  0.07808924  0.376038  ], action=1, reward=1.0, next_state=[-0.04209088  0.03167747  0.08561     0.1089629 ]
[ Experience replay ] starts
[ episode 238 ][ timestamp 35 ] state=[-0.04209088  0.03167747  0.08561     0.1089629 ], action=1, reward=1.0, next_state=[-0.04145733  0.22547495  0.08778925 -0.15553011]
[ Experience replay ] starts
[ episode 238 ][ timestamp 36 ] state=[-0.04145733  0.22547495  0.08778925 -0.15553011], action=0, reward=1.0, next_state=[-0.03694783  0.02921289  0.08467865  0.16350617]
[ Experience replay ] st

[ episode 239 ][ timestamp 28 ] state=[ 0.01149028  0.21737328 -0.04034284 -0.42986341], action=1, reward=1.0, next_state=[ 0.01583775  0.41304261 -0.04894011 -0.73498619]
[ Experience replay ] starts
[ episode 239 ][ timestamp 29 ] state=[ 0.01583775  0.41304261 -0.04894011 -0.73498619], action=0, reward=1.0, next_state=[ 0.0240986   0.21862968 -0.06363983 -0.45809859]
[ Experience replay ] starts
[ episode 239 ][ timestamp 30 ] state=[ 0.0240986   0.21862968 -0.06363983 -0.45809859], action=0, reward=1.0, next_state=[ 0.02847119  0.02446239 -0.0728018  -0.18613474]
[ Experience replay ] starts
[ episode 239 ][ timestamp 31 ] state=[ 0.02847119  0.02446239 -0.0728018  -0.18613474], action=1, reward=1.0, next_state=[ 0.02896044  0.22054638 -0.0765245  -0.50086644]
[ Experience replay ] starts
[ episode 239 ][ timestamp 32 ] state=[ 0.02896044  0.22054638 -0.0765245  -0.50086644], action=0, reward=1.0, next_state=[ 0.03337137  0.02658186 -0.08654182 -0.23324738]
[ Experience replay ] st

[ episode 239 ][ timestamp 73 ] state=[-0.36186469  0.06999739 -0.07022315 -1.19197171], action=0, reward=1.0, next_state=[-0.36046474 -0.12414809 -0.09406259 -0.92209988]
[ Experience replay ] starts
[ episode 239 ][ timestamp 74 ] state=[-0.36046474 -0.12414809 -0.09406259 -0.92209988], action=1, reward=1.0, next_state=[-0.3629477   0.07211032 -0.11250459 -1.24279975]
[ Experience replay ] starts
[ episode 239 ][ timestamp 75 ] state=[-0.3629477   0.07211032 -0.11250459 -1.24279975], action=0, reward=1.0, next_state=[-0.3615055  -0.12140244 -0.13736058 -0.9873723 ]
[ Experience replay ] starts
[ episode 239 ][ timestamp 76 ] state=[-0.3615055  -0.12140244 -0.13736058 -0.9873723 ], action=0, reward=1.0, next_state=[-0.36393355 -0.31444452 -0.15710803 -0.74079375]
[ Experience replay ] starts
[ episode 239 ][ timestamp 77 ] state=[-0.36393355 -0.31444452 -0.15710803 -0.74079375], action=0, reward=1.0, next_state=[-0.37022244 -0.5070889  -0.1719239  -0.5013861 ]
[ Experience replay ] st

[ episode 240 ][ timestamp 34 ] state=[-0.07034267 -0.93168507 -0.02441494  1.05811351], action=0, reward=1.0, next_state=[-0.08897637 -1.12647518 -0.00325267  1.34303433]
[ Experience replay ] starts
[ episode 240 ][ timestamp 35 ] state=[-0.08897637 -1.12647518 -0.00325267  1.34303433], action=1, reward=1.0, next_state=[-0.11150587 -0.93131246  0.02360802  1.04933551]
[ Experience replay ] starts
[ episode 240 ][ timestamp 36 ] state=[-0.11150587 -0.93131246  0.02360802  1.04933551], action=0, reward=1.0, next_state=[-0.13013212 -1.12673953  0.04459473  1.34933454]
[ Experience replay ] starts
[ episode 240 ][ timestamp 37 ] state=[-0.13013212 -1.12673953  0.04459473  1.34933454], action=0, reward=1.0, next_state=[-0.15266691 -1.32239255  0.07158142  1.6556288 ]
[ Experience replay ] starts
[ episode 240 ][ timestamp 38 ] state=[-0.15266691 -1.32239255  0.07158142  1.6556288 ], action=0, reward=1.0, next_state=[-0.17911476 -1.518273    0.10469399  1.96972402]
[ Experience replay ] st

[ episode 241 ][ timestamp 40 ] state=[ 0.04141516 -0.16578436 -0.03545171  0.04799024], action=0, reward=1.0, next_state=[ 0.03809948 -0.3603805  -0.0344919   0.32928041]
[ Experience replay ] starts
[ episode 241 ][ timestamp 41 ] state=[ 0.03809948 -0.3603805  -0.0344919   0.32928041], action=1, reward=1.0, next_state=[ 0.03089187 -0.16478495 -0.0279063   0.02592298]
[ Experience replay ] starts
[ episode 241 ][ timestamp 42 ] state=[ 0.03089187 -0.16478495 -0.0279063   0.02592298], action=0, reward=1.0, next_state=[ 0.02759617 -0.35949581 -0.02738784  0.30967218]
[ Experience replay ] starts
[ episode 241 ][ timestamp 43 ] state=[ 0.02759617 -0.35949581 -0.02738784  0.30967218], action=1, reward=1.0, next_state=[ 0.02040625 -0.16399457 -0.02119439  0.00847926]
[ Experience replay ] starts
[ episode 241 ][ timestamp 44 ] state=[ 0.02040625 -0.16399457 -0.02119439  0.00847926], action=0, reward=1.0, next_state=[ 0.01712636 -0.35880625 -0.02102481  0.29440047]
[ Experience replay ] st

[ episode 241 ][ timestamp 83 ] state=[ 0.0069801   0.79576947 -0.05308419 -1.10644986], action=0, reward=1.0, next_state=[ 0.02289549  0.60138407 -0.07521319 -0.8308819 ]
[ Experience replay ] starts
[ episode 241 ][ timestamp 84 ] state=[ 0.02289549  0.60138407 -0.07521319 -0.8308819 ], action=0, reward=1.0, next_state=[ 0.03492317  0.40736627 -0.09183083 -0.56276982]
[ Experience replay ] starts
[ episode 241 ][ timestamp 85 ] state=[ 0.03492317  0.40736627 -0.09183083 -0.56276982], action=1, reward=1.0, next_state=[ 0.04307049  0.60364871 -0.10308622 -0.88291326]
[ Experience replay ] starts
[ episode 241 ][ timestamp 86 ] state=[ 0.04307049  0.60364871 -0.10308622 -0.88291326], action=1, reward=1.0, next_state=[ 0.05514347  0.80000821 -0.12074449 -1.2061426 ]
[ Experience replay ] starts
[ episode 241 ][ timestamp 87 ] state=[ 0.05514347  0.80000821 -0.12074449 -1.2061426 ], action=1, reward=1.0, next_state=[ 0.07114363  0.99646552 -0.14486734 -1.53409571]
[ Experience replay ] st

[ episode 242 ][ timestamp 39 ] state=[-0.08995712 -0.72829145 -0.2022038   0.05474369], action=0, reward=1.0, next_state=[-0.10452295 -0.92002665 -0.20110893  0.27744334]
[ Experience replay ] starts
[ episode 242 ][ timestamp 40 ] state=[-0.10452295 -0.92002665 -0.20110893  0.27744334], action=0, reward=1.0, next_state=[-0.12292348 -1.11179664 -0.19556006  0.50057255]
[ Experience replay ] starts
[ episode 242 ][ timestamp 41 ] state=[-0.12292348 -1.11179664 -0.19556006  0.50057255], action=0, reward=1.0, next_state=[-0.14515942 -1.30370223 -0.18554861  0.72581518]
[ Experience replay ] starts
[ episode 242 ][ timestamp 42 ] state=[-0.14515942 -1.30370223 -0.18554861  0.72581518], action=1, reward=1.0, next_state=[-0.17123346 -1.10656566 -0.1710323   0.38094723]
[ Experience replay ] starts
[ episode 242 ][ timestamp 43 ] state=[-0.17123346 -1.10656566 -0.1710323   0.38094723], action=0, reward=1.0, next_state=[-0.19336477 -1.29889874 -0.16341336  0.61519882]
[ Experience replay ] st

[ episode 243 ][ timestamp 21 ] state=[ 0.1435456   0.05252508 -0.11822578 -0.05314827], action=1, reward=1.0, next_state=[ 0.1445961   0.24912635 -0.11928875 -0.38066906]
[ Experience replay ] starts
[ episode 243 ][ timestamp 22 ] state=[ 0.1445961   0.24912635 -0.11928875 -0.38066906], action=0, reward=1.0, next_state=[ 0.14957863  0.05588242 -0.12690213 -0.12785086]
[ Experience replay ] starts
[ episode 243 ][ timestamp 23 ] state=[ 0.14957863  0.05588242 -0.12690213 -0.12785086], action=0, reward=1.0, next_state=[ 0.15069628 -0.13721475 -0.12945915  0.1222566 ]
[ Experience replay ] starts
[ episode 243 ][ timestamp 24 ] state=[ 0.15069628 -0.13721475 -0.12945915  0.1222566 ], action=0, reward=1.0, next_state=[ 0.14795199 -0.3302672  -0.12701402  0.37145727]
[ Experience replay ] starts
[ episode 243 ][ timestamp 25 ] state=[ 0.14795199 -0.3302672  -0.12701402  0.37145727], action=0, reward=1.0, next_state=[ 0.14134664 -0.52337743 -0.11958487  0.62154745]
[ Experience replay ] st

[ episode 243 ][ timestamp 69 ] state=[-0.29679104 -1.0813843   0.06691138  0.89266847], action=1, reward=1.0, next_state=[-0.31841873 -0.88723062  0.08476475  0.62174693]
[ Experience replay ] starts
[ episode 243 ][ timestamp 70 ] state=[-0.31841873 -0.88723062  0.08476475  0.62174693], action=1, reward=1.0, next_state=[-0.33616334 -0.69338824  0.09719969  0.35691832]
[ Experience replay ] starts
[ episode 243 ][ timestamp 71 ] state=[-0.33616334 -0.69338824  0.09719969  0.35691832], action=1, reward=1.0, next_state=[-0.35003111 -0.49977288  0.10433806  0.09639785]
[ Experience replay ] starts
[ episode 243 ][ timestamp 72 ] state=[-0.35003111 -0.49977288  0.10433806  0.09639785], action=1, reward=1.0, next_state=[-0.36002656 -0.30628907  0.10626601 -0.16162977]
[ Experience replay ] starts
[ episode 243 ][ timestamp 73 ] state=[-0.36002656 -0.30628907  0.10626601 -0.16162977], action=0, reward=1.0, next_state=[-0.36615235 -0.50275915  0.10303342  0.16259638]
[ Experience replay ] st

[ episode 243 ][ timestamp 113 ] state=[-0.71014996 -1.84336921  0.04936404  1.65469553], action=0, reward=1.0, next_state=[-0.74701734 -2.0390313   0.08245796  1.96233827]
[ Experience replay ] starts
[ episode 243 ][ timestamp 114 ] state=[-0.74701734 -2.0390313   0.08245796  1.96233827], action=1, reward=1.0, next_state=[-0.78779797 -1.84487351  0.12170472  1.69630631]
[ Experience replay ] starts
[ episode 243 ][ timestamp 115 ] state=[-0.78779797 -1.84487351  0.12170472  1.69630631], action=1, reward=1.0, next_state=[-0.82469544 -1.65134769  0.15563085  1.44385774]
[ Experience replay ] starts
[ episode 243 ][ timestamp 116 ] state=[-0.82469544 -1.65134769  0.15563085  1.44385774], action=1, reward=1.0, next_state=[-0.85772239 -1.45844571  0.184508    1.20357289]
[ Experience replay ] starts
[ episode 243 ][ timestamp 117 ] state=[-0.85772239 -1.45844571  0.184508    1.20357289], action=1, reward=1.0, next_state=[-0.8868913  -1.2661246   0.20857946  0.97392582]
[ Experience replay

[ episode 244 ][ timestamp 43 ] state=[ 0.0073204  -0.80087785  0.035421    1.10751228], action=0, reward=1.0, next_state=[-0.00869716 -0.99644704  0.05757124  1.41109365]
[ Experience replay ] starts
[ episode 244 ][ timestamp 44 ] state=[-0.00869716 -0.99644704  0.05757124  1.41109365], action=0, reward=1.0, next_state=[-0.0286261  -1.19223362  0.08579311  1.72120355]
[ Experience replay ] starts
[ episode 244 ][ timestamp 45 ] state=[-0.0286261  -1.19223362  0.08579311  1.72120355], action=1, reward=1.0, next_state=[-0.05247077 -0.99819262  0.12021719  1.45640482]
[ Experience replay ] starts
[ episode 244 ][ timestamp 46 ] state=[-0.05247077 -0.99819262  0.12021719  1.45640482], action=1, reward=1.0, next_state=[-0.07243462 -0.80473362  0.14934528  1.2035695 ]
[ Experience replay ] starts
[ episode 244 ][ timestamp 47 ] state=[-0.07243462 -0.80473362  0.14934528  1.2035695 ], action=1, reward=1.0, next_state=[-0.0885293  -0.61182401  0.17341667  0.96117056]
[ Experience replay ] st

[ episode 245 ][ timestamp 36 ] state=[ 0.08473052  0.13892831 -0.03818645  0.23051167], action=0, reward=1.0, next_state=[ 0.08750909 -0.05562776 -0.03357621  0.51090892]
[ Experience replay ] starts
[ episode 245 ][ timestamp 37 ] state=[ 0.08750909 -0.05562776 -0.03357621  0.51090892], action=1, reward=1.0, next_state=[ 0.08639653  0.13995069 -0.02335803  0.20783705]
[ Experience replay ] starts
[ episode 245 ][ timestamp 38 ] state=[ 0.08639653  0.13995069 -0.02335803  0.20783705], action=0, reward=1.0, next_state=[ 0.08919555 -0.05482961 -0.01920129  0.49306116]
[ Experience replay ] starts
[ episode 245 ][ timestamp 39 ] state=[ 0.08919555 -0.05482961 -0.01920129  0.49306116], action=1, reward=1.0, next_state=[ 0.08809895  0.14055783 -0.00934007  0.1943892 ]
[ Experience replay ] starts
[ episode 245 ][ timestamp 40 ] state=[ 0.08809895  0.14055783 -0.00934007  0.1943892 ], action=0, reward=1.0, next_state=[ 0.09091011 -0.05442928 -0.00545229  0.48411116]
[ Experience replay ] st

[ episode 246 ][ timestamp 19 ] state=[-0.09233726  0.04074257  0.07531794  0.04290498], action=1, reward=1.0, next_state=[-0.09152241  0.23470821  0.07617604 -0.22509608]
[ Experience replay ] starts
[ episode 246 ][ timestamp 20 ] state=[-0.09152241  0.23470821  0.07617604 -0.22509608], action=0, reward=1.0, next_state=[-0.08682824  0.03858488  0.07167412  0.09060989]
[ Experience replay ] starts
[ episode 246 ][ timestamp 21 ] state=[-0.08682824  0.03858488  0.07167412  0.09060989], action=1, reward=1.0, next_state=[-0.08605654  0.2326102   0.07348631 -0.1786267 ]
[ Experience replay ] starts
[ episode 246 ][ timestamp 22 ] state=[-0.08605654  0.2326102   0.07348631 -0.1786267 ], action=1, reward=1.0, next_state=[-0.08140434  0.42660781  0.06991378 -0.44725221]
[ Experience replay ] starts
[ episode 246 ][ timestamp 23 ] state=[-0.08140434  0.42660781  0.06991378 -0.44725221], action=1, reward=1.0, next_state=[-0.07287218  0.62067469  0.06096874 -0.71710348]
[ Experience replay ] st

[ episode 246 ][ timestamp 61 ] state=[-0.04265741 -0.71055019  0.01140339  0.56987096], action=1, reward=1.0, next_state=[-0.05686841 -0.51559001  0.02280081  0.28080222]
[ Experience replay ] starts
[ episode 246 ][ timestamp 62 ] state=[-0.05686841 -0.51559001  0.02280081  0.28080222], action=0, reward=1.0, next_state=[-0.06718021 -0.71102966  0.02841685  0.58058835]
[ Experience replay ] starts
[ episode 246 ][ timestamp 63 ] state=[-0.06718021 -0.71102966  0.02841685  0.58058835], action=0, reward=1.0, next_state=[-0.0814008  -0.90653804  0.04002862  0.88208595]
[ Experience replay ] starts
[ episode 246 ][ timestamp 64 ] state=[-0.0814008  -0.90653804  0.04002862  0.88208595], action=0, reward=1.0, next_state=[-0.09953156 -1.10218014  0.05767034  1.1870793 ]
[ Experience replay ] starts
[ episode 246 ][ timestamp 65 ] state=[-0.09953156 -1.10218014  0.05767034  1.1870793 ], action=0, reward=1.0, next_state=[-0.12157517 -1.29800049  0.08141193  1.4972672 ]
[ Experience replay ] st

[ episode 247 ][ timestamp 32 ] state=[ 0.0950658   0.14845936 -0.064507   -0.17732709], action=0, reward=1.0, next_state=[ 0.09803499 -0.04568298 -0.06805354  0.09432883]
[ Experience replay ] starts
[ episode 247 ][ timestamp 33 ] state=[ 0.09803499 -0.04568298 -0.06805354  0.09432883], action=0, reward=1.0, next_state=[ 0.09712133 -0.23976688 -0.06616696  0.36478849]
[ Experience replay ] starts
[ episode 247 ][ timestamp 34 ] state=[ 0.09712133 -0.23976688 -0.06616696  0.36478849], action=0, reward=1.0, next_state=[ 0.09232599 -0.43388915 -0.05887119  0.63589583]
[ Experience replay ] starts
[ episode 247 ][ timestamp 35 ] state=[ 0.09232599 -0.43388915 -0.05887119  0.63589583], action=1, reward=1.0, next_state=[ 0.08364821 -0.23799768 -0.04615327  0.32526954]
[ Experience replay ] starts
[ episode 247 ][ timestamp 36 ] state=[ 0.08364821 -0.23799768 -0.04615327  0.32526954], action=0, reward=1.0, next_state=[ 0.07888825 -0.43243315 -0.03964788  0.60304792]
[ Experience replay ] st

[ episode 247 ][ timestamp 73 ] state=[ 0.06042577 -0.05442505  0.03073092  0.28706136], action=1, reward=1.0, next_state=[0.05933727 0.14024548 0.03647215 0.00422692]
[ Experience replay ] starts
[ episode 247 ][ timestamp 74 ] state=[0.05933727 0.14024548 0.03647215 0.00422692], action=0, reward=1.0, next_state=[ 0.06214218 -0.05538004  0.03655669  0.30819048]
[ Experience replay ] starts
[ episode 247 ][ timestamp 75 ] state=[ 0.06214218 -0.05538004  0.03655669  0.30819048], action=0, reward=1.0, next_state=[ 0.06103458 -0.25100329  0.0427205   0.61217459]
[ Experience replay ] starts
[ episode 247 ][ timestamp 76 ] state=[ 0.06103458 -0.25100329  0.0427205   0.61217459], action=1, reward=1.0, next_state=[ 0.05601451 -0.05650362  0.05496399  0.33324726]
[ Experience replay ] starts
[ episode 247 ][ timestamp 77 ] state=[ 0.05601451 -0.05650362  0.05496399  0.33324726], action=1, reward=1.0, next_state=[0.05488444 0.1377947  0.06162894 0.05839119]
[ Experience replay ] starts
[ episo

[ episode 247 ][ timestamp 114 ] state=[ 0.16598654 -1.00303048  0.00220673  1.15509879], action=0, reward=1.0, next_state=[ 0.14592593 -1.19818114  0.0253087   1.44847284]
[ Experience replay ] starts
[ episode 247 ][ timestamp 115 ] state=[ 0.14592593 -1.19818114  0.0253087   1.44847284], action=1, reward=1.0, next_state=[ 0.1219623  -1.00337932  0.05427816  1.16380366]
[ Experience replay ] starts
[ episode 247 ][ timestamp 116 ] state=[ 0.1219623  -1.00337932  0.05427816  1.16380366], action=1, reward=1.0, next_state=[ 0.10189472 -0.80900444  0.07755423  0.88862067]
[ Experience replay ] starts
[ episode 247 ][ timestamp 117 ] state=[ 0.10189472 -0.80900444  0.07755423  0.88862067], action=1, reward=1.0, next_state=[ 0.08571463 -0.6150158   0.09532665  0.62129043]
[ Experience replay ] starts
[ episode 247 ][ timestamp 118 ] state=[ 0.08571463 -0.6150158   0.09532665  0.62129043], action=1, reward=1.0, next_state=[ 0.07341431 -0.42134524  0.10775246  0.36008715]
[ Experience replay

[ episode 248 ][ timestamp 28 ] state=[-0.03407162 -0.15823788  0.11189811  0.43648131], action=1, reward=1.0, next_state=[-0.03723637  0.03513698  0.12062774  0.18106253]
[ Experience replay ] starts
[ episode 248 ][ timestamp 29 ] state=[-0.03723637  0.03513698  0.12062774  0.18106253], action=1, reward=1.0, next_state=[-0.03653363  0.22834482  0.12424899 -0.07126465]
[ Experience replay ] starts
[ episode 248 ][ timestamp 30 ] state=[-0.03653363  0.22834482  0.12424899 -0.07126465], action=1, reward=1.0, next_state=[-0.03196674  0.4214867   0.1228237  -0.3223088 ]
[ Experience replay ] starts
[ episode 248 ][ timestamp 31 ] state=[-0.03196674  0.4214867   0.1228237  -0.3223088 ], action=1, reward=1.0, next_state=[-0.023537    0.6146651   0.11637752 -0.57387402]
[ Experience replay ] starts
[ episode 248 ][ timestamp 32 ] state=[-0.023537    0.6146651   0.11637752 -0.57387402], action=1, reward=1.0, next_state=[-0.0112437   0.80797965  0.10490004 -0.8277466 ]
[ Experience replay ] st

[ episode 249 ][ timestamp 25 ] state=[ 0.01822995  0.37585553 -0.00115362 -0.50183148], action=1, reward=1.0, next_state=[ 0.02574706  0.57099372 -0.01119025 -0.79487774]
[ Experience replay ] starts
[ episode 249 ][ timestamp 26 ] state=[ 0.02574706  0.57099372 -0.01119025 -0.79487774], action=0, reward=1.0, next_state=[ 0.03716693  0.37602713 -0.02708781 -0.50573602]
[ Experience replay ] starts
[ episode 249 ][ timestamp 27 ] state=[ 0.03716693  0.37602713 -0.02708781 -0.50573602], action=0, reward=1.0, next_state=[ 0.04468748  0.18129716 -0.03720253 -0.22171107]
[ Experience replay ] starts
[ episode 249 ][ timestamp 28 ] state=[ 0.04468748  0.18129716 -0.03720253 -0.22171107], action=0, reward=1.0, next_state=[ 0.04831342 -0.01327383 -0.04163675  0.05900846]
[ Experience replay ] starts
[ episode 249 ][ timestamp 29 ] state=[ 0.04831342 -0.01327383 -0.04163675  0.05900846], action=0, reward=1.0, next_state=[ 0.04804794 -0.20777483 -0.04045658  0.33826942]
[ Experience replay ] st

[ episode 250 ][ timestamp 29 ] state=[-0.07580617  0.39858255  0.02057127 -0.49363375], action=0, reward=1.0, next_state=[-0.06783452  0.2031766   0.01069859 -0.19453932]
[ Experience replay ] starts
[ episode 250 ][ timestamp 30 ] state=[-0.06783452  0.2031766   0.01069859 -0.19453932], action=0, reward=1.0, next_state=[-0.06377099  0.00790326  0.00680781  0.10149926]
[ Experience replay ] starts
[ episode 250 ][ timestamp 31 ] state=[-0.06377099  0.00790326  0.00680781  0.10149926], action=0, reward=1.0, next_state=[-0.06361293 -0.18731559  0.00883779  0.39632223]
[ Experience replay ] starts
[ episode 250 ][ timestamp 32 ] state=[-0.06361293 -0.18731559  0.00883779  0.39632223], action=0, reward=1.0, next_state=[-0.06735924 -0.38256182  0.01676424  0.6917784 ]
[ Experience replay ] starts
[ episode 250 ][ timestamp 33 ] state=[-0.06735924 -0.38256182  0.01676424  0.6917784 ], action=1, reward=1.0, next_state=[-0.07501047 -0.18767642  0.03059981  0.40441985]
[ Experience replay ] st

[ episode 250 ][ timestamp 71 ] state=[-0.07072541 -0.3748969   0.06004519  0.52348682], action=1, reward=1.0, next_state=[-0.07822335 -0.18066915  0.07051492  0.25031292]
[ Experience replay ] starts
[ episode 250 ][ timestamp 72 ] state=[-0.07822335 -0.18066915  0.07051492  0.25031292], action=1, reward=1.0, next_state=[-0.08183673  0.01337869  0.07552118 -0.01932126]
[ Experience replay ] starts
[ episode 250 ][ timestamp 73 ] state=[-0.08183673  0.01337869  0.07552118 -0.01932126], action=1, reward=1.0, next_state=[-0.08156916  0.20734091  0.07513476 -0.28725316]
[ Experience replay ] starts
[ episode 250 ][ timestamp 74 ] state=[-0.08156916  0.20734091  0.07513476 -0.28725316], action=1, reward=1.0, next_state=[-0.07742234  0.40131544  0.06938969 -0.55532523]
[ Experience replay ] starts
[ episode 250 ][ timestamp 75 ] state=[-0.07742234  0.40131544  0.06938969 -0.55532523], action=1, reward=1.0, next_state=[-0.06939603  0.59539802  0.05828319 -0.82536431]
[ Experience replay ] st

[ episode 250 ][ timestamp 116 ] state=[ 0.15266262  0.19757455 -0.1269982  -0.07154276], action=0, reward=1.0, next_state=[ 0.15661411  0.00448045 -0.12842905  0.17852861]
[ Experience replay ] starts
[ episode 250 ][ timestamp 117 ] state=[ 0.15661411  0.00448045 -0.12842905  0.17852861], action=0, reward=1.0, next_state=[ 0.15670372 -0.18859208 -0.12485848  0.42809784]
[ Experience replay ] starts
[ episode 250 ][ timestamp 118 ] state=[ 0.15670372 -0.18859208 -0.12485848  0.42809784], action=0, reward=1.0, next_state=[ 0.15293188 -0.38174501 -0.11629652  0.67895869]
[ Experience replay ] starts
[ episode 250 ][ timestamp 119 ] state=[ 0.15293188 -0.38174501 -0.11629652  0.67895869], action=0, reward=1.0, next_state=[ 0.14529697 -0.57507581 -0.10271735  0.93288187]
[ Experience replay ] starts
[ episode 250 ][ timestamp 120 ] state=[ 0.14529697 -0.57507581 -0.10271735  0.93288187], action=1, reward=1.0, next_state=[ 0.13379546 -0.37872919 -0.08405971  0.60976846]
[ Experience replay

[ episode 250 ][ timestamp 161 ] state=[ 0.24982482 -0.7523067  -0.1770093   0.83988766], action=1, reward=1.0, next_state=[ 0.23477869 -0.55526709 -0.16021155  0.49717705]
[ Experience replay ] starts
[ episode 250 ][ timestamp 162 ] state=[ 0.23477869 -0.55526709 -0.16021155  0.49717705], action=0, reward=1.0, next_state=[ 0.22367335 -0.74781043 -0.15026801  0.7353924 ]
[ Experience replay ] starts
[ episode 250 ][ timestamp 163 ] state=[ 0.22367335 -0.74781043 -0.15026801  0.7353924 ], action=0, reward=1.0, next_state=[ 0.20871714 -0.9405725  -0.13556016  0.97726443]
[ Experience replay ] starts
[ episode 250 ][ timestamp 164 ] state=[ 0.20871714 -0.9405725  -0.13556016  0.97726443], action=1, reward=1.0, next_state=[ 0.18990569 -0.74391888 -0.11601487  0.64525748]
[ Experience replay ] starts
[ episode 250 ][ timestamp 165 ] state=[ 0.18990569 -0.74391888 -0.11601487  0.64525748], action=1, reward=1.0, next_state=[ 0.17502731 -0.5473877  -0.10310972  0.31841047]
[ Experience replay

[ episode 252 ][ timestamp 13 ] state=[ 0.13193122 -0.01399773 -0.14094599 -0.14270787], action=0, reward=1.0, next_state=[ 0.13165127 -0.20684929 -0.14380014  0.10239981]
[ Experience replay ] starts
[ episode 252 ][ timestamp 14 ] state=[ 0.13165127 -0.20684929 -0.14380014  0.10239981], action=0, reward=1.0, next_state=[ 0.12751428 -0.399649   -0.14175215  0.34648274]
[ Experience replay ] starts
[ episode 252 ][ timestamp 15 ] state=[ 0.12751428 -0.399649   -0.14175215  0.34648274], action=0, reward=1.0, next_state=[ 0.1195213  -0.59250004 -0.13482249  0.59132214]
[ Experience replay ] starts
[ episode 252 ][ timestamp 16 ] state=[ 0.1195213  -0.59250004 -0.13482249  0.59132214], action=1, reward=1.0, next_state=[ 0.1076713  -0.39577381 -0.12299605  0.25939283]
[ Experience replay ] starts
[ episode 252 ][ timestamp 17 ] state=[ 0.1076713  -0.39577381 -0.12299605  0.25939283], action=0, reward=1.0, next_state=[ 0.09975582 -0.58894486 -0.11780819  0.5108907 ]
[ Experience replay ] st

[ episode 253 ][ timestamp 11 ] state=[0.0036935  0.01052307 0.00220167 0.04588651], action=1, reward=1.0, next_state=[ 0.00390397  0.20561338  0.0031194  -0.24610096]
[ Experience replay ] starts
[ episode 253 ][ timestamp 12 ] state=[ 0.00390397  0.20561338  0.0031194  -0.24610096], action=0, reward=1.0, next_state=[ 0.00801623  0.01044701 -0.00180262  0.04756426]
[ Experience replay ] starts
[ episode 253 ][ timestamp 13 ] state=[ 0.00801623  0.01044701 -0.00180262  0.04756426], action=1, reward=1.0, next_state=[ 0.00822517  0.20559477 -0.00085134 -0.24568686]
[ Experience replay ] starts
[ episode 253 ][ timestamp 14 ] state=[ 0.00822517  0.20559477 -0.00085134 -0.24568686], action=0, reward=1.0, next_state=[ 0.01233707  0.01048498 -0.00576507  0.04672741]
[ Experience replay ] starts
[ episode 253 ][ timestamp 15 ] state=[ 0.01233707  0.01048498 -0.00576507  0.04672741], action=1, reward=1.0, next_state=[ 0.01254677  0.20568913 -0.00483053 -0.24776886]
[ Experience replay ] starts

[ episode 254 ][ timestamp 4 ] state=[ 0.03678943  0.60619058  0.02462814 -0.81492804], action=1, reward=1.0, next_state=[ 0.04891324  0.80096679  0.00832958 -1.0997638 ]
[ Experience replay ] starts
[ episode 254 ][ timestamp 5 ] state=[ 0.04891324  0.80096679  0.00832958 -1.0997638 ], action=0, reward=1.0, next_state=[ 0.06493257  0.6057362  -0.0136657  -0.80447922]
[ Experience replay ] starts
[ episode 254 ][ timestamp 6 ] state=[ 0.06493257  0.6057362  -0.0136657  -0.80447922], action=0, reward=1.0, next_state=[ 0.0770473   0.41080425 -0.02975529 -0.51612618]
[ Experience replay ] starts
[ episode 254 ][ timestamp 7 ] state=[ 0.0770473   0.41080425 -0.02975529 -0.51612618], action=0, reward=1.0, next_state=[ 0.08526338  0.21611366 -0.04007781 -0.23296633]
[ Experience replay ] starts
[ episode 254 ][ timestamp 8 ] state=[ 0.08526338  0.21611366 -0.04007781 -0.23296633], action=0, reward=1.0, next_state=[ 0.08958565  0.02158659 -0.04473714  0.04681023]
[ Experience replay ] starts


[ episode 255 ][ timestamp 8 ] state=[ 0.04324278  0.19922057  0.01427279 -0.27099656], action=0, reward=1.0, next_state=[0.04722719 0.00389789 0.00885286 0.02615368]
[ Experience replay ] starts
[ episode 255 ][ timestamp 9 ] state=[0.04722719 0.00389789 0.00885286 0.02615368], action=1, reward=1.0, next_state=[ 0.04730515  0.19889177  0.00937593 -0.26372298]
[ Experience replay ] starts
[ episode 255 ][ timestamp 10 ] state=[ 0.04730515  0.19889177  0.00937593 -0.26372298], action=0, reward=1.0, next_state=[0.05128298 0.00363725 0.00410148 0.03190241]
[ Experience replay ] starts
[ episode 255 ][ timestamp 11 ] state=[0.05128298 0.00363725 0.00410148 0.03190241], action=1, reward=1.0, next_state=[ 0.05135573  0.19870015  0.00473952 -0.25948364]
[ Experience replay ] starts
[ episode 255 ][ timestamp 12 ] state=[ 0.05135573  0.19870015  0.00473952 -0.25948364], action=0, reward=1.0, next_state=[ 0.05532973  0.00351086 -0.00045015  0.03469042]
[ Experience replay ] starts
[ episode 255

[ episode 255 ][ timestamp 49 ] state=[-0.00720854 -0.75588795  0.01581713  0.7403306 ], action=1, reward=1.0, next_state=[-0.0223263  -0.56098791  0.03062374  0.45266715]
[ Experience replay ] starts
[ episode 255 ][ timestamp 50 ] state=[-0.0223263  -0.56098791  0.03062374  0.45266715], action=0, reward=1.0, next_state=[-0.03354606 -0.75652924  0.03967708  0.7548436 ]
[ Experience replay ] starts
[ episode 255 ][ timestamp 51 ] state=[-0.03354606 -0.75652924  0.03967708  0.7548436 ], action=1, reward=1.0, next_state=[-0.04867664 -0.56197607  0.05477396  0.47490552]
[ Experience replay ] starts
[ episode 255 ][ timestamp 52 ] state=[-0.04867664 -0.56197607  0.05477396  0.47490552], action=1, reward=1.0, next_state=[-0.05991616 -0.36766865  0.06427207  0.199977  ]
[ Experience replay ] starts
[ episode 255 ][ timestamp 53 ] state=[-0.05991616 -0.36766865  0.06427207  0.199977  ], action=1, reward=1.0, next_state=[-0.06726954 -0.17352202  0.06827161 -0.07175868]
[ Experience replay ] st

[ episode 255 ][ timestamp 91 ] state=[-0.09854368  0.78570278 -0.04167931 -1.17429227], action=1, reward=1.0, next_state=[-0.08282963  0.98134087 -0.06516516 -1.47974473]
[ Experience replay ] starts
[ episode 255 ][ timestamp 92 ] state=[-0.08282963  0.98134087 -0.06516516 -1.47974473], action=0, reward=1.0, next_state=[-0.06320281  0.78707201 -0.09476005 -1.20810493]
[ Experience replay ] starts
[ episode 255 ][ timestamp 93 ] state=[-0.06320281  0.78707201 -0.09476005 -1.20810493], action=1, reward=1.0, next_state=[-0.04746137  0.98328158 -0.11892215 -1.52891666]
[ Experience replay ] starts
[ episode 255 ][ timestamp 94 ] state=[-0.04746137  0.98328158 -0.11892215 -1.52891666], action=1, reward=1.0, next_state=[-0.02779574  1.17962022 -0.14950048 -1.85622531]
[ Experience replay ] starts
[ episode 255 ][ timestamp 95 ] state=[-0.02779574  1.17962022 -0.14950048 -1.85622531], action=1, reward=1.0, next_state=[-0.00420334  1.37603492 -0.18662499 -2.19135061]
[ Experience replay ] st

[ episode 257 ][ timestamp 1 ] state=[-0.03337512  0.02041422 -0.01963308  0.02161838], action=0, reward=1.0, next_state=[-0.03296683 -0.17442075 -0.01920071  0.30804276]
[ Experience replay ] starts
[ episode 257 ][ timestamp 2 ] state=[-0.03296683 -0.17442075 -0.01920071  0.30804276], action=0, reward=1.0, next_state=[-0.03645525 -0.36926393 -0.01303986  0.59460898]
[ Experience replay ] starts
[ episode 257 ][ timestamp 3 ] state=[-0.03645525 -0.36926393 -0.01303986  0.59460898], action=1, reward=1.0, next_state=[-0.04384053 -0.17396192 -0.00114768  0.29784726]
[ Experience replay ] starts
[ episode 257 ][ timestamp 4 ] state=[-0.04384053 -0.17396192 -0.00114768  0.29784726], action=1, reward=1.0, next_state=[-0.04731977  0.02117638  0.00480927  0.0048026 ]
[ Experience replay ] starts
[ episode 257 ][ timestamp 5 ] state=[-0.04731977  0.02117638  0.00480927  0.0048026 ], action=0, reward=1.0, next_state=[-0.04689624 -0.17401421  0.00490532  0.29899902]
[ Experience replay ] starts


[ episode 257 ][ timestamp 43 ] state=[ 0.08875187  0.39386737  0.00932456 -0.19345061], action=1, reward=1.0, next_state=[ 0.09662922  0.5888547   0.00545555 -0.48317751]
[ Experience replay ] starts
[ episode 257 ][ timestamp 44 ] state=[ 0.09662922  0.5888547   0.00545555 -0.48317751], action=0, reward=1.0, next_state=[ 0.10840631  0.39365618 -0.004208   -0.18878016]
[ Experience replay ] starts
[ episode 257 ][ timestamp 45 ] state=[ 0.10840631  0.39365618 -0.004208   -0.18878016], action=1, reward=1.0, next_state=[ 0.11627944  0.58883808 -0.00798361 -0.48278757]
[ Experience replay ] starts
[ episode 257 ][ timestamp 46 ] state=[ 0.11627944  0.58883808 -0.00798361 -0.48278757], action=0, reward=1.0, next_state=[ 0.1280562   0.39382971 -0.01763936 -0.1926315 ]
[ Experience replay ] starts
[ episode 257 ][ timestamp 47 ] state=[ 0.1280562   0.39382971 -0.01763936 -0.1926315 ], action=1, reward=1.0, next_state=[ 0.13593279  0.5891995  -0.02149199 -0.49082629]
[ Experience replay ] st

[ episode 259 ][ timestamp 6 ] state=[-0.03657953 -0.97982684  0.06789128  1.50757442], action=0, reward=1.0, next_state=[-0.05617607 -1.17570308  0.09804277  1.82065662]
[ Experience replay ] starts
[ episode 259 ][ timestamp 7 ] state=[-0.05617607 -1.17570308  0.09804277  1.82065662], action=1, reward=1.0, next_state=[-0.07969013 -0.9817976   0.1344559   1.55997362]
[ Experience replay ] starts
[ episode 259 ][ timestamp 8 ] state=[-0.07969013 -0.9817976   0.1344559   1.55997362], action=1, reward=1.0, next_state=[-0.09932608 -0.78851666  0.16565537  1.31207996]
[ Experience replay ] starts
[ episode 259 ][ timestamp 9 ] state=[-0.09932608 -0.78851666  0.16565537  1.31207996], action=0, reward=1.0, next_state=[-0.11509642 -0.98530264  0.19189697  1.65169831]
[ Experience replay ] starts
[ episode 259 ][ timestamp 10 ] state=[-0.11509642 -0.98530264  0.19189697  1.65169831], action=1, reward=-1.0, next_state=[-0.13480247 -0.79287026  0.22493094  1.4244202 ]
[ Experience replay ] start

[ episode 260 ][ timestamp 40 ] state=[ 0.01985182 -0.20498663  0.0673011   0.32126028], action=0, reward=1.0, next_state=[ 0.01575209 -0.40099923  0.07372631  0.63438515]
[ Experience replay ] starts
[ episode 260 ][ timestamp 41 ] state=[ 0.01575209 -0.40099923  0.07372631  0.63438515], action=1, reward=1.0, next_state=[ 0.00773211 -0.2069789   0.08641401  0.36580116]
[ Experience replay ] starts
[ episode 260 ][ timestamp 42 ] state=[ 0.00773211 -0.2069789   0.08641401  0.36580116], action=1, reward=1.0, next_state=[ 0.00359253 -0.01318445  0.09373003  0.10156827]
[ Experience replay ] starts
[ episode 260 ][ timestamp 43 ] state=[ 0.00359253 -0.01318445  0.09373003  0.10156827], action=1, reward=1.0, next_state=[ 0.00332884  0.18047795  0.0957614  -0.16013392]
[ Experience replay ] starts
[ episode 260 ][ timestamp 44 ] state=[ 0.00332884  0.18047795  0.0957614  -0.16013392], action=1, reward=1.0, next_state=[ 0.0069384   0.37410778  0.09255872 -0.42113713]
[ Experience replay ] st

[ episode 260 ][ timestamp 81 ] state=[ 0.25095241 -0.39314481 -0.13202735  0.46410091], action=0, reward=1.0, next_state=[ 0.24308952 -0.58617811 -0.12274533  0.71242755]
[ Experience replay ] starts
[ episode 260 ][ timestamp 82 ] state=[ 0.24308952 -0.58617811 -0.12274533  0.71242755], action=1, reward=1.0, next_state=[ 0.23136595 -0.3895897  -0.10849678  0.38376698]
[ Experience replay ] starts
[ episode 260 ][ timestamp 83 ] state=[ 0.23136595 -0.3895897  -0.10849678  0.38376698], action=1, reward=1.0, next_state=[ 0.22357416 -0.19310806 -0.10082144  0.05894198]
[ Experience replay ] starts
[ episode 260 ][ timestamp 84 ] state=[ 0.22357416 -0.19310806 -0.10082144  0.05894198], action=1, reward=1.0, next_state=[ 0.219712    0.0033041  -0.0996426  -0.26377146]
[ Experience replay ] starts
[ episode 260 ][ timestamp 85 ] state=[ 0.219712    0.0033041  -0.0996426  -0.26377146], action=0, reward=1.0, next_state=[ 0.21977808 -0.19026486 -0.10491803 -0.0041047 ]
[ Experience replay ] st

[ episode 261 ][ timestamp 18 ] state=[ 0.00921269  0.17601675  0.02815327 -0.20142779], action=0, reward=1.0, next_state=[ 0.01273303 -0.0194963   0.02412471  0.10000153]
[ Experience replay ] starts
[ episode 261 ][ timestamp 19 ] state=[ 0.01273303 -0.0194963   0.02412471  0.10000153], action=1, reward=1.0, next_state=[ 0.0123431   0.17527175  0.02612474 -0.18497354]
[ Experience replay ] starts
[ episode 261 ][ timestamp 20 ] state=[ 0.0123431   0.17527175  0.02612474 -0.18497354], action=1, reward=1.0, next_state=[ 0.01584854  0.37001035  0.02242527 -0.46930197]
[ Experience replay ] starts
[ episode 261 ][ timestamp 21 ] state=[ 0.01584854  0.37001035  0.02242527 -0.46930197], action=1, reward=1.0, next_state=[ 0.02324874  0.56480845  0.01303923 -0.75483318]
[ Experience replay ] starts
[ episode 261 ][ timestamp 22 ] state=[ 0.02324874  0.56480845  0.01303923 -0.75483318], action=1, reward=1.0, next_state=[ 0.03454491  0.75974824 -0.00205743 -1.04338458]
[ Experience replay ] st

[ episode 262 ][ timestamp 23 ] state=[ 0.05652884 -0.33267269 -0.05014746  0.11934304], action=0, reward=1.0, next_state=[ 0.04987539 -0.52704162 -0.0477606   0.39579274]
[ Experience replay ] starts
[ episode 262 ][ timestamp 24 ] state=[ 0.04987539 -0.52704162 -0.0477606   0.39579274], action=0, reward=1.0, next_state=[ 0.03933456 -0.72145453 -0.03984475  0.67304329]
[ Experience replay ] starts
[ episode 262 ][ timestamp 25 ] state=[ 0.03933456 -0.72145453 -0.03984475  0.67304329], action=1, reward=1.0, next_state=[ 0.02490547 -0.52580207 -0.02638388  0.36808628]
[ Experience replay ] starts
[ episode 262 ][ timestamp 26 ] state=[ 0.02490547 -0.52580207 -0.02638388  0.36808628], action=0, reward=1.0, next_state=[ 0.01438943 -0.72053938 -0.01902216  0.65233462]
[ Experience replay ] starts
[ episode 262 ][ timestamp 27 ] state=[ 0.01438943 -0.72053938 -0.01902216  0.65233462], action=0, reward=1.0, next_state=[-2.13609627e-05 -9.15391334e-01 -5.97546328e-03  9.38967493e-01]
[ Experi

[ episode 262 ][ timestamp 67 ] state=[-0.00346036 -0.36325874  0.05426645  0.79430524], action=1, reward=1.0, next_state=[-0.01072554 -0.16892197  0.07015255  0.5191757 ]
[ Experience replay ] starts
[ episode 262 ][ timestamp 68 ] state=[-0.01072554 -0.16892197  0.07015255  0.5191757 ], action=1, reward=1.0, next_state=[-0.01410398  0.02514585  0.08053607  0.24939793]
[ Experience replay ] starts
[ episode 262 ][ timestamp 69 ] state=[-0.01410398  0.02514585  0.08053607  0.24939793], action=1, reward=1.0, next_state=[-0.01360106  0.21903085  0.08552402 -0.0168349 ]
[ Experience replay ] starts
[ episode 262 ][ timestamp 70 ] state=[-0.01360106  0.21903085  0.08552402 -0.0168349 ], action=1, reward=1.0, next_state=[-0.00922044  0.41282879  0.08518733 -0.28135591]
[ Experience replay ] starts
[ episode 262 ][ timestamp 71 ] state=[-0.00922044  0.41282879  0.08518733 -0.28135591], action=1, reward=1.0, next_state=[-0.00096387  0.60663884  0.07956021 -0.54600199]
[ Experience replay ] st

[ episode 262 ][ timestamp 108 ] state=[ 0.15489248 -1.29974809 -0.05183245  1.39744516], action=0, reward=1.0, next_state=[ 0.12889752 -1.49418854 -0.02388354  1.67348222]
[ Experience replay ] starts
[ episode 262 ][ timestamp 109 ] state=[ 0.12889752 -1.49418854 -0.02388354  1.67348222], action=1, reward=1.0, next_state=[ 0.09901375 -1.29879759  0.0095861   1.3734583 ]
[ Experience replay ] starts
[ episode 262 ][ timestamp 110 ] state=[ 0.09901375 -1.29879759  0.0095861   1.3734583 ], action=1, reward=1.0, next_state=[ 0.0730378  -1.10379678  0.03705527  1.08378879]
[ Experience replay ] starts
[ episode 262 ][ timestamp 111 ] state=[ 0.0730378  -1.10379678  0.03705527  1.08378879], action=1, reward=1.0, next_state=[ 0.05096186 -0.90918286  0.05873104  0.80296006]
[ Experience replay ] starts
[ episode 262 ][ timestamp 112 ] state=[ 0.05096186 -0.90918286  0.05873104  0.80296006], action=1, reward=1.0, next_state=[ 0.0327782  -0.71491332  0.07479024  0.52931518]
[ Experience replay

[ episode 263 ][ timestamp 20 ] state=[-0.04347933 -0.21967076  0.05353031  0.44473763], action=1, reward=1.0, next_state=[-0.04787274 -0.02534543  0.06242506  0.16939755]
[ Experience replay ] starts
[ episode 263 ][ timestamp 21 ] state=[-0.04787274 -0.02534543  0.06242506  0.16939755], action=0, reward=1.0, next_state=[-0.04837965 -0.22130277  0.06581301  0.48110209]
[ Experience replay ] starts
[ episode 263 ][ timestamp 22 ] state=[-0.04837965 -0.22130277  0.06581301  0.48110209], action=0, reward=1.0, next_state=[-0.0528057  -0.41728898  0.07543506  0.79378003]
[ Experience replay ] starts
[ episode 263 ][ timestamp 23 ] state=[-0.0528057  -0.41728898  0.07543506  0.79378003], action=0, reward=1.0, next_state=[-0.06115148 -0.61336087  0.09131066  1.10920833]
[ Experience replay ] starts
[ episode 263 ][ timestamp 24 ] state=[-0.06115148 -0.61336087  0.09131066  1.10920833], action=1, reward=1.0, next_state=[-0.0734187  -0.41954964  0.11349482  0.84651064]
[ Experience replay ] st

[ episode 264 ][ timestamp 33 ] state=[ 0.02408443  0.42198802  0.04574209 -0.39561434], action=1, reward=1.0, next_state=[ 0.03252419  0.61643211  0.0378298  -0.67353192]
[ Experience replay ] starts
[ episode 264 ][ timestamp 34 ] state=[ 0.03252419  0.61643211  0.0378298  -0.67353192], action=1, reward=1.0, next_state=[ 0.04485283  0.81100842  0.02435917 -0.95406825]
[ Experience replay ] starts
[ episode 264 ][ timestamp 35 ] state=[ 0.04485283  0.81100842  0.02435917 -0.95406825], action=1, reward=1.0, next_state=[ 0.061073    1.00579434  0.0052778  -1.23899956]
[ Experience replay ] starts
[ episode 264 ][ timestamp 36 ] state=[ 0.061073    1.00579434  0.0052778  -1.23899956], action=1, reward=1.0, next_state=[ 0.08118888  1.20084811 -0.01950219 -1.53002447]
[ Experience replay ] starts
[ episode 264 ][ timestamp 37 ] state=[ 0.08118888  1.20084811 -0.01950219 -1.53002447], action=1, reward=1.0, next_state=[ 0.10520585  1.3961997  -0.05010268 -1.82872942]
[ Experience replay ] st

[ episode 265 ][ timestamp 32 ] state=[ 0.01515718 -0.5748881  -0.17980496  0.1599722 ], action=0, reward=1.0, next_state=[ 0.00365942 -0.76704139 -0.17660552  0.39097718]
[ Experience replay ] starts
[ episode 265 ][ timestamp 33 ] state=[ 0.00365942 -0.76704139 -0.17660552  0.39097718], action=0, reward=1.0, next_state=[-0.01168141 -0.95927493 -0.16878597  0.62318988]
[ Experience replay ] starts
[ episode 265 ][ timestamp 34 ] state=[-0.01168141 -0.95927493 -0.16878597  0.62318988], action=0, reward=1.0, next_state=[-0.03086691 -1.15168831 -0.15632218  0.85832069]
[ Experience replay ] starts
[ episode 265 ][ timestamp 35 ] state=[-0.03086691 -1.15168831 -0.15632218  0.85832069], action=0, reward=1.0, next_state=[-0.05390067 -1.34437488 -0.13915576  1.09805451]
[ Experience replay ] starts
[ episode 265 ][ timestamp 36 ] state=[-0.05390067 -1.34437488 -0.13915576  1.09805451], action=0, reward=1.0, next_state=[-0.08078817 -1.5374182  -0.11719467  1.34404051]
[ Experience replay ] st

[ episode 265 ][ timestamp 77 ] state=[-0.72004192 -0.77870366  0.11456914  0.64583259], action=0, reward=1.0, next_state=[-0.735616   -0.97521992  0.12748579  0.97228417]
[ Experience replay ] starts
[ episode 265 ][ timestamp 78 ] state=[-0.735616   -0.97521992  0.12748579  0.97228417], action=0, reward=1.0, next_state=[-0.75512039 -1.17180069  0.14693148  1.30214173]
[ Experience replay ] starts
[ episode 265 ][ timestamp 79 ] state=[-0.75512039 -1.17180069  0.14693148  1.30214173], action=1, reward=1.0, next_state=[-0.77855641 -0.97881623  0.17297431  1.05882675]
[ Experience replay ] starts
[ episode 265 ][ timestamp 80 ] state=[-0.77855641 -0.97881623  0.17297431  1.05882675], action=1, reward=1.0, next_state=[-0.79813273 -0.78635471  0.19415085  0.8250438 ]
[ Experience replay ] starts
[ episode 265 ][ timestamp 81 ] state=[-0.79813273 -0.78635471  0.19415085  0.8250438 ], action=1, reward=-1.0, next_state=[-0.81385983 -0.59434257  0.21065172  0.59915934]
[ Experience replay ] s

[ episode 266 ][ timestamp 40 ] state=[ 0.01155475  0.9249408   0.04692059 -1.12939539], action=0, reward=1.0, next_state=[ 0.03005356  0.72923679  0.02433268 -0.82237287]
[ Experience replay ] starts
[ episode 266 ][ timestamp 41 ] state=[ 0.03005356  0.72923679  0.02433268 -0.82237287], action=1, reward=1.0, next_state=[ 0.0446383   0.92401753  0.00788522 -1.10730439]
[ Experience replay ] starts
[ episode 266 ][ timestamp 42 ] state=[ 0.0446383   0.92401753  0.00788522 -1.10730439], action=0, reward=1.0, next_state=[ 0.06311865  0.72879282 -0.01426086 -0.81215819]
[ Experience replay ] starts
[ episode 266 ][ timestamp 43 ] state=[ 0.06311865  0.72879282 -0.01426086 -0.81215819], action=0, reward=1.0, next_state=[ 0.07769451  0.53386909 -0.03050403 -0.52399487]
[ Experience replay ] starts
[ episode 266 ][ timestamp 44 ] state=[ 0.07769451  0.53386909 -0.03050403 -0.52399487], action=1, reward=1.0, next_state=[ 0.08837189  0.72940676 -0.04098393 -0.82613173]
[ Experience replay ] st

[ episode 268 ][ timestamp 8 ] state=[-0.03217354  0.24155404 -0.03869233 -0.39657435], action=0, reward=1.0, next_state=[-0.02734246  0.04700181 -0.04662382 -0.11633712]
[ Experience replay ] starts
[ episode 268 ][ timestamp 9 ] state=[-0.02734246  0.04700181 -0.04662382 -0.11633712], action=1, reward=1.0, next_state=[-0.02640242  0.24275971 -0.04895056 -0.42335733]
[ Experience replay ] starts
[ episode 268 ][ timestamp 10 ] state=[-0.02640242  0.24275971 -0.04895056 -0.42335733], action=0, reward=1.0, next_state=[-0.02154723  0.04836416 -0.05741771 -0.14649901]
[ Experience replay ] starts
[ episode 268 ][ timestamp 11 ] state=[-0.02154723  0.04836416 -0.05741771 -0.14649901], action=0, reward=1.0, next_state=[-0.02057995 -0.14589054 -0.06034769  0.12753133]
[ Experience replay ] starts
[ episode 268 ][ timestamp 12 ] state=[-0.02057995 -0.14589054 -0.06034769  0.12753133], action=0, reward=1.0, next_state=[-0.02349776 -0.34009838 -0.05779706  0.40058135]
[ Experience replay ] star

[ episode 269 ][ timestamp 17 ] state=[ 0.17260786  1.20827241 -0.17244136 -1.97660401], action=0, reward=-1.0, next_state=[ 0.19677331  1.01533639 -0.21197344 -1.74193907]
[ Experience replay ] starts
[ Ended! ] Episode 269: Exploration_rate=0.01. Score=17.
[ episode 270 ] state=[-0.04793079 -0.01765611  0.02067728 -0.00844789]
[ episode 270 ][ timestamp 1 ] state=[-0.04793079 -0.01765611  0.02067728 -0.00844789], action=0, reward=1.0, next_state=[-0.04828391 -0.2130684   0.02050832  0.29068658]
[ Experience replay ] starts
[ episode 270 ][ timestamp 2 ] state=[-0.04828391 -0.2130684   0.02050832  0.29068658], action=0, reward=1.0, next_state=[-0.05254528 -0.40847669  0.02632205  0.5897664 ]
[ Experience replay ] starts
[ episode 270 ][ timestamp 3 ] state=[-0.05254528 -0.40847669  0.02632205  0.5897664 ], action=1, reward=1.0, next_state=[-0.06071481 -0.213733    0.03811738  0.30548984]
[ Experience replay ] starts
[ episode 270 ][ timestamp 4 ] state=[-0.06071481 -0.213733    0.0381

[ episode 271 ][ timestamp 11 ] state=[-0.04531968  0.41359941 -0.02294314 -0.61949486], action=1, reward=1.0, next_state=[-0.03704769  0.60903416 -0.03533304 -0.91931454]
[ Experience replay ] starts
[ episode 271 ][ timestamp 12 ] state=[-0.03704769  0.60903416 -0.03533304 -0.91931454], action=0, reward=1.0, next_state=[-0.02486701  0.41440716 -0.05371933 -0.637942  ]
[ Experience replay ] starts
[ episode 271 ][ timestamp 13 ] state=[-0.02486701  0.41440716 -0.05371933 -0.637942  ], action=1, reward=1.0, next_state=[-0.01657886  0.61023542 -0.06647817 -0.94704654]
[ Experience replay ] starts
[ episode 271 ][ timestamp 14 ] state=[-0.01657886  0.61023542 -0.06647817 -0.94704654], action=1, reward=1.0, next_state=[-0.00437415  0.80618656 -0.0854191  -1.2598542 ]
[ Experience replay ] starts
[ episode 271 ][ timestamp 15 ] state=[-0.00437415  0.80618656 -0.0854191  -1.2598542 ], action=1, reward=1.0, next_state=[ 0.01174958  1.00229104 -0.11061619 -1.57802111]
[ Experience replay ] st

[ episode 273 ][ timestamp 16 ] state=[-0.08583239 -0.25341075  0.08156784  0.50982526], action=1, reward=1.0, next_state=[-0.0909006  -0.05952693  0.09176434  0.24392085]
[ Experience replay ] starts
[ episode 273 ][ timestamp 17 ] state=[-0.0909006  -0.05952693  0.09176434  0.24392085], action=1, reward=1.0, next_state=[-0.09209114  0.13417267  0.09664276 -0.01846523]
[ Experience replay ] starts
[ episode 273 ][ timestamp 18 ] state=[-0.09209114  0.13417267  0.09664276 -0.01846523], action=0, reward=1.0, next_state=[-0.08940769 -0.06219286  0.09627345  0.30307738]
[ Experience replay ] starts
[ episode 273 ][ timestamp 19 ] state=[-0.08940769 -0.06219286  0.09627345  0.30307738], action=1, reward=1.0, next_state=[-0.09065155  0.13143461  0.102335    0.04224182]
[ Experience replay ] starts
[ episode 273 ][ timestamp 20 ] state=[-0.09065155  0.13143461  0.102335    0.04224182], action=1, reward=1.0, next_state=[-0.08802285  0.32495156  0.10317984 -0.21648098]
[ Experience replay ] st

[ episode 274 ][ timestamp 28 ] state=[-0.09310946  0.19218106  0.17394063  0.06258539], action=1, reward=1.0, next_state=[-0.08926584  0.38443778  0.17519234 -0.17056701]
[ Experience replay ] starts
[ episode 274 ][ timestamp 29 ] state=[-0.08926584  0.38443778  0.17519234 -0.17056701], action=1, reward=1.0, next_state=[-0.08157708  0.57667594  0.171781   -0.40326689]
[ Experience replay ] starts
[ episode 274 ][ timestamp 30 ] state=[-0.08157708  0.57667594  0.171781   -0.40326689], action=1, reward=1.0, next_state=[-0.07004356  0.76899841  0.16371566 -0.63724907]
[ Experience replay ] starts
[ episode 274 ][ timestamp 31 ] state=[-0.07004356  0.76899841  0.16371566 -0.63724907], action=1, reward=1.0, next_state=[-0.05466359  0.96150456  0.15097068 -0.87422947]
[ Experience replay ] starts
[ episode 274 ][ timestamp 32 ] state=[-0.05466359  0.96150456  0.15097068 -0.87422947], action=1, reward=1.0, next_state=[-0.0354335   1.15428719  0.13348609 -1.11589726]
[ Experience replay ] st

[ episode 274 ][ timestamp 76 ] state=[ 0.26111825  1.3577668   0.01804066 -1.58198465], action=1, reward=1.0, next_state=[ 0.28827359  1.55266949 -0.01359904 -1.86898743]
[ Experience replay ] starts
[ episode 274 ][ timestamp 77 ] state=[ 0.28827359  1.55266949 -0.01359904 -1.86898743], action=1, reward=1.0, next_state=[ 0.31932698  1.74793746 -0.05097879 -2.1658603 ]
[ Experience replay ] starts
[ episode 274 ][ timestamp 78 ] state=[ 0.31932698  1.74793746 -0.05097879 -2.1658603 ], action=1, reward=1.0, next_state=[ 0.35428573  1.94351889 -0.09429599 -2.47383259]
[ Experience replay ] starts
[ episode 274 ][ timestamp 79 ] state=[ 0.35428573  1.94351889 -0.09429599 -2.47383259], action=1, reward=1.0, next_state=[ 0.3931561   2.13929601 -0.14377264 -2.7938756 ]
[ Experience replay ] starts
[ episode 274 ][ timestamp 80 ] state=[ 0.3931561   2.13929601 -0.14377264 -2.7938756 ], action=1, reward=1.0, next_state=[ 0.43594202  2.3350664  -0.19965016 -3.12662508]
[ Experience replay ] st

[ episode 277 ][ timestamp 19 ] state=[ 0.08902943 -0.00788112 -0.02595999  0.11717831], action=0, reward=1.0, next_state=[ 0.0888718  -0.20262168 -0.02361642  0.40155935]
[ Experience replay ] starts
[ episode 277 ][ timestamp 20 ] state=[ 0.0888718  -0.20262168 -0.02361642  0.40155935], action=0, reward=1.0, next_state=[ 0.08481937 -0.39740083 -0.01558523  0.68670402]
[ Experience replay ] starts
[ episode 277 ][ timestamp 21 ] state=[ 0.08481937 -0.39740083 -0.01558523  0.68670402], action=0, reward=1.0, next_state=[ 0.07687135 -0.59230301 -0.00185115  0.97443991]
[ Experience replay ] starts
[ episode 277 ][ timestamp 22 ] state=[ 0.07687135 -0.59230301 -0.00185115  0.97443991], action=0, reward=1.0, next_state=[ 0.06502529 -0.78740008  0.01763764  1.26654078]
[ Experience replay ] starts
[ episode 277 ][ timestamp 23 ] state=[ 0.06502529 -0.78740008  0.01763764  1.26654078], action=0, reward=1.0, next_state=[ 0.04927729 -0.98274288  0.04296846  1.5646946 ]
[ Experience replay ] st

[ episode 281 ][ timestamp 6 ] state=[-0.00654241 -0.9481631   0.06831507  1.47210826], action=0, reward=1.0, next_state=[-0.02550567 -1.14405063  0.09775724  1.78532319]
[ Experience replay ] starts
[ episode 281 ][ timestamp 7 ] state=[-0.02550567 -1.14405063  0.09775724  1.78532319], action=0, reward=1.0, next_state=[-0.04838669 -1.34012547  0.1334637   2.10672611]
[ Experience replay ] starts
[ episode 281 ][ timestamp 8 ] state=[-0.04838669 -1.34012547  0.1334637   2.10672611], action=0, reward=1.0, next_state=[-0.0751892  -1.53630847  0.17559822  2.43750554]
[ Experience replay ] starts
[ episode 281 ][ timestamp 9 ] state=[-0.0751892  -1.53630847  0.17559822  2.43750554], action=0, reward=-1.0, next_state=[-0.10591537 -1.73244667  0.22434833  2.77854956]
[ Experience replay ] starts
[ Ended! ] Episode 281: Exploration_rate=0.01. Score=9.
[ episode 282 ] state=[ 0.02009807  0.00879812 -0.03012464  0.02945343]
[ episode 282 ][ timestamp 1 ] state=[ 0.02009807  0.00879812 -0.030124

[ episode 286 ][ timestamp 8 ] state=[-0.09630967 -1.34335398  0.14684585  2.17513988], action=0, reward=1.0, next_state=[-0.12317675 -1.53956901  0.19034865  2.50931247]
[ Experience replay ] starts
[ episode 286 ][ timestamp 9 ] state=[-0.12317675 -1.53956901  0.19034865  2.50931247], action=0, reward=-1.0, next_state=[-0.15396813 -1.73567943  0.2405349   2.85379014]
[ Experience replay ] starts
[ Ended! ] Episode 286: Exploration_rate=0.01. Score=9.
[ episode 287 ] state=[ 0.00681531 -0.01760415 -0.04047668  0.00628327]
[ episode 287 ][ timestamp 1 ] state=[ 0.00681531 -0.01760415 -0.04047668  0.00628327], action=0, reward=1.0, next_state=[ 0.00646322 -0.21212294 -0.04035102  0.28592557]
[ Experience replay ] starts
[ episode 287 ][ timestamp 2 ] state=[ 0.00646322 -0.21212294 -0.04035102  0.28592557], action=0, reward=1.0, next_state=[ 0.00222076 -0.40664688 -0.0346325   0.56561399]
[ Experience replay ] starts
[ episode 287 ][ timestamp 3 ] state=[ 0.00222076 -0.40664688 -0.034632

[ episode 290 ][ timestamp 13 ] state=[0.03631331 0.00706496 0.04443554 0.23229827], action=1, reward=1.0, next_state=[ 0.03645461  0.20152474  0.0490815  -0.04604373]
[ Experience replay ] starts
[ episode 290 ][ timestamp 14 ] state=[ 0.03645461  0.20152474  0.0490815  -0.04604373], action=0, reward=1.0, next_state=[0.04048511 0.0057346  0.04816063 0.26171197]
[ Experience replay ] starts
[ episode 290 ][ timestamp 15 ] state=[0.04048511 0.0057346  0.04816063 0.26171197], action=1, reward=1.0, next_state=[ 0.0405998   0.20013718  0.05339487 -0.01540003]
[ Experience replay ] starts
[ episode 290 ][ timestamp 16 ] state=[ 0.0405998   0.20013718  0.05339487 -0.01540003], action=1, reward=1.0, next_state=[ 0.04460254  0.39445434  0.05308687 -0.29076973]
[ Experience replay ] starts
[ episode 290 ][ timestamp 17 ] state=[ 0.04460254  0.39445434  0.05308687 -0.29076973], action=1, reward=1.0, next_state=[ 0.05249163  0.58878072  0.04727147 -0.56624845]
[ Experience replay ] starts
[ episo

[ episode 290 ][ timestamp 54 ] state=[ 0.17663084 -1.32762181 -0.00420798  1.59430861], action=1, reward=1.0, next_state=[ 0.1500784  -1.1324502   0.02767819  1.30031664]
[ Experience replay ] starts
[ episode 290 ][ timestamp 55 ] state=[ 0.1500784  -1.1324502   0.02767819  1.30031664], action=1, reward=1.0, next_state=[ 0.1274294  -0.93769026  0.05368452  1.01642496]
[ Experience replay ] starts
[ episode 290 ][ timestamp 56 ] state=[ 0.1274294  -0.93769026  0.05368452  1.01642496], action=1, reward=1.0, next_state=[ 0.10867559 -0.74332361  0.07401302  0.74107068]
[ Experience replay ] starts
[ episode 290 ][ timestamp 57 ] state=[ 0.10867559 -0.74332361  0.07401302  0.74107068], action=1, reward=1.0, next_state=[ 0.09380912 -0.54929725  0.08883444  0.4725679 ]
[ Experience replay ] starts
[ episode 290 ][ timestamp 58 ] state=[ 0.09380912 -0.54929725  0.08883444  0.4725679 ], action=1, reward=1.0, next_state=[ 0.08282318 -0.35553488  0.09828579  0.20915339]
[ Experience replay ] st

[ episode 290 ][ timestamp 99 ] state=[-0.02777652 -0.32330431 -0.03922137 -0.50268569], action=0, reward=1.0, next_state=[-0.0342426  -0.51785213 -0.04927508 -0.22261652]
[ Experience replay ] starts
[ episode 290 ][ timestamp 100 ] state=[-0.0342426  -0.51785213 -0.04927508 -0.22261652], action=0, reward=1.0, next_state=[-0.04459965 -0.71223642 -0.05372741  0.054125  ]
[ Experience replay ] starts
[ episode 290 ][ timestamp 101 ] state=[-0.04459965 -0.71223642 -0.05372741  0.054125  ], action=0, reward=1.0, next_state=[-0.05884438 -0.90654847 -0.05264491  0.32938424]
[ Experience replay ] starts
[ episode 290 ][ timestamp 102 ] state=[-0.05884438 -0.90654847 -0.05264491  0.32938424], action=0, reward=1.0, next_state=[-0.07697534 -1.10088301 -0.04605723  0.60501173]
[ Experience replay ] starts
[ episode 290 ][ timestamp 103 ] state=[-0.07697534 -1.10088301 -0.04605723  0.60501173], action=0, reward=1.0, next_state=[-0.09899301 -1.29533165 -0.03395699  0.88283935]
[ Experience replay 

[ episode 290 ][ timestamp 140 ] state=[-0.46024207 -0.15359426  0.03061711 -0.23507442], action=1, reward=1.0, next_state=[-0.46331396  0.04107717  0.02591563 -0.51794469]
[ Experience replay ] starts
[ episode 290 ][ timestamp 141 ] state=[-0.46331396  0.04107717  0.02591563 -0.51794469], action=0, reward=1.0, next_state=[-0.46249241 -0.1543999   0.01555673 -0.21720921]
[ Experience replay ] starts
[ episode 290 ][ timestamp 142 ] state=[-0.46249241 -0.1543999   0.01555673 -0.21720921], action=0, reward=1.0, next_state=[-0.46558041 -0.34974074  0.01121255  0.0803401 ]
[ Experience replay ] starts
[ episode 290 ][ timestamp 143 ] state=[-0.46558041 -0.34974074  0.01121255  0.0803401 ], action=1, reward=1.0, next_state=[-0.47257523 -0.15478131  0.01281935 -0.20878426]
[ Experience replay ] starts
[ episode 290 ][ timestamp 144 ] state=[-0.47257523 -0.15478131  0.01281935 -0.20878426], action=0, reward=1.0, next_state=[-0.47567085 -0.35008419  0.00864366  0.08791479]
[ Experience replay

[ episode 290 ][ timestamp 186 ] state=[-0.65474129 -0.1612135  -0.05521068 -0.06698226], action=1, reward=1.0, next_state=[-0.65796556  0.03465476 -0.05655032 -0.37656067]
[ Experience replay ] starts
[ episode 290 ][ timestamp 187 ] state=[-0.65796556  0.03465476 -0.05655032 -0.37656067], action=1, reward=1.0, next_state=[-0.65727246  0.23053239 -0.06408154 -0.68652438]
[ Experience replay ] starts
[ episode 290 ][ timestamp 188 ] state=[-0.65727246  0.23053239 -0.06408154 -0.68652438], action=0, reward=1.0, next_state=[-0.65266181  0.03635577 -0.07781202 -0.41468435]
[ Experience replay ] starts
[ episode 290 ][ timestamp 189 ] state=[-0.65266181  0.03635577 -0.07781202 -0.41468435], action=0, reward=1.0, next_state=[-0.6519347  -0.15758204 -0.08610571 -0.14751153]
[ Experience replay ] starts
[ episode 290 ][ timestamp 190 ] state=[-0.6519347  -0.15758204 -0.08610571 -0.14751153], action=0, reward=1.0, next_state=[-0.65508634 -0.35137216 -0.08905594  0.1168129 ]
[ Experience replay

[ episode 290 ][ timestamp 231 ] state=[-0.81049942  0.40213011 -0.04093288 -0.45969471], action=1, reward=1.0, next_state=[-0.80245682  0.59780603 -0.05012677 -0.76499363]
[ Experience replay ] starts
[ episode 290 ][ timestamp 232 ] state=[-0.80245682  0.59780603 -0.05012677 -0.76499363], action=0, reward=1.0, next_state=[-0.7905007   0.40340887 -0.06542664 -0.48849526]
[ Experience replay ] starts
[ episode 290 ][ timestamp 233 ] state=[-0.7905007   0.40340887 -0.06542664 -0.48849526], action=0, reward=1.0, next_state=[-0.78243252  0.20926805 -0.07519655 -0.21712882]
[ Experience replay ] starts
[ episode 290 ][ timestamp 234 ] state=[-0.78243252  0.20926805 -0.07519655 -0.21712882], action=0, reward=1.0, next_state=[-0.77824716  0.01529712 -0.07953912  0.0509184 ]
[ Experience replay ] starts
[ episode 290 ][ timestamp 235 ] state=[-0.77824716  0.01529712 -0.07953912  0.0509184 ], action=0, reward=1.0, next_state=[-0.77794122 -0.17859956 -0.07852076  0.31748405]
[ Experience replay

[ episode 290 ][ timestamp 274 ] state=[-0.85655527 -0.16148682 -0.05497172 -0.0611488 ], action=0, reward=1.0, next_state=[-0.85978501 -0.35577926 -0.05619469  0.21369609]
[ Experience replay ] starts
[ episode 290 ][ timestamp 275 ] state=[-0.85978501 -0.35577926 -0.05619469  0.21369609], action=1, reward=1.0, next_state=[-0.86690059 -0.15990081 -0.05192077 -0.09617035]
[ Experience replay ] starts
[ episode 290 ][ timestamp 276 ] state=[-0.86690059 -0.15990081 -0.05192077 -0.09617035], action=0, reward=1.0, next_state=[-0.87009861 -0.35424165 -0.05384418  0.17969023]
[ Experience replay ] starts
[ episode 290 ][ timestamp 277 ] state=[-0.87009861 -0.35424165 -0.05384418  0.17969023], action=1, reward=1.0, next_state=[-0.87718344 -0.15839218 -0.05025037 -0.12948075]
[ Experience replay ] starts
[ episode 290 ][ timestamp 278 ] state=[-0.87718344 -0.15839218 -0.05025037 -0.12948075], action=1, reward=1.0, next_state=[-0.88035129  0.03741225 -0.05283999 -0.43758406]
[ Experience replay

[ episode 290 ][ timestamp 319 ] state=[-1.15773759 -0.70751733 -0.20318213 -0.05140955], action=0, reward=1.0, next_state=[-1.17188793 -0.89923422 -0.20421032  0.17092481]
[ Experience replay ] starts
[ episode 290 ][ timestamp 320 ] state=[-1.17188793 -0.89923422 -0.20421032  0.17092481], action=0, reward=1.0, next_state=[-1.18987262 -1.09093715 -0.20079182  0.39288282]
[ Experience replay ] starts
[ episode 290 ][ timestamp 321 ] state=[-1.18987262 -1.09093715 -0.20079182  0.39288282], action=1, reward=1.0, next_state=[-1.21169136 -0.89361669 -0.19293416  0.04421177]
[ Experience replay ] starts
[ episode 290 ][ timestamp 322 ] state=[-1.21169136 -0.89361669 -0.19293416  0.04421177], action=1, reward=1.0, next_state=[-1.2295637  -0.69632702 -0.19204993 -0.30260333]
[ Experience replay ] starts
[ episode 290 ][ timestamp 323 ] state=[-1.2295637  -0.69632702 -0.19204993 -0.30260333], action=1, reward=1.0, next_state=[-1.24349024 -0.49906093 -0.198102   -0.64917859]
[ Experience replay

[ episode 292 ][ timestamp 27 ] state=[-0.01080071 -0.76615052 -0.14295473  0.36906684], action=1, reward=1.0, next_state=[-0.02612372 -0.56931747 -0.1355734   0.03494331]
[ Experience replay ] starts
[ episode 292 ][ timestamp 28 ] state=[-0.02612372 -0.56931747 -0.1355734   0.03494331], action=0, reward=1.0, next_state=[-0.03751007 -0.7622611  -0.13487453  0.28196649]
[ Experience replay ] starts
[ episode 292 ][ timestamp 29 ] state=[-0.03751007 -0.7622611  -0.13487453  0.28196649], action=0, reward=1.0, next_state=[-0.05275529 -0.95522726 -0.1292352   0.52925401]
[ Experience replay ] starts
[ episode 292 ][ timestamp 30 ] state=[-0.05275529 -0.95522726 -0.1292352   0.52925401], action=0, reward=1.0, next_state=[-0.07185984 -1.14831692 -0.11865012  0.77858369]
[ Experience replay ] starts
[ episode 292 ][ timestamp 31 ] state=[-0.07185984 -1.14831692 -0.11865012  0.77858369], action=0, reward=1.0, next_state=[-0.09482618 -1.34162499 -0.10307845  1.03170582]
[ Experience replay ] st

[ episode 293 ][ timestamp 23 ] state=[ 0.0629753  -0.72654838 -0.10873115  0.69704591], action=0, reward=1.0, next_state=[ 0.04844434 -0.92000798 -0.09479023  0.95361761]
[ Experience replay ] starts
[ episode 293 ][ timestamp 24 ] state=[ 0.04844434 -0.92000798 -0.09479023  0.95361761], action=1, reward=1.0, next_state=[ 0.03004418 -0.72374734 -0.07571788  0.63272163]
[ Experience replay ] starts
[ episode 293 ][ timestamp 25 ] state=[ 0.03004418 -0.72374734 -0.07571788  0.63272163], action=0, reward=1.0, next_state=[ 0.01556923 -0.91773586 -0.06306345  0.90063088]
[ Experience replay ] starts
[ episode 293 ][ timestamp 26 ] state=[ 0.01556923 -0.91773586 -0.06306345  0.90063088], action=0, reward=1.0, next_state=[-0.00278549 -1.11194921 -0.04505083  1.17284345]
[ Experience replay ] starts
[ episode 293 ][ timestamp 27 ] state=[-0.00278549 -1.11194921 -0.04505083  1.17284345], action=0, reward=1.0, next_state=[-0.02502447 -1.30645751 -0.02159396  1.45106941]
[ Experience replay ] st

[ episode 295 ][ timestamp 2 ] state=[-0.01355342  0.21494474 -0.02215255 -0.30972774], action=1, reward=1.0, next_state=[-0.00925452  0.41037519 -0.0283471  -0.60931381]
[ Experience replay ] starts
[ episode 295 ][ timestamp 3 ] state=[-0.00925452  0.41037519 -0.0283471  -0.60931381], action=1, reward=1.0, next_state=[-0.00104702  0.60588171 -0.04053338 -0.9107887 ]
[ Experience replay ] starts
[ episode 295 ][ timestamp 4 ] state=[-0.00104702  0.60588171 -0.04053338 -0.9107887 ], action=1, reward=1.0, next_state=[ 0.01107061  0.80152803 -0.05874915 -1.21593068]
[ Experience replay ] starts
[ episode 295 ][ timestamp 5 ] state=[ 0.01107061  0.80152803 -0.05874915 -1.21593068], action=1, reward=1.0, next_state=[ 0.02710117  0.9973565  -0.08306776 -1.52642893]
[ Experience replay ] starts
[ episode 295 ][ timestamp 6 ] state=[ 0.02710117  0.9973565  -0.08306776 -1.52642893], action=1, reward=1.0, next_state=[ 0.0470483   1.19337694 -0.11359634 -1.84383958]
[ Experience replay ] starts


[ Ended! ] Episode 299: Exploration_rate=0.01. Score=10.
[ episode 300 ] state=[ 0.01905837  0.0176583  -0.01354051  0.01228794]
[ episode 300 ][ timestamp 1 ] state=[ 0.01905837  0.0176583  -0.01354051  0.01228794], action=1, reward=1.0, next_state=[ 0.01941154  0.21297179 -0.01329475 -0.28463624]
[ Experience replay ] starts
[ episode 300 ][ timestamp 2 ] state=[ 0.01941154  0.21297179 -0.01329475 -0.28463624], action=1, reward=1.0, next_state=[ 0.02367097  0.40828081 -0.01898748 -0.58148242]
[ Experience replay ] starts
[ episode 300 ][ timestamp 3 ] state=[ 0.02367097  0.40828081 -0.01898748 -0.58148242], action=1, reward=1.0, next_state=[ 0.03183659  0.60366359 -0.03061713 -0.88008574]
[ Experience replay ] starts
[ episode 300 ][ timestamp 4 ] state=[ 0.03183659  0.60366359 -0.03061713 -0.88008574], action=1, reward=1.0, next_state=[ 0.04390986  0.79918782 -0.04821884 -1.18223466]
[ Experience replay ] starts
[ episode 300 ][ timestamp 5 ] state=[ 0.04390986  0.79918782 -0.048218

[ episode 304 ][ timestamp 5 ] state=[ 0.02245301  0.81111588 -0.01592298 -1.1076042 ], action=1, reward=1.0, next_state=[ 0.03867533  1.00644347 -0.03807506 -1.4052396 ]
[ Experience replay ] starts
[ episode 304 ][ timestamp 6 ] state=[ 0.03867533  1.00644347 -0.03807506 -1.4052396 ], action=1, reward=1.0, next_state=[ 0.0588042   1.20201688 -0.06617985 -1.70957845]
[ Experience replay ] starts
[ episode 304 ][ timestamp 7 ] state=[ 0.0588042   1.20201688 -0.06617985 -1.70957845], action=1, reward=1.0, next_state=[ 0.08284453  1.39783395 -0.10037142 -2.02210374]
[ Experience replay ] starts
[ episode 304 ][ timestamp 8 ] state=[ 0.08284453  1.39783395 -0.10037142 -2.02210374], action=1, reward=1.0, next_state=[ 0.11080121  1.59384199 -0.1408135  -2.34409573]
[ Experience replay ] starts
[ episode 304 ][ timestamp 9 ] state=[ 0.11080121  1.59384199 -0.1408135  -2.34409573], action=1, reward=1.0, next_state=[ 0.14267805  1.78992184 -0.18769541 -2.67656684]
[ Experience replay ] starts


[ episode 308 ][ timestamp 9 ] state=[ 0.11527208  1.58939068 -0.17874926 -2.43417383], action=1, reward=-1.0, next_state=[ 0.14705989  1.78554039 -0.22743274 -2.77598334]
[ Experience replay ] starts
[ Ended! ] Episode 308: Exploration_rate=0.01. Score=9.
[ episode 309 ] state=[-0.04423704  0.01798281  0.00018785 -0.00372284]
[ episode 309 ][ timestamp 1 ] state=[-0.04423704  0.01798281  0.00018785 -0.00372284], action=1, reward=1.0, next_state=[-4.38773794e-02  2.13102071e-01  1.13393862e-04 -2.96346495e-01]
[ Experience replay ] starts
[ episode 309 ][ timestamp 2 ] state=[-4.38773794e-02  2.13102071e-01  1.13393862e-04 -2.96346495e-01], action=1, reward=1.0, next_state=[-0.03961534  0.40822241 -0.00581354 -0.58899366]
[ Experience replay ] starts
[ episode 309 ][ timestamp 3 ] state=[-0.03961534  0.40822241 -0.00581354 -0.58899366], action=1, reward=1.0, next_state=[-0.03145089  0.60342528 -0.01759341 -0.88350219]
[ Experience replay ] starts
[ episode 309 ][ timestamp 4 ] state=[-

[ episode 313 ][ timestamp 3 ] state=[-0.01964991  0.3733099  -0.01175118 -0.55582193], action=1, reward=1.0, next_state=[-0.01218371  0.56859485 -0.02286762 -0.85218389]
[ Experience replay ] starts
[ episode 313 ][ timestamp 4 ] state=[-0.01218371  0.56859485 -0.02286762 -0.85218389], action=1, reward=1.0, next_state=[-8.11813303e-04  7.64020963e-01 -3.99112964e-02 -1.15196892e+00]
[ Experience replay ] starts
[ episode 313 ][ timestamp 5 ] state=[-8.11813303e-04  7.64020963e-01 -3.99112964e-02 -1.15196892e+00], action=1, reward=1.0, next_state=[ 0.01446861  0.95964025 -0.06295067 -1.45689498]
[ Experience replay ] starts
[ episode 313 ][ timestamp 6 ] state=[ 0.01446861  0.95964025 -0.06295067 -1.45689498], action=1, reward=1.0, next_state=[ 0.03366141  1.15547564 -0.09208857 -1.7685615 ]
[ Experience replay ] starts
[ episode 313 ][ timestamp 7 ] state=[ 0.03366141  1.15547564 -0.09208857 -1.7685615 ], action=1, reward=1.0, next_state=[ 0.05677092  1.35150891 -0.1274598  -2.0884012

[ episode 317 ][ timestamp 9 ] state=[ 0.0634543   1.55228913 -0.1367162  -2.36502744], action=1, reward=1.0, next_state=[ 0.09450009  1.74833726 -0.18401675 -2.69642508]
[ Experience replay ] starts
[ episode 317 ][ timestamp 10 ] state=[ 0.09450009  1.74833726 -0.18401675 -2.69642508], action=1, reward=-1.0, next_state=[ 0.12946683  1.94426118 -0.23794525 -3.03914527]
[ Experience replay ] starts
[ Ended! ] Episode 317: Exploration_rate=0.01. Score=10.
[ episode 318 ] state=[ 0.03686101 -0.04620754 -0.04448619 -0.04701657]
[ episode 318 ][ timestamp 1 ] state=[ 0.03686101 -0.04620754 -0.04448619 -0.04701657], action=1, reward=1.0, next_state=[ 0.03593686  0.14952314 -0.04542652 -0.35339675]
[ Experience replay ] starts
[ episode 318 ][ timestamp 2 ] state=[ 0.03593686  0.14952314 -0.04542652 -0.35339675], action=1, reward=1.0, next_state=[ 0.03892732  0.34526061 -0.05249446 -0.66005088]
[ Experience replay ] starts
[ episode 318 ][ timestamp 3 ] state=[ 0.03892732  0.34526061 -0.0524

[ episode 321 ][ timestamp 14 ] state=[-0.08070344 -0.95411324  0.08634303  1.45669019], action=1, reward=1.0, next_state=[-0.0997857  -0.76015045  0.11547683  1.19218316]
[ Experience replay ] starts
[ episode 321 ][ timestamp 15 ] state=[-0.0997857  -0.76015045  0.11547683  1.19218316], action=1, reward=1.0, next_state=[-0.11498871 -0.56669807  0.1393205   0.93781198]
[ Experience replay ] starts
[ episode 321 ][ timestamp 16 ] state=[-0.11498871 -0.56669807  0.1393205   0.93781198], action=1, reward=1.0, next_state=[-0.12632267 -0.37370161  0.15807674  0.69195016]
[ Experience replay ] starts
[ episode 321 ][ timestamp 17 ] state=[-0.12632267 -0.37370161  0.15807674  0.69195016], action=0, reward=1.0, next_state=[-0.13379671 -0.57062243  0.17191574  1.0299298 ]
[ Experience replay ] starts
[ episode 321 ][ timestamp 18 ] state=[-0.13379671 -0.57062243  0.17191574  1.0299298 ], action=1, reward=1.0, next_state=[-0.14520916 -0.37815282  0.19251434  0.79577584]
[ Experience replay ] st

[ episode 324 ][ timestamp 6 ] state=[-0.03475385 -0.56285068  0.03271861  0.90802675], action=0, reward=1.0, next_state=[-0.04601087 -0.7583999   0.05087915  1.21081115]
[ Experience replay ] starts
[ episode 324 ][ timestamp 7 ] state=[-0.04601087 -0.7583999   0.05087915  1.21081115], action=0, reward=1.0, next_state=[-0.06117887 -0.95414049  0.07509537  1.5189941 ]
[ Experience replay ] starts
[ episode 324 ][ timestamp 8 ] state=[-0.06117887 -0.95414049  0.07509537  1.5189941 ], action=1, reward=1.0, next_state=[-0.08026167 -0.76000254  0.10547525  1.25066519]
[ Experience replay ] starts
[ episode 324 ][ timestamp 9 ] state=[-0.08026167 -0.76000254  0.10547525  1.25066519], action=1, reward=1.0, next_state=[-0.09546173 -0.56637844  0.13048856  0.99279535]
[ Experience replay ] starts
[ episode 324 ][ timestamp 10 ] state=[-0.09546173 -0.56637844  0.13048856  0.99279535], action=1, reward=1.0, next_state=[-0.10678929 -0.37322089  0.15034446  0.74377709]
[ Experience replay ] starts

[ episode 328 ][ timestamp 5 ] state=[-0.01413477  0.02440044 -0.00562262 -0.01666215], action=0, reward=1.0, next_state=[-0.01364676 -0.17064043 -0.00595586  0.27424148]
[ Experience replay ] starts
[ episode 328 ][ timestamp 6 ] state=[-0.01364676 -0.17064043 -0.00595586  0.27424148], action=0, reward=1.0, next_state=[-1.70595672e-02 -3.65676900e-01 -4.71033042e-04  5.65039982e-01]
[ Experience replay ] starts
[ episode 328 ][ timestamp 7 ] state=[-1.70595672e-02 -3.65676900e-01 -4.71033042e-04  5.65039982e-01], action=0, reward=1.0, next_state=[-0.02437311 -0.56079224  0.01082977  0.85757448]
[ Experience replay ] starts
[ episode 328 ][ timestamp 8 ] state=[-0.02437311 -0.56079224  0.01082977  0.85757448], action=0, reward=1.0, next_state=[-0.03558895 -0.75606005  0.02798126  1.1536429 ]
[ Experience replay ] starts
[ episode 328 ][ timestamp 9 ] state=[-0.03558895 -0.75606005  0.02798126  1.1536429 ], action=0, reward=1.0, next_state=[-0.05071015 -0.95153556  0.05105411  1.4549668

[ episode 331 ][ timestamp 4 ] state=[-0.01630581 -0.62225899  0.03440842  0.93935893], action=0, reward=1.0, next_state=[-0.02875098 -0.81782746  0.0531956   1.24265209]
[ Experience replay ] starts
[ episode 331 ][ timestamp 5 ] state=[-0.02875098 -0.81782746  0.0531956   1.24265209], action=0, reward=1.0, next_state=[-0.04510753 -1.01359028  0.07804864  1.55151308]
[ Experience replay ] starts
[ episode 331 ][ timestamp 6 ] state=[-0.04510753 -1.01359028  0.07804864  1.55151308], action=0, reward=1.0, next_state=[-0.06537934 -1.20955674  0.1090789   1.86749093]
[ Experience replay ] starts
[ episode 331 ][ timestamp 7 ] state=[-0.06537934 -1.20955674  0.1090789   1.86749093], action=0, reward=1.0, next_state=[-0.08957047 -1.40569019  0.14642872  2.19194824]
[ Experience replay ] starts
[ episode 331 ][ timestamp 8 ] state=[-0.08957047 -1.40569019  0.14642872  2.19194824], action=1, reward=1.0, next_state=[-0.11768428 -1.21225573  0.19026769  1.947798  ]
[ Experience replay ] starts


[ episode 332 ][ timestamp 40 ] state=[-0.16911977 -0.62439748  0.17505321  1.09698402], action=1, reward=1.0, next_state=[-0.18160772 -0.43195771  0.1969929   0.86393907]
[ Experience replay ] starts
[ episode 332 ][ timestamp 41 ] state=[-0.18160772 -0.43195771  0.1969929   0.86393907], action=0, reward=-1.0, next_state=[-0.19024688 -0.6291371   0.21427168  1.2115299 ]
[ Experience replay ] starts
[ Ended! ] Episode 332: Exploration_rate=0.01. Score=41.
[ episode 333 ] state=[-0.02653375 -0.01525245 -0.01943731  0.00811976]
[ episode 333 ][ timestamp 1 ] state=[-0.02653375 -0.01525245 -0.01943731  0.00811976], action=0, reward=1.0, next_state=[-0.0268388  -0.21009033 -0.01927491  0.29460716]
[ Experience replay ] starts
[ episode 333 ][ timestamp 2 ] state=[-0.0268388  -0.21009033 -0.01927491  0.29460716], action=1, reward=1.0, next_state=[-0.03104061 -0.01469896 -0.01338277 -0.00409192]
[ Experience replay ] starts
[ episode 333 ][ timestamp 3 ] state=[-0.03104061 -0.01469896 -0.013

[ Ended! ] Episode 334: Exploration_rate=0.01. Score=25.
[ episode 335 ] state=[-0.00209227  0.00103428  0.01185679 -0.01661282]
[ episode 335 ][ timestamp 1 ] state=[-0.00209227  0.00103428  0.01185679 -0.01661282], action=0, reward=1.0, next_state=[-0.00207158 -0.19425569  0.01152454  0.27978735]
[ Experience replay ] starts
[ episode 335 ][ timestamp 2 ] state=[-0.00207158 -0.19425569  0.01152454  0.27978735], action=1, reward=1.0, next_state=[-0.00595669  0.00069998  0.01712028 -0.0092386 ]
[ Experience replay ] starts
[ episode 335 ][ timestamp 3 ] state=[-0.00595669  0.00069998  0.01712028 -0.0092386 ], action=1, reward=1.0, next_state=[-0.00594269  0.19557228  0.01693551 -0.29647108]
[ Experience replay ] starts
[ episode 335 ][ timestamp 4 ] state=[-0.00594269  0.19557228  0.01693551 -0.29647108], action=0, reward=1.0, next_state=[-0.00203125  0.00021304  0.01100609  0.00150455]
[ Experience replay ] starts
[ episode 335 ][ timestamp 5 ] state=[-0.00203125  0.00021304  0.011006

[ episode 337 ][ timestamp 10 ] state=[ 0.1069396   0.62814825 -0.12883307 -1.01820798], action=0, reward=1.0, next_state=[ 0.11950257  0.43495696 -0.14919723 -0.76859489]
[ Experience replay ] starts
[ episode 337 ][ timestamp 11 ] state=[ 0.11950257  0.43495696 -0.14919723 -0.76859489], action=0, reward=1.0, next_state=[ 0.12820171  0.24216912 -0.16456913 -0.52632716]
[ Experience replay ] starts
[ episode 337 ][ timestamp 12 ] state=[ 0.12820171  0.24216912 -0.16456913 -0.52632716], action=1, reward=1.0, next_state=[ 0.13304509  0.43917768 -0.17509567 -0.86601256]
[ Experience replay ] starts
[ episode 337 ][ timestamp 13 ] state=[ 0.13304509  0.43917768 -0.17509567 -0.86601256], action=0, reward=1.0, next_state=[ 0.14182864  0.24681551 -0.19241592 -0.63309667]
[ Experience replay ] starts
[ episode 337 ][ timestamp 14 ] state=[ 0.14182864  0.24681551 -0.19241592 -0.63309667], action=0, reward=1.0, next_state=[ 0.14676495  0.05482443 -0.20507786 -0.40664666]
[ Experience replay ] st

[ episode 339 ][ timestamp 24 ] state=[-0.0696201  -0.55480218  0.17845975  1.28236739], action=0, reward=1.0, next_state=[-0.08071614 -0.75169075  0.2041071   1.62519897]
[ Experience replay ] starts
[ episode 339 ][ timestamp 25 ] state=[-0.08071614 -0.75169075  0.2041071   1.62519897], action=0, reward=-1.0, next_state=[-0.09574996 -0.94854518  0.23661108  1.97394297]
[ Experience replay ] starts
[ Ended! ] Episode 339: Exploration_rate=0.01. Score=25.
[ episode 340 ] state=[-0.03185152  0.01907287  0.00333619  0.02256361]
[ episode 340 ][ timestamp 1 ] state=[-0.03185152  0.01907287  0.00333619  0.02256361], action=1, reward=1.0, next_state=[-0.03147007  0.21414682  0.00378746 -0.26906485]
[ Experience replay ] starts
[ episode 340 ][ timestamp 2 ] state=[-0.03147007  0.21414682  0.00378746 -0.26906485], action=0, reward=1.0, next_state=[-0.02718713  0.01897102 -0.00159384  0.02481025]
[ Experience replay ] starts
[ episode 340 ][ timestamp 3 ] state=[-0.02718713  0.01897102 -0.001

[ episode 340 ][ timestamp 42 ] state=[ 0.11648945 -0.17726187 -0.01733422  0.34199243], action=1, reward=1.0, next_state=[ 0.11294422  0.01810236 -0.01049437  0.04389411]
[ Experience replay ] starts
[ episode 340 ][ timestamp 43 ] state=[ 0.11294422  0.01810236 -0.01049437  0.04389411], action=1, reward=1.0, next_state=[ 0.11330626  0.21337321 -0.00961649 -0.25208133]
[ Experience replay ] starts
[ episode 340 ][ timestamp 44 ] state=[ 0.11330626  0.21337321 -0.00961649 -0.25208133], action=1, reward=1.0, next_state=[ 0.11757373  0.40863115 -0.01465812 -0.5477819 ]
[ Experience replay ] starts
[ episode 340 ][ timestamp 45 ] state=[ 0.11757373  0.40863115 -0.01465812 -0.5477819 ], action=1, reward=1.0, next_state=[ 0.12574635  0.60395593 -0.02561376 -0.84504693]
[ Experience replay ] starts
[ episode 340 ][ timestamp 46 ] state=[ 0.12574635  0.60395593 -0.02561376 -0.84504693], action=0, reward=1.0, next_state=[ 0.13782547  0.40919266 -0.04251469 -0.56052747]
[ Experience replay ] st

[ episode 340 ][ timestamp 86 ] state=[-0.08992566 -1.3302757   0.11245818  1.7151585 ], action=0, reward=1.0, next_state=[-0.11653117 -1.52649401  0.14676135  2.04061983]
[ Experience replay ] starts
[ episode 340 ][ timestamp 87 ] state=[-0.11653117 -1.52649401  0.14676135  2.04061983], action=0, reward=1.0, next_state=[-0.14706105 -1.7227894   0.18757375  2.37489073]
[ Experience replay ] starts
[ episode 340 ][ timestamp 88 ] state=[-0.14706105 -1.7227894   0.18757375  2.37489073], action=0, reward=-1.0, next_state=[-0.18151684 -1.91901342  0.23507156  2.71888784]
[ Experience replay ] starts
[ Ended! ] Episode 340: Exploration_rate=0.01. Score=88.
[ episode 341 ] state=[ 0.02170645 -0.02133633 -0.03047621 -0.03430287]
[ episode 341 ][ timestamp 1 ] state=[ 0.02170645 -0.02133633 -0.03047621 -0.03430287], action=0, reward=1.0, next_state=[ 0.02127972 -0.21600828 -0.03116227  0.24861085]
[ Experience replay ] starts
[ episode 341 ][ timestamp 2 ] state=[ 0.02127972 -0.21600828 -0.03

[ episode 341 ][ timestamp 46 ] state=[-0.35409741 -0.55361787 -0.05698757 -0.32731931], action=0, reward=1.0, next_state=[-0.36516977 -0.74788415 -0.06353396 -0.05313822]
[ Experience replay ] starts
[ episode 341 ][ timestamp 47 ] state=[-0.36516977 -0.74788415 -0.06353396 -0.05313822], action=0, reward=1.0, next_state=[-0.38012745 -0.94204029 -0.06459672  0.21884197]
[ Experience replay ] starts
[ episode 341 ][ timestamp 48 ] state=[-0.38012745 -0.94204029 -0.06459672  0.21884197], action=0, reward=1.0, next_state=[-0.39896826 -1.13618222 -0.06021988  0.49046928]
[ Experience replay ] starts
[ episode 341 ][ timestamp 49 ] state=[-0.39896826 -1.13618222 -0.06021988  0.49046928], action=0, reward=1.0, next_state=[-0.4216919  -1.33040527 -0.0504105   0.76358181]
[ Experience replay ] starts
[ episode 341 ][ timestamp 50 ] state=[-0.4216919  -1.33040527 -0.0504105   0.76358181], action=1, reward=1.0, next_state=[-0.44830001 -1.13462662 -0.03513886  0.45547249]
[ Experience replay ] st

[ episode 342 ][ timestamp 12 ] state=[-0.01799971  0.17895366 -0.01704371 -0.32208622], action=0, reward=1.0, next_state=[-0.01442064 -0.01592149 -0.02348544 -0.03482656]
[ Experience replay ] starts
[ episode 342 ][ timestamp 13 ] state=[-0.01442064 -0.01592149 -0.02348544 -0.03482656], action=1, reward=1.0, next_state=[-0.01473907  0.17952924 -0.02418197 -0.33482589]
[ Experience replay ] starts
[ episode 342 ][ timestamp 14 ] state=[-0.01473907  0.17952924 -0.02418197 -0.33482589], action=1, reward=1.0, next_state=[-0.01114848  0.37498685 -0.03087849 -0.6350354 ]
[ Experience replay ] starts
[ episode 342 ][ timestamp 15 ] state=[-0.01114848  0.37498685 -0.03087849 -0.6350354 ], action=0, reward=1.0, next_state=[-0.00364875  0.1803089  -0.04357919 -0.35223449]
[ Experience replay ] starts
[ episode 342 ][ timestamp 16 ] state=[-0.00364875  0.1803089  -0.04357919 -0.35223449], action=1, reward=1.0, next_state=[-4.25686164e-05  3.76022593e-01 -5.06238831e-02 -6.58334544e-01]
[ Experi

[ episode 342 ][ timestamp 54 ] state=[-0.0864783   0.56172397 -0.03416501 -0.74284206], action=0, reward=1.0, next_state=[-0.07524382  0.36708984 -0.04902185 -0.4611038 ]
[ Experience replay ] starts
[ episode 342 ][ timestamp 55 ] state=[-0.07524382  0.36708984 -0.04902185 -0.4611038 ], action=0, reward=1.0, next_state=[-0.06790202  0.1726938  -0.05824393 -0.18426669]
[ Experience replay ] starts
[ episode 342 ][ timestamp 56 ] state=[-0.06790202  0.1726938  -0.05824393 -0.18426669], action=0, reward=1.0, next_state=[-0.06444814 -0.0215485  -0.06192926  0.08948866]
[ Experience replay ] starts
[ episode 342 ][ timestamp 57 ] state=[-0.06444814 -0.0215485  -0.06192926  0.08948866], action=1, reward=1.0, next_state=[-0.06487911  0.17440393 -0.06013949 -0.2220721 ]
[ Experience replay ] starts
[ episode 342 ][ timestamp 58 ] state=[-0.06487911  0.17440393 -0.06013949 -0.2220721 ], action=0, reward=1.0, next_state=[-0.06139104 -0.01980916 -0.06458093  0.05105054]
[ Experience replay ] st

[ episode 342 ][ timestamp 104 ] state=[-0.41258241 -0.55908506 -0.04576146 -0.08750679], action=1, reward=1.0, next_state=[-0.42376411 -0.36333805 -0.0475116  -0.3942691 ]
[ Experience replay ] starts
[ episode 342 ][ timestamp 105 ] state=[-0.42376411 -0.36333805 -0.0475116  -0.3942691 ], action=0, reward=1.0, next_state=[-0.43103087 -0.55775475 -0.05539698 -0.11693628]
[ Experience replay ] starts
[ episode 342 ][ timestamp 106 ] state=[-0.43103087 -0.55775475 -0.05539698 -0.11693628], action=0, reward=1.0, next_state=[-0.44218597 -0.75204101 -0.05773571  0.15776766]
[ Experience replay ] starts
[ episode 342 ][ timestamp 107 ] state=[-0.44218597 -0.75204101 -0.05773571  0.15776766], action=1, reward=1.0, next_state=[-0.45722679 -0.55614201 -0.05458035 -0.15255608]
[ Experience replay ] starts
[ episode 342 ][ timestamp 108 ] state=[-0.45722679 -0.55614201 -0.05458035 -0.15255608], action=1, reward=1.0, next_state=[-0.46834963 -0.36028274 -0.05763147 -0.46194615]
[ Experience replay

[ episode 343 ][ timestamp 17 ] state=[ 0.0351225  -0.02352785 -0.07346173 -0.0296254 ], action=1, reward=1.0, next_state=[ 0.03465194  0.17256651 -0.07405424 -0.34455193]
[ Experience replay ] starts
[ episode 343 ][ timestamp 18 ] state=[ 0.03465194  0.17256651 -0.07405424 -0.34455193], action=0, reward=1.0, next_state=[ 0.03810327 -0.02142814 -0.08094527 -0.07610954]
[ Experience replay ] starts
[ episode 343 ][ timestamp 19 ] state=[ 0.03810327 -0.02142814 -0.08094527 -0.07610954], action=0, reward=1.0, next_state=[ 0.03767471 -0.21530199 -0.08246747  0.18997711]
[ Experience replay ] starts
[ episode 343 ][ timestamp 20 ] state=[ 0.03767471 -0.21530199 -0.08246747  0.18997711], action=1, reward=1.0, next_state=[ 0.03336867 -0.01910301 -0.07866792 -0.12753916]
[ Experience replay ] starts
[ episode 343 ][ timestamp 21 ] state=[ 0.03336867 -0.01910301 -0.07866792 -0.12753916], action=1, reward=1.0, next_state=[ 0.03298661  0.17705262 -0.08121871 -0.44396714]
[ Experience replay ] st

[ episode 343 ][ timestamp 60 ] state=[-0.25525068  0.18487624  0.00354082 -0.61517014], action=0, reward=1.0, next_state=[-0.25155316 -0.010295   -0.00876258 -0.32137411]
[ Experience replay ] starts
[ episode 343 ][ timestamp 61 ] state=[-0.25155316 -0.010295   -0.00876258 -0.32137411], action=0, reward=1.0, next_state=[-0.25175906 -0.20529108 -0.01519006 -0.03146739]
[ Experience replay ] starts
[ episode 343 ][ timestamp 62 ] state=[-0.25175906 -0.20529108 -0.01519006 -0.03146739], action=0, reward=1.0, next_state=[-0.25586488 -0.40019194 -0.01581941  0.25638447]
[ Experience replay ] starts
[ episode 343 ][ timestamp 63 ] state=[-0.25586488 -0.40019194 -0.01581941  0.25638447], action=0, reward=1.0, next_state=[-0.26386872 -0.5950845  -0.01069172  0.54403602]
[ Experience replay ] starts
[ episode 343 ][ timestamp 64 ] state=[-0.26386872 -0.5950845  -0.01069172  0.54403602], action=1, reward=1.0, next_state=[-2.75770407e-01 -3.99813947e-01  1.88997244e-04  2.48003625e-01]
[ Experi

[ episode 343 ][ timestamp 107 ] state=[-0.63957566 -1.51819544 -0.05636159  0.85178929], action=1, reward=1.0, next_state=[-0.66993957 -1.32235224 -0.03932581  0.54192942]
[ Experience replay ] starts
[ episode 343 ][ timestamp 108 ] state=[-0.66993957 -1.32235224 -0.03932581  0.54192942], action=1, reward=1.0, next_state=[-0.69638662 -1.12670027 -0.02848722  0.23711957]
[ Experience replay ] starts
[ episode 343 ][ timestamp 109 ] state=[-0.69638662 -1.12670027 -0.02848722  0.23711957], action=1, reward=1.0, next_state=[-0.71892062 -0.93118316 -0.02374483 -0.06441121]
[ Experience replay ] starts
[ episode 343 ][ timestamp 110 ] state=[-0.71892062 -0.93118316 -0.02374483 -0.06441121], action=0, reward=1.0, next_state=[-0.73754429 -1.12595677 -0.02503305  0.22068652]
[ Experience replay ] starts
[ episode 343 ][ timestamp 111 ] state=[-0.73754429 -1.12595677 -0.02503305  0.22068652], action=0, reward=1.0, next_state=[-0.76006342 -1.32071212 -0.02061932  0.50536907]
[ Experience replay

[ episode 344 ][ timestamp 28 ] state=[ 0.0137806  -0.21558974  0.00754694  0.18497307], action=0, reward=1.0, next_state=[ 0.00946881 -0.41081886  0.0112464   0.48002719]
[ Experience replay ] starts
[ episode 344 ][ timestamp 29 ] state=[ 0.00946881 -0.41081886  0.0112464   0.48002719], action=1, reward=1.0, next_state=[ 0.00125243 -0.21585746  0.02084695  0.19090995]
[ Experience replay ] starts
[ episode 344 ][ timestamp 30 ] state=[ 0.00125243 -0.21585746  0.02084695  0.19090995], action=1, reward=1.0, next_state=[-0.00306472 -0.02103985  0.02466515 -0.0951244 ]
[ Experience replay ] starts
[ episode 344 ][ timestamp 31 ] state=[-0.00306472 -0.02103985  0.02466515 -0.0951244 ], action=1, reward=1.0, next_state=[-0.00348551  0.17372006  0.02276266 -0.37992459]
[ Experience replay ] starts
[ episode 344 ][ timestamp 32 ] state=[-0.00348551  0.17372006  0.02276266 -0.37992459], action=1, reward=1.0, next_state=[-1.11132680e-05  3.68511498e-01  1.51641669e-02 -6.65344406e-01]
[ Experi

[ episode 345 ][ timestamp 26 ] state=[ 0.02937669 -0.15508504 -0.04060384  0.21638566], action=1, reward=1.0, next_state=[ 0.02627499  0.04059314 -0.03627613 -0.08882394]
[ Experience replay ] starts
[ episode 345 ][ timestamp 27 ] state=[ 0.02627499  0.04059314 -0.03627613 -0.08882394], action=0, reward=1.0, next_state=[ 0.02708685 -0.15399057 -0.03805261  0.19219675]
[ Experience replay ] starts
[ episode 345 ][ timestamp 28 ] state=[ 0.02708685 -0.15399057 -0.03805261  0.19219675], action=1, reward=1.0, next_state=[ 0.02400704  0.0416545  -0.03420867 -0.11224317]
[ Experience replay ] starts
[ episode 345 ][ timestamp 29 ] state=[ 0.02400704  0.0416545  -0.03420867 -0.11224317], action=1, reward=1.0, next_state=[ 0.02484013  0.23724951 -0.03645354 -0.41551942]
[ Experience replay ] starts
[ episode 345 ][ timestamp 30 ] state=[ 0.02484013  0.23724951 -0.03645354 -0.41551942], action=0, reward=1.0, next_state=[ 0.02958512  0.04266266 -0.04476392 -0.13454803]
[ Experience replay ] st

[ episode 345 ][ timestamp 70 ] state=[-0.1969552   0.24047289  0.04593076 -0.48701047], action=1, reward=1.0, next_state=[-0.19214574  0.43491768  0.03619055 -0.76487116]
[ Experience replay ] starts
[ episode 345 ][ timestamp 71 ] state=[-0.19214574  0.43491768  0.03619055 -0.76487116], action=0, reward=1.0, next_state=[-0.18344738  0.23931654  0.02089313 -0.46102388]
[ Experience replay ] starts
[ episode 345 ][ timestamp 72 ] state=[-0.18344738  0.23931654  0.02089313 -0.46102388], action=0, reward=1.0, next_state=[-0.17866105  0.04390561  0.01167265 -0.16182932]
[ Experience replay ] starts
[ episode 345 ][ timestamp 73 ] state=[-0.17866105  0.04390561  0.01167265 -0.16182932], action=1, reward=1.0, next_state=[-0.17778294  0.23885853  0.00843607 -0.4508071 ]
[ Experience replay ] starts
[ episode 345 ][ timestamp 74 ] state=[-0.17778294  0.23885853  0.00843607 -0.4508071 ], action=1, reward=1.0, next_state=[-1.73005771e-01  4.33860162e-01 -5.80075964e-04 -7.40818959e-01]
[ Experi

[ episode 346 ][ timestamp 29 ] state=[ 0.18053352  0.42033504 -0.08011009 -0.18132709], action=1, reward=1.0, next_state=[ 0.18894022  0.61650649 -0.08373663 -0.49816774]
[ Experience replay ] starts
[ episode 346 ][ timestamp 30 ] state=[ 0.18894022  0.61650649 -0.08373663 -0.49816774], action=0, reward=1.0, next_state=[ 0.20127035  0.42265881 -0.09369999 -0.23300485]
[ Experience replay ] starts
[ episode 346 ][ timestamp 31 ] state=[ 0.20127035  0.42265881 -0.09369999 -0.23300485], action=0, reward=1.0, next_state=[ 0.20972352  0.2289919  -0.09836008  0.0287137 ]
[ Experience replay ] starts
[ episode 346 ][ timestamp 32 ] state=[ 0.20972352  0.2289919  -0.09836008  0.0287137 ], action=1, reward=1.0, next_state=[ 0.21430336  0.42537674 -0.09778581 -0.293311  ]
[ Experience replay ] starts
[ episode 346 ][ timestamp 33 ] state=[ 0.21430336  0.42537674 -0.09778581 -0.293311  ], action=0, reward=1.0, next_state=[ 0.2228109   0.23177509 -0.10365203 -0.03299908]
[ Experience replay ] st

[ episode 346 ][ timestamp 71 ] state=[ 0.18000527  0.06522204 -0.05337631 -0.37077407], action=0, reward=1.0, next_state=[ 0.18130971 -0.12910256 -0.06079179 -0.09538749]
[ Experience replay ] starts
[ episode 346 ][ timestamp 72 ] state=[ 0.18130971 -0.12910256 -0.06079179 -0.09538749], action=0, reward=1.0, next_state=[ 0.17872766 -0.3233029  -0.06269954  0.17751314]
[ Experience replay ] starts
[ episode 346 ][ timestamp 73 ] state=[ 0.17872766 -0.3233029  -0.06269954  0.17751314], action=1, reward=1.0, next_state=[ 0.1722616  -0.12734232 -0.05914928 -0.13427174]
[ Experience replay ] starts
[ episode 346 ][ timestamp 74 ] state=[ 0.1722616  -0.12734232 -0.05914928 -0.13427174], action=0, reward=1.0, next_state=[ 0.16971476 -0.32156934 -0.06183472  0.13917954]
[ Experience replay ] starts
[ episode 346 ][ timestamp 75 ] state=[ 0.16971476 -0.32156934 -0.06183472  0.13917954], action=1, reward=1.0, next_state=[ 0.16328337 -0.12561877 -0.05905113 -0.17235241]
[ Experience replay ] st

[ episode 346 ][ timestamp 116 ] state=[-0.15736107 -0.50761121  0.0062789   0.23129031], action=0, reward=1.0, next_state=[-0.1675133  -0.70282232  0.01090471  0.52594719]
[ Experience replay ] starts
[ episode 346 ][ timestamp 117 ] state=[-0.1675133  -0.70282232  0.01090471  0.52594719], action=1, reward=1.0, next_state=[-0.18156974 -0.5078555   0.02142365  0.23672026]
[ Experience replay ] starts
[ episode 346 ][ timestamp 118 ] state=[-0.18156974 -0.5078555   0.02142365  0.23672026], action=0, reward=1.0, next_state=[-0.19172685 -0.70327687  0.02615805  0.53608312]
[ Experience replay ] starts
[ episode 346 ][ timestamp 119 ] state=[-0.19172685 -0.70327687  0.02615805  0.53608312], action=1, reward=1.0, next_state=[-0.20579239 -0.50853231  0.03687972  0.2517558 ]
[ Experience replay ] starts
[ episode 346 ][ timestamp 120 ] state=[-0.20579239 -0.50853231  0.03687972  0.2517558 ], action=1, reward=1.0, next_state=[-0.21596304 -0.31395586  0.04191483 -0.02907023]
[ Experience replay

[ episode 346 ][ timestamp 163 ] state=[-0.38086959 -0.32283266  0.00387499  0.16674416], action=1, reward=1.0, next_state=[-0.38732625 -0.12776639  0.00720987 -0.1247138 ]
[ Experience replay ] starts
[ episode 346 ][ timestamp 164 ] state=[-0.38732625 -0.12776639  0.00720987 -0.1247138 ], action=1, reward=1.0, next_state=[-0.38988157  0.06725153  0.0047156  -0.4151134 ]
[ Experience replay ] starts
[ episode 346 ][ timestamp 165 ] state=[-0.38988157  0.06725153  0.0047156  -0.4151134 ], action=1, reward=1.0, next_state=[-0.38853654  0.26230633 -0.00358667 -0.70630596]
[ Experience replay ] starts
[ episode 346 ][ timestamp 166 ] state=[-0.38853654  0.26230633 -0.00358667 -0.70630596], action=0, reward=1.0, next_state=[-0.38329042  0.06723426 -0.01771279 -0.41475421]
[ Experience replay ] starts
[ episode 346 ][ timestamp 167 ] state=[-0.38329042  0.06723426 -0.01771279 -0.41475421], action=0, reward=1.0, next_state=[-0.38194573 -0.12763222 -0.02600788 -0.12770764]
[ Experience replay

[ episode 347 ][ timestamp 10 ] state=[-0.06980507 -0.18575677  0.01700141  0.21698043], action=1, reward=1.0, next_state=[-0.0735202   0.00911807  0.02134102 -0.0702914 ]
[ Experience replay ] starts
[ episode 347 ][ timestamp 11 ] state=[-0.0735202   0.00911807  0.02134102 -0.0702914 ], action=0, reward=1.0, next_state=[-0.07333784 -0.18630324  0.01993519  0.22904759]
[ Experience replay ] starts
[ episode 347 ][ timestamp 12 ] state=[-0.07333784 -0.18630324  0.01993519  0.22904759], action=1, reward=1.0, next_state=[-0.07706391  0.00852824  0.02451614 -0.057281  ]
[ Experience replay ] starts
[ episode 347 ][ timestamp 13 ] state=[-0.07706391  0.00852824  0.02451614 -0.057281  ], action=0, reward=1.0, next_state=[-0.07689334 -0.18693649  0.02337052  0.24303501]
[ Experience replay ] starts
[ episode 347 ][ timestamp 14 ] state=[-0.07689334 -0.18693649  0.02337052  0.24303501], action=1, reward=1.0, next_state=[-0.08063207  0.00784398  0.02823122 -0.0421856 ]
[ Experience replay ] st

[ episode 347 ][ timestamp 53 ] state=[-0.07913005 -0.38289245  0.01431995  0.55408436], action=0, reward=1.0, next_state=[-0.0867879  -0.57821252  0.02540164  0.85124436]
[ Experience replay ] starts
[ episode 347 ][ timestamp 54 ] state=[-0.0867879  -0.57821252  0.02540164  0.85124436], action=1, reward=1.0, next_state=[-0.09835215 -0.38344595  0.04242653  0.56665603]
[ Experience replay ] starts
[ episode 347 ][ timestamp 55 ] state=[-0.09835215 -0.38344595  0.04242653  0.56665603], action=1, reward=1.0, next_state=[-0.10602107 -0.18894404  0.05375965  0.28763536]
[ Experience replay ] starts
[ episode 347 ][ timestamp 56 ] state=[-0.10602107 -0.18894404  0.05375965  0.28763536], action=1, reward=1.0, next_state=[-0.10979995  0.00537169  0.05951236  0.01238059]
[ Experience replay ] starts
[ episode 347 ][ timestamp 57 ] state=[-0.10979995  0.00537169  0.05951236  0.01238059], action=1, reward=1.0, next_state=[-0.10969252  0.1995919   0.05975997 -0.26094766]
[ Experience replay ] st

[ episode 347 ][ timestamp 101 ] state=[-0.05650002 -0.3640623  -0.03826265  0.13884763], action=1, reward=1.0, next_state=[-0.06378127 -0.16841381 -0.03548569 -0.16565678]
[ Experience replay ] starts
[ episode 347 ][ timestamp 102 ] state=[-0.06378127 -0.16841381 -0.03548569 -0.16565678], action=0, reward=1.0, next_state=[-0.06714954 -0.3630103  -0.03879883  0.11562359]
[ Experience replay ] starts
[ episode 347 ][ timestamp 103 ] state=[-0.06714954 -0.3630103  -0.03879883  0.11562359], action=0, reward=1.0, next_state=[-0.07440975 -0.55755546 -0.03648636  0.39581773]
[ Experience replay ] starts
[ episode 347 ][ timestamp 104 ] state=[-0.07440975 -0.55755546 -0.03648636  0.39581773], action=1, reward=1.0, next_state=[-0.08556086 -0.36193533 -0.02857     0.09185821]
[ Experience replay ] starts
[ episode 347 ][ timestamp 105 ] state=[-0.08556086 -0.36193533 -0.02857     0.09185821], action=1, reward=1.0, next_state=[-0.09279956 -0.16641577 -0.02673284 -0.20969987]
[ Experience replay

[ episode 347 ][ timestamp 144 ] state=[-0.35565502 -0.55656345  0.01422958  0.37359388], action=1, reward=1.0, next_state=[-0.36678629 -0.3616465   0.02170146  0.0854314 ]
[ Experience replay ] starts
[ episode 347 ][ timestamp 145 ] state=[-0.36678629 -0.3616465   0.02170146  0.0854314 ], action=0, reward=1.0, next_state=[-0.37401922 -0.55707269  0.02341009  0.3848814 ]
[ Experience replay ] starts
[ episode 347 ][ timestamp 146 ] state=[-0.37401922 -0.55707269  0.02341009  0.3848814 ], action=1, reward=1.0, next_state=[-0.38516067 -0.36229078  0.03110771  0.09967052]
[ Experience replay ] starts
[ episode 347 ][ timestamp 147 ] state=[-0.38516067 -0.36229078  0.03110771  0.09967052], action=1, reward=1.0, next_state=[-0.39240649 -0.16762815  0.03310112 -0.18303796]
[ Experience replay ] starts
[ episode 347 ][ timestamp 148 ] state=[-0.39240649 -0.16762815  0.03310112 -0.18303796], action=0, reward=1.0, next_state=[-0.39575905 -0.36320772  0.02944036  0.11990064]
[ Experience replay

[ episode 347 ][ timestamp 187 ] state=[-0.46218802  0.02041526 -0.09067799 -0.32068243], action=0, reward=1.0, next_state=[-0.46177971 -0.17330621 -0.09709164 -0.05791688]
[ Experience replay ] starts
[ episode 347 ][ timestamp 188 ] state=[-0.46177971 -0.17330621 -0.09709164 -0.05791688], action=0, reward=1.0, next_state=[-0.46524584 -0.36691165 -0.09824998  0.20262344]
[ Experience replay ] starts
[ episode 347 ][ timestamp 189 ] state=[-0.46524584 -0.36691165 -0.09824998  0.20262344], action=0, reward=1.0, next_state=[-0.47258407 -0.56050123 -0.09419751  0.46276835]
[ Experience replay ] starts
[ episode 347 ][ timestamp 190 ] state=[-0.47258407 -0.56050123 -0.09419751  0.46276835], action=0, reward=1.0, next_state=[-0.48379409 -0.75417451 -0.08494214  0.72433722]
[ Experience replay ] starts
[ episode 347 ][ timestamp 191 ] state=[-0.48379409 -0.75417451 -0.08494214  0.72433722], action=1, reward=1.0, next_state=[-0.49887759 -0.55798697 -0.0704554   0.40617394]
[ Experience replay

[ episode 348 ][ timestamp 12 ] state=[ 0.00726975  0.15385098  0.00687638 -0.33241123], action=0, reward=1.0, next_state=[ 0.01034677 -0.04136817  0.00022815 -0.03756779]
[ Experience replay ] starts
[ episode 348 ][ timestamp 13 ] state=[ 0.01034677 -0.04136817  0.00022815 -0.03756779], action=0, reward=1.0, next_state=[ 0.0095194  -0.23649339 -0.0005232   0.25518711]
[ Experience replay ] starts
[ episode 348 ][ timestamp 14 ] state=[ 0.0095194  -0.23649339 -0.0005232   0.25518711], action=1, reward=1.0, next_state=[ 0.00478954 -0.04136397  0.00458054 -0.03766079]
[ Experience replay ] starts
[ episode 348 ][ timestamp 15 ] state=[ 0.00478954 -0.04136397  0.00458054 -0.03766079], action=1, reward=1.0, next_state=[ 0.00396226  0.153692    0.00382732 -0.328895  ]
[ Experience replay ] starts
[ episode 348 ][ timestamp 16 ] state=[ 0.00396226  0.153692    0.00382732 -0.328895  ], action=1, reward=1.0, next_state=[ 0.0070361   0.34875925 -0.00275058 -0.62036851]
[ Experience replay ] st

[ episode 348 ][ timestamp 54 ] state=[-0.12837005 -0.60916949  0.03494502  0.45342697], action=1, reward=1.0, next_state=[-0.14055344 -0.41455867  0.04401356  0.17196071]
[ Experience replay ] starts
[ episode 348 ][ timestamp 55 ] state=[-0.14055344 -0.41455867  0.04401356  0.17196071], action=0, reward=1.0, next_state=[-0.14884462 -0.61028203  0.04745277  0.47819724]
[ Experience replay ] starts
[ episode 348 ][ timestamp 56 ] state=[-0.14884462 -0.61028203  0.04745277  0.47819724], action=1, reward=1.0, next_state=[-0.16105026 -0.41586102  0.05701672  0.20083989]
[ Experience replay ] starts
[ episode 348 ][ timestamp 57 ] state=[-0.16105026 -0.41586102  0.05701672  0.20083989], action=1, reward=1.0, next_state=[-0.16936748 -0.22159892  0.06103352 -0.07332591]
[ Experience replay ] starts
[ episode 348 ][ timestamp 58 ] state=[-0.16936748 -0.22159892  0.06103352 -0.07332591], action=0, reward=1.0, next_state=[-0.17379945 -0.41754035  0.059567    0.23797169]
[ Experience replay ] st

[ episode 349 ][ timestamp 17 ] state=[-0.0297023   0.00628682  0.00612359 -0.01958463], action=0, reward=1.0, next_state=[-0.02957656 -0.18892241  0.0057319   0.27502405]
[ Experience replay ] starts
[ episode 349 ][ timestamp 18 ] state=[-0.02957656 -0.18892241  0.0057319   0.27502405], action=1, reward=1.0, next_state=[-0.03335501  0.00611729  0.01123238 -0.01584553]
[ Experience replay ] starts
[ episode 349 ][ timestamp 19 ] state=[-0.03335501  0.00611729  0.01123238 -0.01584553], action=0, reward=1.0, next_state=[-0.03323266 -0.18916393  0.01091547  0.28036008]
[ Experience replay ] starts
[ episode 349 ][ timestamp 20 ] state=[-0.03323266 -0.18916393  0.01091547  0.28036008], action=0, reward=1.0, next_state=[-0.03701594 -0.38443987  0.01652267  0.57646563]
[ Experience replay ] starts
[ episode 349 ][ timestamp 21 ] state=[-0.03701594 -0.38443987  0.01652267  0.57646563], action=1, reward=1.0, next_state=[-0.04470474 -0.18955337  0.02805199  0.28903323]
[ Experience replay ] st

[ episode 349 ][ timestamp 62 ] state=[ 0.07722836  0.57408802  0.00093021 -0.51018989], action=0, reward=1.0, next_state=[ 0.08871012  0.37895297 -0.00927359 -0.21721397]
[ Experience replay ] starts
[ episode 349 ][ timestamp 63 ] state=[ 0.08871012  0.37895297 -0.00927359 -0.21721397], action=0, reward=1.0, next_state=[ 0.09628918  0.18396481 -0.01361787  0.0725293 ]
[ Experience replay ] starts
[ episode 349 ][ timestamp 64 ] state=[ 0.09628918  0.18396481 -0.01361787  0.0725293 ], action=0, reward=1.0, next_state=[ 0.09996848 -0.01095929 -0.01216729  0.36088481]
[ Experience replay ] starts
[ episode 349 ][ timestamp 65 ] state=[ 0.09996848 -0.01095929 -0.01216729  0.36088481], action=0, reward=1.0, next_state=[ 0.09974929 -0.2059062  -0.00494959  0.64970643]
[ Experience replay ] starts
[ episode 349 ][ timestamp 66 ] state=[ 0.09974929 -0.2059062  -0.00494959  0.64970643], action=1, reward=1.0, next_state=[ 0.09563117 -0.01071565  0.00804454  0.35546902]
[ Experience replay ] st

[ episode 349 ][ timestamp 104 ] state=[ 0.37693469  0.55451127 -0.01172551 -0.07840662], action=1, reward=1.0, next_state=[ 0.38802491  0.74979933 -0.01329364 -0.3747658 ]
[ Experience replay ] starts
[ episode 349 ][ timestamp 105 ] state=[ 0.38802491  0.74979933 -0.01329364 -0.3747658 ], action=0, reward=1.0, next_state=[ 0.4030209   0.5548687  -0.02078896 -0.08630391]
[ Experience replay ] starts
[ episode 349 ][ timestamp 106 ] state=[ 0.4030209   0.5548687  -0.02078896 -0.08630391], action=0, reward=1.0, next_state=[ 0.41411827  0.36005082 -0.02251504  0.19974825]
[ Experience replay ] starts
[ episode 349 ][ timestamp 107 ] state=[ 0.41411827  0.36005082 -0.02251504  0.19974825], action=0, reward=1.0, next_state=[ 0.42131929  0.16525801 -0.01852007  0.48524456]
[ Experience replay ] starts
[ episode 349 ][ timestamp 108 ] state=[ 0.42131929  0.16525801 -0.01852007  0.48524456], action=1, reward=1.0, next_state=[ 0.42462445  0.36063634 -0.00881518  0.18678272]
[ Experience replay

[ episode 349 ][ timestamp 145 ] state=[ 0.80769189  0.36706033 -0.02926689  0.0450811 ], action=1, reward=1.0, next_state=[ 0.8150331   0.56258946 -0.02836527 -0.25669023]
[ Experience replay ] starts
[ episode 349 ][ timestamp 146 ] state=[ 0.8150331   0.56258946 -0.02836527 -0.25669023], action=1, reward=1.0, next_state=[ 0.82628489  0.75810466 -0.03349907 -0.55818333]
[ Experience replay ] starts
[ episode 349 ][ timestamp 147 ] state=[ 0.82628489  0.75810466 -0.03349907 -0.55818333], action=0, reward=1.0, next_state=[ 0.84144698  0.56346857 -0.04466274 -0.27623988]
[ Experience replay ] starts
[ episode 349 ][ timestamp 148 ] state=[ 0.84144698  0.56346857 -0.04466274 -0.27623988], action=0, reward=1.0, next_state=[ 0.85271635  0.36901134 -0.05018754  0.00202861]
[ Experience replay ] starts
[ episode 349 ][ timestamp 149 ] state=[ 0.85271635  0.36901134 -0.05018754  0.00202861], action=1, reward=1.0, next_state=[ 0.86009658  0.56481579 -0.05014697 -0.30605719]
[ Experience replay

[ episode 349 ][ timestamp 189 ] state=[ 0.92053861 -0.76633477 -0.00676981  0.97775326], action=0, reward=1.0, next_state=[ 0.90521191 -0.96136529  0.01278526  1.26830203]
[ Experience replay ] starts
[ episode 349 ][ timestamp 190 ] state=[ 0.90521191 -0.96136529  0.01278526  1.26830203], action=1, reward=1.0, next_state=[ 0.8859846  -0.76640895  0.0381513   0.97965018]
[ Experience replay ] starts
[ episode 349 ][ timestamp 191 ] state=[ 0.8859846  -0.76640895  0.0381513   0.97965018], action=1, reward=1.0, next_state=[ 0.87065643 -0.57181861  0.0577443   0.69919084]
[ Experience replay ] starts
[ episode 349 ][ timestamp 192 ] state=[ 0.87065643 -0.57181861  0.0577443   0.69919084], action=1, reward=1.0, next_state=[ 0.85922005 -0.37754278  0.07172812  0.42523019]
[ Experience replay ] starts
[ episode 349 ][ timestamp 193 ] state=[ 0.85922005 -0.37754278  0.07172812  0.42523019], action=1, reward=1.0, next_state=[ 0.8516692  -0.18350627  0.08023272  0.15599382]
[ Experience replay

[ episode 350 ][ timestamp 10 ] state=[-0.08619085  0.16985845  0.03012687 -0.36510289], action=0, reward=1.0, next_state=[-0.08279368 -0.02567841  0.02282481 -0.06307475]
[ Experience replay ] starts
[ episode 350 ][ timestamp 11 ] state=[-0.08279368 -0.02567841  0.02282481 -0.06307475], action=1, reward=1.0, next_state=[-0.08330725  0.16910898  0.02156332 -0.34846981]
[ Experience replay ] starts
[ episode 350 ][ timestamp 12 ] state=[-0.08330725  0.16910898  0.02156332 -0.34846981], action=0, reward=1.0, next_state=[-0.07992507 -0.02631292  0.01459392 -0.04906598]
[ Experience replay ] starts
[ episode 350 ][ timestamp 13 ] state=[-0.07992507 -0.02631292  0.01459392 -0.04906598], action=0, reward=1.0, next_state=[-0.08045132 -0.22164106  0.0136126   0.24818549]
[ Experience replay ] starts
[ episode 350 ][ timestamp 14 ] state=[-0.08045132 -0.22164106  0.0136126   0.24818549], action=1, reward=1.0, next_state=[-0.08488415 -0.02671614  0.01857631 -0.04017283]
[ Experience replay ] st

[ episode 350 ][ timestamp 53 ] state=[-0.06545351 -0.0353018  -0.00596812  0.14921534], action=0, reward=1.0, next_state=[-0.06615954 -0.23033778 -0.00298382  0.44000948]
[ Experience replay ] starts
[ episode 350 ][ timestamp 54 ] state=[-0.06615954 -0.23033778 -0.00298382  0.44000948], action=1, reward=1.0, next_state=[-0.0707663  -0.03517373  0.00581637  0.14638747]
[ Experience replay ] starts
[ episode 350 ][ timestamp 55 ] state=[-0.0707663  -0.03517373  0.00581637  0.14638747], action=1, reward=1.0, next_state=[-0.07146978  0.15986444  0.00874412 -0.14445484]
[ Experience replay ] starts
[ episode 350 ][ timestamp 56 ] state=[-0.07146978  0.15986444  0.00874412 -0.14445484], action=1, reward=1.0, next_state=[-0.06827249  0.35486009  0.00585502 -0.43436639]
[ Experience replay ] starts
[ episode 350 ][ timestamp 57 ] state=[-0.06827249  0.35486009  0.00585502 -0.43436639], action=0, reward=1.0, next_state=[-0.06117528  0.15965573 -0.0028323  -0.13984351]
[ Experience replay ] st

[ episode 350 ][ timestamp 95 ] state=[-0.0270779  -0.03117041 -0.01735144  0.05809357], action=1, reward=1.0, next_state=[-0.02770131  0.16419598 -0.01618957 -0.24001296]
[ Experience replay ] starts
[ episode 350 ][ timestamp 96 ] state=[-0.02770131  0.16419598 -0.01618957 -0.24001296], action=1, reward=1.0, next_state=[-0.02441739  0.35954541 -0.02098982 -0.53775824]
[ Experience replay ] starts
[ episode 350 ][ timestamp 97 ] state=[-0.02441739  0.35954541 -0.02098982 -0.53775824], action=0, reward=1.0, next_state=[-0.01722648  0.16472476 -0.03174499 -0.25176218]
[ Experience replay ] starts
[ episode 350 ][ timestamp 98 ] state=[-0.01722648  0.16472476 -0.03174499 -0.25176218], action=1, reward=1.0, next_state=[-0.01393199  0.36028529 -0.03678023 -0.55428665]
[ Experience replay ] starts
[ episode 350 ][ timestamp 99 ] state=[-0.01393199  0.36028529 -0.03678023 -0.55428665], action=1, reward=1.0, next_state=[-0.00672628  0.55590388 -0.04786597 -0.85832703]
[ Experience replay ] st

[ episode 350 ][ timestamp 136 ] state=[-0.32930284 -0.1954321   0.03670242 -0.32966403], action=0, reward=1.0, next_state=[-0.33321148 -0.39105678  0.03010914 -0.02563654]
[ Experience replay ] starts
[ episode 350 ][ timestamp 137 ] state=[-0.33321148 -0.39105678  0.03010914 -0.02563654], action=1, reward=1.0, next_state=[-0.34103262 -0.19637927  0.02959641 -0.30866971]
[ Experience replay ] starts
[ episode 350 ][ timestamp 138 ] state=[-0.34103262 -0.19637927  0.02959641 -0.30866971], action=1, reward=1.0, next_state=[-0.3449602  -0.00169125  0.02342302 -0.59187377]
[ Experience replay ] starts
[ episode 350 ][ timestamp 139 ] state=[-0.3449602  -0.00169125  0.02342302 -0.59187377], action=1, reward=1.0, next_state=[-0.34499403  0.19309509  0.01158554 -0.87708739]
[ Experience replay ] starts
[ episode 350 ][ timestamp 140 ] state=[-0.34499403  0.19309509  0.01158554 -0.87708739], action=0, reward=1.0, next_state=[-0.34113213 -0.00218239 -0.00595621 -0.58078476]
[ Experience replay

[ episode 350 ][ timestamp 178 ] state=[-0.62601111 -1.33428744 -0.12408765  0.73034924], action=0, reward=1.0, next_state=[-0.65269686 -1.52749585 -0.10948067  0.98154527]
[ Experience replay ] starts
[ episode 350 ][ timestamp 179 ] state=[-0.65269686 -1.52749585 -0.10948067  0.98154527], action=1, reward=1.0, next_state=[-0.68324678 -1.33109057 -0.08984976  0.65657811]
[ Experience replay ] starts
[ episode 350 ][ timestamp 180 ] state=[-0.68324678 -1.33109057 -0.08984976  0.65657811], action=1, reward=1.0, next_state=[-0.70986859 -1.13484032 -0.0767182   0.33700987]
[ Experience replay ] starts
[ episode 350 ][ timestamp 181 ] state=[-0.70986859 -1.13484032 -0.0767182   0.33700987], action=1, reward=1.0, next_state=[-0.73256539 -0.93871522 -0.069978    0.02115451]
[ Experience replay ] starts
[ episode 350 ][ timestamp 182 ] state=[-0.73256539 -0.93871522 -0.069978    0.02115451], action=0, reward=1.0, next_state=[-0.7513397  -1.13276747 -0.06955491  0.29096374]
[ Experience replay

[ episode 351 ][ timestamp 11 ] state=[ 0.03891595 -0.34622315  0.01911342  0.61460879], action=0, reward=1.0, next_state=[ 0.03199149 -0.54160688  0.03140559  0.91324986]
[ Experience replay ] starts
[ episode 351 ][ timestamp 12 ] state=[ 0.03199149 -0.54160688  0.03140559  0.91324986], action=0, reward=1.0, next_state=[ 0.02115935 -0.73713928  0.04967059  1.21563556]
[ Experience replay ] starts
[ episode 351 ][ timestamp 13 ] state=[ 0.02115935 -0.73713928  0.04967059  1.21563556], action=1, reward=1.0, next_state=[ 0.00641656 -0.542692    0.0739833   0.93892151]
[ Experience replay ] starts
[ episode 351 ][ timestamp 14 ] state=[ 0.00641656 -0.542692    0.0739833   0.93892151], action=0, reward=1.0, next_state=[-0.00443728 -0.73872917  0.09276173  1.25390413]
[ Experience replay ] starts
[ episode 351 ][ timestamp 15 ] state=[-0.00443728 -0.73872917  0.09276173  1.25390413], action=1, reward=1.0, next_state=[-0.01921186 -0.54490954  0.11783982  0.99165746]
[ Experience replay ] st

[ episode 351 ][ timestamp 52 ] state=[ 0.47076982  0.96978523 -0.02268078 -0.33511331], action=1, reward=1.0, next_state=[ 0.49016553  1.16522251 -0.02938305 -0.63486141]
[ Experience replay ] starts
[ episode 351 ][ timestamp 53 ] state=[ 0.49016553  1.16522251 -0.02938305 -0.63486141], action=1, reward=1.0, next_state=[ 0.51346998  1.36074171 -0.04208028 -0.93665099]
[ Experience replay ] starts
[ episode 351 ][ timestamp 54 ] state=[ 0.51346998  1.36074171 -0.04208028 -0.93665099], action=1, reward=1.0, next_state=[ 0.54068481  1.5564051  -0.0608133  -1.24225421]
[ Experience replay ] starts
[ episode 351 ][ timestamp 55 ] state=[ 0.54068481  1.5564051  -0.0608133  -1.24225421], action=0, reward=1.0, next_state=[ 0.57181291  1.36211416 -0.08565838 -0.96922463]
[ Experience replay ] starts
[ episode 351 ][ timestamp 56 ] state=[ 0.57181291  1.36211416 -0.08565838 -0.96922463], action=0, reward=1.0, next_state=[ 0.5990552   1.1682401  -0.10504287 -0.70463256]
[ Experience replay ] st

[ episode 352 ][ timestamp 33 ] state=[-0.0837331  -0.00201606  0.02193798 -0.09450253], action=0, reward=1.0, next_state=[-0.08377342 -0.19744546  0.02004793  0.20502028]
[ Experience replay ] starts
[ episode 352 ][ timestamp 34 ] state=[-0.08377342 -0.19744546  0.02004793  0.20502028], action=1, reward=1.0, next_state=[-0.08772233 -0.00261586  0.02414834 -0.08127171]
[ Experience replay ] starts
[ episode 352 ][ timestamp 35 ] state=[-0.08772233 -0.00261586  0.02414834 -0.08127171], action=0, reward=1.0, next_state=[-0.08777465 -0.1980755   0.0225229   0.2189312 ]
[ Experience replay ] starts
[ episode 352 ][ timestamp 36 ] state=[-0.08777465 -0.1980755   0.0225229   0.2189312 ], action=1, reward=1.0, next_state=[-0.09173616 -0.00328263  0.02690153 -0.06656283]
[ Experience replay ] starts
[ episode 352 ][ timestamp 37 ] state=[-0.09173616 -0.00328263  0.02690153 -0.06656283], action=0, reward=1.0, next_state=[-0.09180181 -0.19877974  0.02557027  0.23448482]
[ Experience replay ] st

[ episode 352 ][ timestamp 78 ] state=[-0.03738629  0.18088306 -0.00228264 -0.11753796], action=0, reward=1.0, next_state=[-0.03376863 -0.01420612 -0.0046334   0.17442394]
[ Experience replay ] starts
[ episode 352 ][ timestamp 79 ] state=[-0.03376863 -0.01420612 -0.0046334   0.17442394], action=0, reward=1.0, next_state=[-0.03405275 -0.20926145 -0.00114492  0.46564158]
[ Experience replay ] starts
[ episode 352 ][ timestamp 80 ] state=[-0.03405275 -0.20926145 -0.00114492  0.46564158], action=0, reward=1.0, next_state=[-0.03823798 -0.4043672   0.00816791  0.75796341]
[ Experience replay ] starts
[ episode 352 ][ timestamp 81 ] state=[-0.03823798 -0.4043672   0.00816791  0.75796341], action=1, reward=1.0, next_state=[-0.04632532 -0.20935876  0.02332718  0.46786185]
[ Experience replay ] starts
[ episode 352 ][ timestamp 82 ] state=[-0.04632532 -0.20935876  0.02332718  0.46786185], action=1, reward=1.0, next_state=[-0.0505125  -0.01457401  0.03268441  0.18262177]
[ Experience replay ] st

[ episode 352 ][ timestamp 122 ] state=[ 0.09235463  0.5536677   0.06222645 -0.31847358], action=0, reward=1.0, next_state=[ 0.10342799  0.35771724  0.05585697 -0.00683398]
[ Experience replay ] starts
[ episode 352 ][ timestamp 123 ] state=[ 0.10342799  0.35771724  0.05585697 -0.00683398], action=1, reward=1.0, next_state=[ 0.11058233  0.55199548  0.05572029 -0.28138344]
[ Experience replay ] starts
[ episode 352 ][ timestamp 124 ] state=[ 0.11058233  0.55199548  0.05572029 -0.28138344], action=1, reward=1.0, next_state=[ 0.12162224  0.74628018  0.05009263 -0.55598492]
[ Experience replay ] starts
[ episode 352 ][ timestamp 125 ] state=[ 0.12162224  0.74628018  0.05009263 -0.55598492], action=0, reward=1.0, next_state=[ 0.13654785  0.55049205  0.03897293 -0.24795003]
[ Experience replay ] starts
[ episode 352 ][ timestamp 126 ] state=[ 0.13654785  0.55049205  0.03897293 -0.24795003], action=0, reward=1.0, next_state=[0.14755769 0.35483581 0.03401393 0.05676662]
[ Experience replay ] s

[ episode 352 ][ timestamp 165 ] state=[ 0.48639711 -0.40252302 -0.08468037  0.72041304], action=0, reward=1.0, next_state=[ 0.47834665 -0.59637768 -0.07027211  0.98528681]
[ Experience replay ] starts
[ episode 352 ][ timestamp 166 ] state=[ 0.47834665 -0.59637768 -0.07027211  0.98528681], action=0, reward=1.0, next_state=[ 0.4664191  -0.79049162 -0.05056638  1.25509609]
[ Experience replay ] starts
[ episode 352 ][ timestamp 167 ] state=[ 0.4664191  -0.79049162 -0.05056638  1.25509609], action=1, reward=1.0, next_state=[ 0.45060927 -0.59475997 -0.02546445  0.94701372]
[ Experience replay ] starts
[ episode 352 ][ timestamp 168 ] state=[ 0.45060927 -0.59475997 -0.02546445  0.94701372], action=1, reward=1.0, next_state=[ 0.43871407 -0.39930453 -0.00652418  0.64643987]
[ Experience replay ] starts
[ episode 352 ][ timestamp 169 ] state=[ 0.43871407 -0.39930453 -0.00652418  0.64643987], action=1, reward=1.0, next_state=[ 0.43072798 -0.20409229  0.00640462  0.35170964]
[ Experience replay

[ episode 352 ][ timestamp 208 ] state=[ 0.5447824   0.55062602  0.03249582 -0.25074489], action=1, reward=1.0, next_state=[ 0.55579492  0.74526922  0.02748092 -0.53300346]
[ Experience replay ] starts
[ episode 352 ][ timestamp 209 ] state=[ 0.55579492  0.74526922  0.02748092 -0.53300346], action=1, reward=1.0, next_state=[ 0.5707003   0.93999411  0.01682085 -0.81690214]
[ Experience replay ] starts
[ episode 352 ][ timestamp 210 ] state=[ 0.5707003   0.93999411  0.01682085 -0.81690214], action=0, reward=1.0, next_state=[ 5.89500184e-01  7.44645962e-01  4.82811145e-04 -5.18976276e-01]
[ Experience replay ] starts
[ episode 352 ][ timestamp 211 ] state=[ 5.89500184e-01  7.44645962e-01  4.82811145e-04 -5.18976276e-01], action=0, reward=1.0, next_state=[ 0.6043931   0.54951722 -0.00989671 -0.22614125]
[ Experience replay ] starts
[ episode 352 ][ timestamp 212 ] state=[ 0.6043931   0.54951722 -0.00989671 -0.22614125], action=0, reward=1.0, next_state=[ 0.61538345  0.35453809 -0.01441954 

[ episode 353 ][ timestamp 18 ] state=[ 0.0116252   0.21509142 -0.00792201 -0.0905219 ], action=1, reward=1.0, next_state=[ 0.01592703  0.41032602 -0.00973245 -0.38569366]
[ Experience replay ] starts
[ episode 353 ][ timestamp 19 ] state=[ 0.01592703  0.41032602 -0.00973245 -0.38569366], action=1, reward=1.0, next_state=[ 0.02413355  0.60558478 -0.01744632 -0.68142921]
[ Experience replay ] starts
[ episode 353 ][ timestamp 20 ] state=[ 0.02413355  0.60558478 -0.01744632 -0.68142921], action=0, reward=1.0, next_state=[ 0.03624525  0.41070942 -0.0310749  -0.39428961]
[ Experience replay ] starts
[ episode 353 ][ timestamp 21 ] state=[ 0.03624525  0.41070942 -0.0310749  -0.39428961], action=0, reward=1.0, next_state=[ 0.04445943  0.21604188 -0.03896069 -0.11156383]
[ Experience replay ] starts
[ episode 353 ][ timestamp 22 ] state=[ 0.04445943  0.21604188 -0.03896069 -0.11156383], action=1, reward=1.0, next_state=[ 0.04878027  0.41169982 -0.04119197 -0.41627957]
[ Experience replay ] st

[ episode 354 ][ timestamp 8 ] state=[ 0.02443463 -0.17836339 -0.04014939  0.20346235], action=0, reward=1.0, next_state=[ 0.02086737 -0.37288884 -0.03608014  0.48321464]
[ Experience replay ] starts
[ episode 354 ][ timestamp 9 ] state=[ 0.02086737 -0.37288884 -0.03608014  0.48321464], action=0, reward=1.0, next_state=[ 0.01340959 -0.56748349 -0.02641585  0.76431139]
[ Experience replay ] starts
[ episode 354 ][ timestamp 10 ] state=[ 0.01340959 -0.56748349 -0.02641585  0.76431139], action=1, reward=1.0, next_state=[ 0.00205992 -0.37200791 -0.01112962  0.46343495]
[ Experience replay ] starts
[ episode 354 ][ timestamp 11 ] state=[ 0.00205992 -0.37200791 -0.01112962  0.46343495], action=1, reward=1.0, next_state=[-0.00538024 -0.17673046 -0.00186092  0.16726487]
[ Experience replay ] starts
[ episode 354 ][ timestamp 12 ] state=[-0.00538024 -0.17673046 -0.00186092  0.16726487], action=1, reward=1.0, next_state=[-0.00891485  0.01841808  0.00148438 -0.12600454]
[ Experience replay ] star

[ episode 354 ][ timestamp 52 ] state=[-0.05206227 -0.17232793 -0.03154466  0.07019431], action=1, reward=1.0, next_state=[-0.05550883  0.02323173 -0.03014077 -0.23227184]
[ Experience replay ] starts
[ episode 354 ][ timestamp 53 ] state=[-0.05550883  0.02323173 -0.03014077 -0.23227184], action=0, reward=1.0, next_state=[-0.05504419 -0.17144687 -0.03478621  0.05075338]
[ Experience replay ] starts
[ episode 354 ][ timestamp 54 ] state=[-0.05504419 -0.17144687 -0.03478621  0.05075338], action=1, reward=1.0, next_state=[-0.05847313  0.02415616 -0.03377114 -0.25269875]
[ Experience replay ] starts
[ episode 354 ][ timestamp 55 ] state=[-0.05847313  0.02415616 -0.03377114 -0.25269875], action=1, reward=1.0, next_state=[-0.05799001  0.21974365 -0.03882511 -0.55583953]
[ Experience replay ] starts
[ episode 354 ][ timestamp 56 ] state=[-0.05799001  0.21974365 -0.03882511 -0.55583953], action=1, reward=1.0, next_state=[-0.05359513  0.41538859 -0.0499419  -0.86049749]
[ Experience replay ] st

[ episode 354 ][ timestamp 99 ] state=[-0.41939813 -0.34195531 -0.00854697 -0.1993462 ], action=0, reward=1.0, next_state=[-0.42623724 -0.53695398 -0.0125339   0.09062834]
[ Experience replay ] starts
[ episode 354 ][ timestamp 100 ] state=[-0.42623724 -0.53695398 -0.0125339   0.09062834], action=1, reward=1.0, next_state=[-0.43697632 -0.34165464 -0.01072133 -0.20598253]
[ Experience replay ] starts
[ episode 354 ][ timestamp 101 ] state=[-0.43697632 -0.34165464 -0.01072133 -0.20598253], action=0, reward=1.0, next_state=[-0.44380941 -0.53662165 -0.01484098  0.08329917]
[ Experience replay ] starts
[ episode 354 ][ timestamp 102 ] state=[-0.44380941 -0.53662165 -0.01484098  0.08329917], action=1, reward=1.0, next_state=[-0.45454185 -0.34129013 -0.013175   -0.21402892]
[ Experience replay ] starts
[ episode 354 ][ timestamp 103 ] state=[-0.45454185 -0.34129013 -0.013175   -0.21402892], action=0, reward=1.0, next_state=[-0.46136765 -0.53622127 -0.01745557  0.07446907]
[ Experience replay 

[ episode 354 ][ timestamp 142 ] state=[-0.94657914 -0.89215304 -0.09256822 -0.09743783], action=0, reward=1.0, next_state=[-0.9644222  -1.08583477 -0.09451697  0.16466472]
[ Experience replay ] starts
[ episode 354 ][ timestamp 143 ] state=[-0.9644222  -1.08583477 -0.09451697  0.16466472], action=1, reward=1.0, next_state=[-0.9861389  -0.88949582 -0.09122368 -0.15627583]
[ Experience replay ] starts
[ episode 354 ][ timestamp 144 ] state=[-0.9861389  -0.88949582 -0.09122368 -0.15627583], action=0, reward=1.0, next_state=[-1.00392881 -1.08320128 -0.09434919  0.10629165]
[ Experience replay ] starts
[ episode 354 ][ timestamp 145 ] state=[-1.00392881 -1.08320128 -0.09434919  0.10629165], action=1, reward=1.0, next_state=[-1.02559284 -0.88686279 -0.09222336 -0.21460378]
[ Experience replay ] starts
[ episode 354 ][ timestamp 146 ] state=[-1.02559284 -0.88686279 -0.09222336 -0.21460378], action=0, reward=1.0, next_state=[-1.0433301  -1.08055355 -0.09651544  0.04762246]
[ Experience replay

[ episode 354 ][ timestamp 184 ] state=[-2.11827918 -2.7737612  -0.08490546  1.30085495], action=0, reward=1.0, next_state=[-2.1737544  -2.96770927 -0.05888836  1.56579685]
[ Experience replay ] starts
[ episode 354 ][ timestamp 185 ] state=[-2.1737544  -2.96770927 -0.05888836  1.56579685], action=0, reward=1.0, next_state=[-2.23310859 -3.16208014 -0.02757242  1.83954459]
[ Experience replay ] starts
[ episode 354 ][ timestamp 186 ] state=[-2.23310859 -3.16208014 -0.02757242  1.83954459], action=0, reward=1.0, next_state=[-2.29635019 -3.35688705  0.00921847  2.12353862]
[ Experience replay ] starts
[ episode 354 ][ timestamp 187 ] state=[-2.29635019 -3.35688705  0.00921847  2.12353862], action=0, reward=1.0, next_state=[-2.36348793 -3.55209943  0.05168924  2.41905494]
[ Experience replay ] starts
[ episode 354 ][ timestamp 188 ] state=[-2.36348793 -3.55209943  0.05168924  2.41905494], action=0, reward=-1.0, next_state=[-2.43452992 -3.7476282   0.10007034  2.72714625]
[ Experience repla

[ episode 355 ][ timestamp 40 ] state=[ 0.04474253 -0.21312927  0.02751401  0.23322159], action=1, reward=1.0, next_state=[ 0.04047994 -0.01841103  0.03217844 -0.05065712]
[ Experience replay ] starts
[ episode 355 ][ timestamp 41 ] state=[ 0.04047994 -0.01841103  0.03217844 -0.05065712], action=1, reward=1.0, next_state=[ 0.04011172  0.17623509  0.0311653  -0.33301632]
[ Experience replay ] starts
[ episode 355 ][ timestamp 42 ] state=[ 0.04011172  0.17623509  0.0311653  -0.33301632], action=0, reward=1.0, next_state=[ 0.04363642 -0.01931626  0.02450497 -0.03067062]
[ Experience replay ] starts
[ episode 355 ][ timestamp 43 ] state=[ 0.04363642 -0.01931626  0.02450497 -0.03067062], action=0, reward=1.0, next_state=[ 0.0432501  -0.2147809   0.02389156  0.26964205]
[ Experience replay ] starts
[ episode 355 ][ timestamp 44 ] state=[ 0.0432501  -0.2147809   0.02389156  0.26964205], action=1, reward=1.0, next_state=[ 0.03895448 -0.0200079   0.0292844  -0.01541062]
[ Experience replay ] st

[ episode 355 ][ timestamp 82 ] state=[ 0.18260294  1.30470223  0.0525323  -1.15913161], action=1, reward=1.0, next_state=[ 0.20869699  1.49910179  0.02934966 -1.43489129]
[ Experience replay ] starts
[ episode 355 ][ timestamp 83 ] state=[ 0.20869699  1.49910179  0.02934966 -1.43489129], action=1, reward=1.0, next_state=[ 2.38679022e-01  1.69384974e+00  6.51838387e-04 -1.71825984e+00]
[ Experience replay ] starts
[ episode 355 ][ timestamp 84 ] state=[ 2.38679022e-01  1.69384974e+00  6.51838387e-04 -1.71825984e+00], action=1, reward=1.0, next_state=[ 0.27255602  1.88896421 -0.03371336 -2.01073985]
[ Experience replay ] starts
[ episode 355 ][ timestamp 85 ] state=[ 0.27255602  1.88896421 -0.03371336 -2.01073985], action=0, reward=1.0, next_state=[ 0.3103353   1.69420863 -0.07392816 -1.72868233]
[ Experience replay ] starts
[ episode 355 ][ timestamp 86 ] state=[ 0.3103353   1.69420863 -0.07392816 -1.72868233], action=0, reward=1.0, next_state=[ 0.34421947  1.50000524 -0.1085018  -1.45

[ episode 356 ][ timestamp 32 ] state=[-0.04243094  0.54742083  0.03424304 -0.66326637], action=1, reward=1.0, next_state=[-0.03148253  0.74205007  0.02097771 -0.9449736 ]
[ Experience replay ] starts
[ episode 356 ][ timestamp 33 ] state=[-0.03148253  0.74205007  0.02097771 -0.9449736 ], action=1, reward=1.0, next_state=[-0.01664152  0.93688326  0.00207824 -1.23099209]
[ Experience replay ] starts
[ episode 356 ][ timestamp 34 ] state=[-0.01664152  0.93688326  0.00207824 -1.23099209], action=1, reward=1.0, next_state=[ 0.00209614  1.13197842 -0.0225416  -1.52302319]
[ Experience replay ] starts
[ episode 356 ][ timestamp 35 ] state=[ 0.00209614  1.13197842 -0.0225416  -1.52302319], action=0, reward=1.0, next_state=[ 0.02473571  0.93713587 -0.05300207 -1.23746029]
[ Experience replay ] starts
[ episode 356 ][ timestamp 36 ] state=[ 0.02473571  0.93713587 -0.05300207 -1.23746029], action=1, reward=1.0, next_state=[ 0.04347843  1.13289718 -0.07775127 -1.54626521]
[ Experience replay ] st

[ episode 357 ][ timestamp 32 ] state=[-0.00057467  0.15119831  0.08224781 -0.04455567], action=0, reward=1.0, next_state=[ 0.0024493  -0.04500083  0.0813567   0.27290178]
[ Experience replay ] starts
[ episode 357 ][ timestamp 33 ] state=[ 0.0024493  -0.04500083  0.0813567   0.27290178], action=0, reward=1.0, next_state=[ 0.00154928 -0.24118371  0.08681473  0.59009523]
[ Experience replay ] starts
[ episode 357 ][ timestamp 34 ] state=[ 0.00154928 -0.24118371  0.08681473  0.59009523], action=0, reward=1.0, next_state=[-0.00327439 -0.43740706  0.09861664  0.90881328]
[ Experience replay ] starts
[ episode 357 ][ timestamp 35 ] state=[-0.00327439 -0.43740706  0.09861664  0.90881328], action=0, reward=1.0, next_state=[-0.01202253 -0.63371559  0.1167929   1.23079169]
[ Experience replay ] starts
[ episode 357 ][ timestamp 36 ] state=[-0.01202253 -0.63371559  0.1167929   1.23079169], action=0, reward=1.0, next_state=[-0.02469684 -0.83012997  0.14140874  1.55766523]
[ Experience replay ] st

[ episode 362 ][ timestamp 3 ] state=[-0.04686245 -0.39254164  0.0045157   0.56571317], action=0, reward=1.0, next_state=[-0.05471328 -0.58772665  0.01582996  0.85981531]
[ Experience replay ] starts
[ episode 362 ][ timestamp 4 ] state=[-0.05471328 -0.58772665  0.01582996  0.85981531], action=0, reward=1.0, next_state=[-0.06646781 -0.78306059  0.03302627  1.15743332]
[ Experience replay ] starts
[ episode 362 ][ timestamp 5 ] state=[-0.06646781 -0.78306059  0.03302627  1.15743332], action=0, reward=1.0, next_state=[-0.08212903 -0.97859709  0.05617493  1.46028608]
[ Experience replay ] starts
[ episode 362 ][ timestamp 6 ] state=[-0.08212903 -0.97859709  0.05617493  1.46028608], action=0, reward=1.0, next_state=[-0.10170097 -1.17436102  0.08538065  1.76997552]
[ Experience replay ] starts
[ episode 362 ][ timestamp 7 ] state=[-0.10170097 -1.17436102  0.08538065  1.76997552], action=0, reward=1.0, next_state=[-0.12518819 -1.37033659  0.12078016  2.08793948]
[ Experience replay ] starts


[ episode 366 ][ timestamp 7 ] state=[-0.09656109 -1.12432045  0.08311407  1.7415151 ], action=0, reward=1.0, next_state=[-0.1190475  -1.32028443  0.11794438  2.05885378]
[ Experience replay ] starts
[ episode 366 ][ timestamp 8 ] state=[-0.1190475  -1.32028443  0.11794438  2.05885378], action=0, reward=1.0, next_state=[-0.14545319 -1.51639691  0.15912145  2.38557411]
[ Experience replay ] starts
[ episode 366 ][ timestamp 9 ] state=[-0.14545319 -1.51639691  0.15912145  2.38557411], action=0, reward=1.0, next_state=[-0.17578113 -1.71252253  0.20683293  2.72263056]
[ Experience replay ] starts
[ episode 366 ][ timestamp 10 ] state=[-0.17578113 -1.71252253  0.20683293  2.72263056], action=0, reward=-1.0, next_state=[-0.21003158 -1.90843717  0.26128554  3.07061524]
[ Experience replay ] starts
[ Ended! ] Episode 366: Exploration_rate=0.01. Score=10.
[ episode 367 ] state=[0.01991573 0.02081575 0.01363675 0.00308637]
[ episode 367 ][ timestamp 1 ] state=[0.01991573 0.02081575 0.01363675 0.

[ episode 371 ][ timestamp 3 ] state=[ 0.04395054 -0.34937176  0.03233534  0.60637112], action=0, reward=1.0, next_state=[ 0.0369631  -0.54493057  0.04446277  0.90906093]
[ Experience replay ] starts
[ episode 371 ][ timestamp 4 ] state=[ 0.0369631  -0.54493057  0.04446277  0.90906093], action=0, reward=1.0, next_state=[ 0.02606449 -0.74062522  0.06264398  1.21538054]
[ Experience replay ] starts
[ episode 371 ][ timestamp 5 ] state=[ 0.02606449 -0.74062522  0.06264398  1.21538054], action=0, reward=1.0, next_state=[ 0.01125199 -0.93649684  0.0869516   1.52701696]
[ Experience replay ] starts
[ episode 371 ][ timestamp 6 ] state=[ 0.01125199 -0.93649684  0.0869516   1.52701696], action=0, reward=1.0, next_state=[-0.00747795 -1.13255378  0.11749193  1.84552291]
[ Experience replay ] starts
[ episode 371 ][ timestamp 7 ] state=[-0.00747795 -1.13255378  0.11749193  1.84552291], action=0, reward=1.0, next_state=[-0.03012902 -1.32875849  0.15440239  2.17226416]
[ Experience replay ] starts


[ episode 375 ][ timestamp 9 ] state=[-0.06769733 -1.56645583  0.1722464   2.45977573], action=0, reward=-1.0, next_state=[-0.09902645 -1.7625662   0.22144191  2.79997869]
[ Experience replay ] starts
[ Ended! ] Episode 375: Exploration_rate=0.01. Score=9.
[ episode 376 ] state=[ 0.04914428 -0.01022997  0.00467768 -0.01603384]
[ episode 376 ][ timestamp 1 ] state=[ 0.04914428 -0.01022997  0.00467768 -0.01603384], action=0, reward=1.0, next_state=[ 0.04893969 -0.20541869  0.004357    0.27812127]
[ Experience replay ] starts
[ episode 376 ][ timestamp 2 ] state=[ 0.04893969 -0.20541869  0.004357    0.27812127], action=0, reward=1.0, next_state=[ 0.04483131 -0.40060252  0.00991942  0.5721752 ]
[ Experience replay ] starts
[ episode 376 ][ timestamp 3 ] state=[ 0.04483131 -0.40060252  0.00991942  0.5721752 ], action=0, reward=1.0, next_state=[ 0.03681926 -0.59586215  0.02136293  0.86796649]
[ Experience replay ] starts
[ episode 376 ][ timestamp 4 ] state=[ 0.03681926 -0.59586215  0.021362

[ episode 380 ][ timestamp 4 ] state=[ 0.00340927 -0.56737346 -0.00873866  0.84575681], action=0, reward=1.0, next_state=[-0.0079382  -0.7623751   0.00817648  1.13567897]
[ Experience replay ] starts
[ episode 380 ][ timestamp 5 ] state=[-0.0079382  -0.7623751   0.00817648  1.13567897], action=0, reward=1.0, next_state=[-0.0231857  -0.95760307  0.03089006  1.43091498]
[ Experience replay ] starts
[ episode 380 ][ timestamp 6 ] state=[-0.0231857  -0.95760307  0.03089006  1.43091498], action=0, reward=1.0, next_state=[-0.04233777 -1.1530924   0.05950836  1.73308933]
[ Experience replay ] starts
[ episode 380 ][ timestamp 7 ] state=[-0.04233777 -1.1530924   0.05950836  1.73308933], action=0, reward=1.0, next_state=[-0.06539961 -1.34884084  0.09417014  2.04367738]
[ Experience replay ] starts
[ episode 380 ][ timestamp 8 ] state=[-0.06539961 -1.34884084  0.09417014  2.04367738], action=0, reward=1.0, next_state=[-0.09237643 -1.54479543  0.13504369  2.36395205]
[ Experience replay ] starts


[ episode 384 ][ timestamp 6 ] state=[ 0.0020777  -1.00790052  0.01802952  1.41585725], action=0, reward=1.0, next_state=[-0.01808032 -1.20324108  0.04634666  1.71412086]
[ Experience replay ] starts
[ episode 384 ][ timestamp 7 ] state=[-0.01808032 -1.20324108  0.04634666  1.71412086], action=0, reward=1.0, next_state=[-0.04214514 -1.39886323  0.08062908  2.02086003]
[ Experience replay ] starts
[ episode 384 ][ timestamp 8 ] state=[-0.04214514 -1.39886323  0.08062908  2.02086003], action=0, reward=1.0, next_state=[-0.0701224  -1.59472265  0.12104628  2.33737398]
[ Experience replay ] starts
[ episode 384 ][ timestamp 9 ] state=[-0.0701224  -1.59472265  0.12104628  2.33737398], action=0, reward=1.0, next_state=[-0.10201685 -1.79071097  0.16779376  2.6647061 ]
[ Experience replay ] starts
[ episode 384 ][ timestamp 10 ] state=[-0.10201685 -1.79071097  0.16779376  2.6647061 ], action=0, reward=-1.0, next_state=[-0.13783107 -1.98663762  0.22108788  3.00356878]
[ Experience replay ] start

[ episode 388 ][ timestamp 9 ] state=[-0.12718159 -1.54230072  0.19725173  2.54112444], action=0, reward=-1.0, next_state=[-0.15802761 -1.73839307  0.24807422  2.88717597]
[ Experience replay ] starts
[ Ended! ] Episode 388: Exploration_rate=0.01. Score=9.
[ episode 389 ] state=[0.00574797 0.0184636  0.03173773 0.02296388]
[ episode 389 ][ timestamp 1 ] state=[0.00574797 0.0184636  0.03173773 0.02296388], action=0, reward=1.0, next_state=[ 0.00611724 -0.17709878  0.03219701  0.32548906]
[ Experience replay ] starts
[ episode 389 ][ timestamp 2 ] state=[ 0.00611724 -0.17709878  0.03219701  0.32548906], action=0, reward=1.0, next_state=[ 0.00257527 -0.37266401  0.03870679  0.62814915]
[ Experience replay ] starts
[ episode 389 ][ timestamp 3 ] state=[ 0.00257527 -0.37266401  0.03870679  0.62814915], action=0, reward=1.0, next_state=[-0.00487801 -0.56830419  0.05126977  0.93276656]
[ Experience replay ] starts
[ episode 389 ][ timestamp 4 ] state=[-0.00487801 -0.56830419  0.05126977  0.93

[ episode 393 ][ timestamp 5 ] state=[-0.04528904 -0.75577384  0.00585809  1.16224748], action=0, reward=1.0, next_state=[-0.06040452 -0.95097159  0.02910304  1.45676135]
[ Experience replay ] starts
[ episode 393 ][ timestamp 6 ] state=[-0.06040452 -0.95097159  0.02910304  1.45676135], action=0, reward=1.0, next_state=[-0.07942395 -1.14643833  0.05823827  1.75839239]
[ Experience replay ] starts
[ episode 393 ][ timestamp 7 ] state=[-0.07942395 -1.14643833  0.05823827  1.75839239], action=0, reward=1.0, next_state=[-0.10235272 -1.34216952  0.09340612  2.06860379]
[ Experience replay ] starts
[ episode 393 ][ timestamp 8 ] state=[-0.10235272 -1.34216952  0.09340612  2.06860379], action=0, reward=1.0, next_state=[-0.12919611 -1.53810917  0.13477819  2.38865354]
[ Experience replay ] starts
[ episode 393 ][ timestamp 9 ] state=[-0.12919611 -1.53810917  0.13477819  2.38865354], action=0, reward=1.0, next_state=[-0.15995829 -1.73413377  0.18255126  2.71952879]
[ Experience replay ] starts


[ Ended! ] Episode 397: Exploration_rate=0.01. Score=9.
[ episode 398 ] state=[ 4.32657751e-02 -2.64634405e-03 -2.07686765e-05  1.89438636e-02]
[ episode 398 ][ timestamp 1 ] state=[ 4.32657751e-02 -2.64634405e-03 -2.07686765e-05  1.89438636e-02], action=0, reward=1.0, next_state=[ 0.04321285 -0.197768    0.00035811  0.31162024]
[ Experience replay ] starts
[ episode 398 ][ timestamp 2 ] state=[ 0.04321285 -0.197768    0.00035811  0.31162024], action=0, reward=1.0, next_state=[ 0.03925749 -0.39289505  0.00659051  0.60441608]
[ Experience replay ] starts
[ episode 398 ][ timestamp 3 ] state=[ 0.03925749 -0.39289505  0.00659051  0.60441608], action=0, reward=1.0, next_state=[ 0.03139959 -0.58810855  0.01867883  0.89916756]
[ Experience replay ] starts
[ episode 398 ][ timestamp 4 ] state=[ 0.03139959 -0.58810855  0.01867883  0.89916756], action=0, reward=1.0, next_state=[ 0.01963742 -0.7834786   0.03666219  1.19766278]
[ Experience replay ] starts
[ episode 398 ][ timestamp 5 ] state=[ 0

[ episode 402 ][ timestamp 7 ] state=[-0.08835346 -1.15088477  0.08404741  1.79791479], action=0, reward=1.0, next_state=[-0.11137116 -1.34684063  0.1200057   2.11549187]
[ Experience replay ] starts
[ episode 402 ][ timestamp 8 ] state=[-0.11137116 -1.34684063  0.1200057   2.11549187], action=0, reward=1.0, next_state=[-0.13830797 -1.5429388   0.16231554  2.44272067]
[ Experience replay ] starts
[ episode 402 ][ timestamp 9 ] state=[-0.13830797 -1.5429388   0.16231554  2.44272067], action=0, reward=-1.0, next_state=[-0.16916675 -1.73903246  0.21116995  2.78050638]
[ Experience replay ] starts
[ Ended! ] Episode 402: Exploration_rate=0.01. Score=9.
[ episode 403 ] state=[-0.00335713  0.02232431  0.03636318  0.04782606]
[ episode 403 ][ timestamp 1 ] state=[-0.00335713  0.02232431  0.03636318  0.04782606], action=0, reward=1.0, next_state=[-0.00291065 -0.17329968  0.0373197   0.35175648]
[ Experience replay ] starts
[ episode 403 ][ timestamp 2 ] state=[-0.00291065 -0.17329968  0.037319

[ episode 407 ][ timestamp 7 ] state=[-0.01519323 -1.14141218  0.10336823  1.77583135], action=0, reward=1.0, next_state=[-0.03802147 -1.3375358   0.13888485  2.09878265]
[ Experience replay ] starts
[ episode 407 ][ timestamp 8 ] state=[-0.03802147 -1.3375358   0.13888485  2.09878265], action=0, reward=1.0, next_state=[-0.06477219 -1.53375395  0.18086051  2.4309768 ]
[ Experience replay ] starts
[ episode 407 ][ timestamp 9 ] state=[-0.06477219 -1.53375395  0.18086051  2.4309768 ], action=0, reward=-1.0, next_state=[-0.09544727 -1.72991203  0.22948004  2.77329829]
[ Experience replay ] starts
[ Ended! ] Episode 407: Exploration_rate=0.01. Score=9.
[ episode 408 ] state=[ 0.04543535 -0.00285593  0.01156972 -0.00165772]
[ episode 408 ][ timestamp 1 ] state=[ 0.04543535 -0.00285593  0.01156972 -0.00165772], action=0, reward=1.0, next_state=[ 0.04537823 -0.19814188  0.01153656  0.29465302]
[ Experience replay ] starts
[ episode 408 ][ timestamp 2 ] state=[ 0.04537823 -0.19814188  0.011536

[ Ended! ] Episode 411: Exploration_rate=0.01. Score=10.
[ episode 412 ] state=[-0.01539732 -0.04664588 -0.03585672 -0.01576753]
[ episode 412 ][ timestamp 1 ] state=[-0.01539732 -0.04664588 -0.03585672 -0.01576753], action=0, reward=1.0, next_state=[-0.01633024 -0.24123575 -0.03617207  0.26539003]
[ Experience replay ] starts
[ episode 412 ][ timestamp 2 ] state=[-0.01633024 -0.24123575 -0.03617207  0.26539003], action=0, reward=1.0, next_state=[-0.02115496 -0.43582325 -0.03086427  0.54644809]
[ Experience replay ] starts
[ episode 412 ][ timestamp 3 ] state=[-0.02115496 -0.43582325 -0.03086427  0.54644809], action=0, reward=1.0, next_state=[-0.02987142 -0.63049827 -0.01993531  0.82924889]
[ Experience replay ] starts
[ episode 412 ][ timestamp 4 ] state=[-0.02987142 -0.63049827 -0.01993531  0.82924889], action=0, reward=1.0, next_state=[-0.04248139 -0.8253421  -0.00335033  1.11559598]
[ Experience replay ] starts
[ episode 412 ][ timestamp 5 ] state=[-0.04248139 -0.8253421  -0.003350

[ episode 416 ][ timestamp 8 ] state=[-0.10129031 -1.32048287  0.11730992  2.09817663], action=0, reward=1.0, next_state=[-0.12769996 -1.51657259  0.15927345  2.42469971]
[ Experience replay ] starts
[ episode 416 ][ timestamp 9 ] state=[-0.12769996 -1.51657259  0.15927345  2.42469971], action=0, reward=1.0, next_state=[-0.15803142 -1.71266969  0.20776745  2.76175097]
[ Experience replay ] starts
[ episode 416 ][ timestamp 10 ] state=[-0.15803142 -1.71266969  0.20776745  2.76175097], action=0, reward=-1.0, next_state=[-0.19228481 -1.90854155  0.26300247  3.10988519]
[ Experience replay ] starts
[ Ended! ] Episode 416: Exploration_rate=0.01. Score=10.
[ episode 417 ] state=[0.03667538 0.0320063  0.0452019  0.03397243]
[ episode 417 ][ timestamp 1 ] state=[0.03667538 0.0320063  0.0452019  0.03397243], action=0, reward=1.0, next_state=[ 0.0373155  -0.16373373  0.04588135  0.34056742]
[ Experience replay ] starts
[ episode 417 ][ timestamp 2 ] state=[ 0.0373155  -0.16373373  0.04588135  0.

[ episode 421 ][ timestamp 3 ] state=[-0.02591537 -0.36359983 -0.00924135  0.53057965], action=0, reward=1.0, next_state=[-0.03318737 -0.55859057  0.00137025  0.82033636]
[ Experience replay ] starts
[ episode 421 ][ timestamp 4 ] state=[-0.03318737 -0.55859057  0.00137025  0.82033636], action=0, reward=1.0, next_state=[-0.04435918 -0.75373125  0.01777697  1.11344995]
[ Experience replay ] starts
[ episode 421 ][ timestamp 5 ] state=[-0.04435918 -0.75373125  0.01777697  1.11344995], action=0, reward=1.0, next_state=[-0.0594338  -0.94908208  0.04004597  1.41165605]
[ Experience replay ] starts
[ episode 421 ][ timestamp 6 ] state=[-0.0594338  -0.94908208  0.04004597  1.41165605], action=0, reward=1.0, next_state=[-0.07841545 -1.14467696  0.06827909  1.71658352]
[ Experience replay ] starts
[ episode 421 ][ timestamp 7 ] state=[-0.07841545 -1.14467696  0.06827909  1.71658352], action=0, reward=1.0, next_state=[-0.10130899 -1.34051226  0.10261076  2.02971044]
[ Experience replay ] starts


[ episode 425 ][ timestamp 6 ] state=[-1.38875546e-03 -9.65770743e-01  9.18968708e-02  1.52666868e+00], action=0, reward=1.0, next_state=[-0.02070417 -1.16187372  0.12243024  1.84656161]
[ Experience replay ] starts
[ episode 425 ][ timestamp 7 ] state=[-0.02070417 -1.16187372  0.12243024  1.84656161], action=0, reward=1.0, next_state=[-0.04394164 -1.35811359  0.15936148  2.1746227 ]
[ Experience replay ] starts
[ episode 425 ][ timestamp 8 ] state=[-0.04394164 -1.35811359  0.15936148  2.1746227 ], action=0, reward=1.0, next_state=[-0.07110392 -1.55438873  0.20285393  2.51195908]
[ Experience replay ] starts
[ episode 425 ][ timestamp 9 ] state=[-0.07110392 -1.55438873  0.20285393  2.51195908], action=0, reward=-1.0, next_state=[-0.10219169 -1.75051804  0.25309311  2.85935165]
[ Experience replay ] starts
[ Ended! ] Episode 425: Exploration_rate=0.01. Score=9.
[ episode 426 ] state=[ 0.03805554 -0.04290938 -0.04362569  0.0465677 ]
[ episode 426 ][ timestamp 1 ] state=[ 0.03805554 -0.04

[ episode 430 ][ timestamp 4 ] state=[ 0.02086035 -0.57565363  0.01922923  0.84286201], action=0, reward=1.0, next_state=[ 0.00934728 -0.77103267  0.03608647  1.14152945]
[ Experience replay ] starts
[ episode 430 ][ timestamp 5 ] state=[ 0.00934728 -0.77103267  0.03608647  1.14152945], action=0, reward=1.0, next_state=[-0.00607337 -0.96660721  0.05891706  1.44530738]
[ Experience replay ] starts
[ episode 430 ][ timestamp 6 ] state=[-0.00607337 -0.96660721  0.05891706  1.44530738], action=0, reward=1.0, next_state=[-0.02540552 -1.16240249  0.08782321  1.75580231]
[ Experience replay ] starts
[ episode 430 ][ timestamp 7 ] state=[-0.02540552 -1.16240249  0.08782321  1.75580231], action=0, reward=1.0, next_state=[-0.04865357 -1.35840336  0.12293926  2.07445738]
[ Experience replay ] starts
[ episode 430 ][ timestamp 8 ] state=[-0.04865357 -1.35840336  0.12293926  2.07445738], action=0, reward=1.0, next_state=[-0.07582163 -1.55454006  0.1644284   2.40249507]
[ Experience replay ] starts


[ episode 432 ][ timestamp 16 ] state=[-0.00533729 -0.1715914  -0.02403379  0.33345167], action=0, reward=1.0, next_state=[-0.00876912 -0.36636318 -0.01736476  0.61845971]
[ Experience replay ] starts
[ episode 432 ][ timestamp 17 ] state=[-0.00876912 -0.36636318 -0.01736476  0.61845971], action=0, reward=1.0, next_state=[-0.01609638 -0.56123832 -0.00499556  0.90562338]
[ Experience replay ] starts
[ episode 432 ][ timestamp 18 ] state=[-0.01609638 -0.56123832 -0.00499556  0.90562338], action=0, reward=1.0, next_state=[-0.02732115 -0.75629227  0.0131169   1.19673196]
[ Experience replay ] starts
[ episode 432 ][ timestamp 19 ] state=[-0.02732115 -0.75629227  0.0131169   1.19673196], action=0, reward=1.0, next_state=[-0.04244699 -0.95158153  0.03705154  1.49349691]
[ Experience replay ] starts
[ episode 432 ][ timestamp 20 ] state=[-0.04244699 -0.95158153  0.03705154  1.49349691], action=0, reward=1.0, next_state=[-0.06147863 -1.14713412  0.06692148  1.79751514]
[ Experience replay ] st

[ episode 435 ][ timestamp 9 ] state=[-0.03085316  0.43516603  0.03228356 -0.52826658], action=1, reward=1.0, next_state=[-0.02214984  0.62981926  0.02171823 -0.81060456]
[ Experience replay ] starts
[ episode 435 ][ timestamp 10 ] state=[-0.02214984  0.62981926  0.02171823 -0.81060456], action=1, reward=1.0, next_state=[-0.00955346  0.82463703  0.00550614 -1.09637765]
[ Experience replay ] starts
[ episode 435 ][ timestamp 11 ] state=[-0.00955346  0.82463703  0.00550614 -1.09637765], action=1, reward=1.0, next_state=[ 0.00693928  1.01968604 -0.01642142 -1.38732794]
[ Experience replay ] starts
[ episode 435 ][ timestamp 12 ] state=[ 0.00693928  1.01968604 -0.01642142 -1.38732794], action=1, reward=1.0, next_state=[ 0.027333    1.21500877 -0.04416798 -1.68510021]
[ Experience replay ] starts
[ episode 435 ][ timestamp 13 ] state=[ 0.027333    1.21500877 -0.04416798 -1.68510021], action=1, reward=1.0, next_state=[ 0.05163318  1.41061311 -0.07786998 -1.99120174]
[ Experience replay ] sta

[ episode 439 ][ timestamp 4 ] state=[ 0.00127954  0.57910412  0.01918474 -0.8065233 ], action=1, reward=1.0, next_state=[ 0.01286162  0.77395793  0.00305427 -1.09311026]
[ Experience replay ] starts
[ episode 439 ][ timestamp 5 ] state=[ 0.01286162  0.77395793  0.00305427 -1.09311026], action=1, reward=1.0, next_state=[ 0.02834078  0.9690395  -0.01880793 -1.3848333 ]
[ Experience replay ] starts
[ episode 439 ][ timestamp 6 ] state=[ 0.02834078  0.9690395  -0.01880793 -1.3848333 ], action=0, reward=1.0, next_state=[ 0.04772157  0.77415708 -0.0465046  -1.09809057]
[ Experience replay ] starts
[ episode 439 ][ timestamp 7 ] state=[ 0.04772157  0.77415708 -0.0465046  -1.09809057], action=0, reward=1.0, next_state=[ 0.06320471  0.57967718 -0.06846641 -0.82035353]
[ Experience replay ] starts
[ episode 439 ][ timestamp 8 ] state=[ 0.06320471  0.57967718 -0.06846641 -0.82035353], action=0, reward=1.0, next_state=[ 0.07479825  0.38555563 -0.08487348 -0.54996683]
[ Experience replay ] starts


[ episode 442 ][ timestamp 14 ] state=[-0.02890041  0.1830789   0.02826263 -0.33265658], action=1, reward=1.0, next_state=[-0.02523883  0.37778741  0.0216095  -0.6162946 ]
[ Experience replay ] starts
[ episode 442 ][ timestamp 15 ] state=[-0.02523883  0.37778741  0.0216095  -0.6162946 ], action=1, reward=1.0, next_state=[-0.01768308  0.5726009   0.00928361 -0.9020939 ]
[ Experience replay ] starts
[ episode 442 ][ timestamp 16 ] state=[-0.01768308  0.5726009   0.00928361 -0.9020939 ], action=0, reward=1.0, next_state=[-0.00623106  0.37735442 -0.00875827 -0.60650746]
[ Experience replay ] starts
[ episode 442 ][ timestamp 17 ] state=[-0.00623106  0.37735442 -0.00875827 -0.60650746], action=0, reward=1.0, next_state=[ 0.00131603  0.18235602 -0.02088842 -0.31659597]
[ Experience replay ] starts
[ episode 442 ][ timestamp 18 ] state=[ 0.00131603  0.18235602 -0.02088842 -0.31659597], action=0, reward=1.0, next_state=[ 0.00496315 -0.01246228 -0.02722034 -0.03057303]
[ Experience replay ] st

[ episode 445 ][ timestamp 8 ] state=[ 0.02887053  0.9782108  -0.04809191 -1.37991507], action=0, reward=1.0, next_state=[ 0.04843474  0.78372107 -0.07569021 -1.10265136]
[ Experience replay ] starts
[ episode 445 ][ timestamp 9 ] state=[ 0.04843474  0.78372107 -0.07569021 -1.10265136], action=1, reward=1.0, next_state=[ 0.06410916  0.97975268 -0.09774324 -1.41808854]
[ Experience replay ] starts
[ episode 445 ][ timestamp 10 ] state=[ 0.06410916  0.97975268 -0.09774324 -1.41808854], action=0, reward=1.0, next_state=[ 0.08370422  0.78596717 -0.12610501 -1.15748849]
[ Experience replay ] starts
[ episode 445 ][ timestamp 11 ] state=[ 0.08370422  0.78596717 -0.12610501 -1.15748849], action=0, reward=1.0, next_state=[ 0.09942356  0.59269394 -0.14925478 -0.90685743]
[ Experience replay ] starts
[ episode 445 ][ timestamp 12 ] state=[ 0.09942356  0.59269394 -0.14925478 -0.90685743], action=0, reward=1.0, next_state=[ 0.11127744  0.39987354 -0.16739193 -0.66456061]
[ Experience replay ] star

[ episode 445 ][ timestamp 49 ] state=[-0.18910602  0.82875385 -0.17754256 -2.11926613], action=1, reward=-1.0, next_state=[-0.17253095  1.02514675 -0.21992788 -2.46114846]
[ Experience replay ] starts
[ Ended! ] Episode 445: Exploration_rate=0.01. Score=49.
[ episode 446 ] state=[-0.04706975  0.02789885 -0.00396621 -0.04816886]
[ episode 446 ][ timestamp 1 ] state=[-0.04706975  0.02789885 -0.00396621 -0.04816886], action=1, reward=1.0, next_state=[-0.04651177  0.22307744 -0.00492959 -0.34210051]
[ Experience replay ] starts
[ episode 446 ][ timestamp 2 ] state=[-0.04651177  0.22307744 -0.00492959 -0.34210051], action=1, reward=1.0, next_state=[-0.04205022  0.41826918 -0.0117716  -0.63633386]
[ Experience replay ] starts
[ episode 446 ][ timestamp 3 ] state=[-0.04205022  0.41826918 -0.0117716  -0.63633386], action=1, reward=1.0, next_state=[-0.03368484  0.61355331 -0.02449828 -0.93270052]
[ Experience replay ] starts
[ episode 446 ][ timestamp 4 ] state=[-0.03368484  0.61355331 -0.0244

[ episode 448 ][ timestamp 7 ] state=[ 0.03069555  0.75083926 -0.03150768 -1.08963089], action=0, reward=1.0, next_state=[ 0.04571233  0.55614652 -0.0533003  -0.80699846]
[ Experience replay ] starts
[ episode 448 ][ timestamp 8 ] state=[ 0.04571233  0.55614652 -0.0533003  -0.80699846], action=0, reward=1.0, next_state=[ 0.05683526  0.36179404 -0.06944027 -0.53154661]
[ Experience replay ] starts
[ episode 448 ][ timestamp 9 ] state=[ 0.05683526  0.36179404 -0.06944027 -0.53154661], action=0, reward=1.0, next_state=[ 0.06407114  0.16771401 -0.0800712  -0.2615272 ]
[ Experience replay ] starts
[ episode 448 ][ timestamp 10 ] state=[ 0.06407114  0.16771401 -0.0800712  -0.2615272 ], action=0, reward=1.0, next_state=[ 0.06742542 -0.02617908 -0.08530174  0.0048648 ]
[ Experience replay ] starts
[ episode 448 ][ timestamp 11 ] state=[ 0.06742542 -0.02617908 -0.08530174  0.0048648 ], action=0, reward=1.0, next_state=[ 0.06690184 -0.21998067 -0.08520445  0.26946189]
[ Experience replay ] start

[ episode 449 ][ timestamp 3 ] state=[ 0.04126507  0.41913721 -0.02108991 -0.59684107], action=1, reward=1.0, next_state=[ 0.04964781  0.61454784 -0.03302673 -0.89609181]
[ Experience replay ] starts
[ episode 449 ][ timestamp 4 ] state=[ 0.04964781  0.61454784 -0.03302673 -0.89609181], action=1, reward=1.0, next_state=[ 0.06193877  0.81010163 -0.05094857 -1.19897063]
[ Experience replay ] starts
[ episode 449 ][ timestamp 5 ] state=[ 0.06193877  0.81010163 -0.05094857 -1.19897063], action=0, reward=1.0, next_state=[ 0.0781408   0.61567457 -0.07492798 -0.92268086]
[ Experience replay ] starts
[ episode 449 ][ timestamp 6 ] state=[ 0.0781408   0.61567457 -0.07492798 -0.92268086], action=0, reward=1.0, next_state=[ 0.09045429  0.42164056 -0.0933816  -0.6544547 ]
[ Experience replay ] starts
[ episode 449 ][ timestamp 7 ] state=[ 0.09045429  0.42164056 -0.0933816  -0.6544547 ], action=1, reward=1.0, next_state=[ 0.0988871   0.61793012 -0.1064707  -0.97502053]
[ Experience replay ] starts


[ episode 452 ][ timestamp 8 ] state=[ 0.07889919  1.34905594 -0.1702791  -2.19864803], action=1, reward=-1.0, next_state=[ 0.10588031  1.54536136 -0.21425206 -2.53866804]
[ Experience replay ] starts
[ Ended! ] Episode 452: Exploration_rate=0.01. Score=8.
[ episode 453 ] state=[-0.03449875  0.00239387  0.04390228  0.00128242]
[ episode 453 ][ timestamp 1 ] state=[-0.03449875  0.00239387  0.04390228  0.00128242], action=1, reward=1.0, next_state=[-0.03445087  0.1968596   0.04392792 -0.27723198]
[ Experience replay ] starts
[ episode 453 ][ timestamp 2 ] state=[-0.03445087  0.1968596   0.04392792 -0.27723198], action=1, reward=1.0, next_state=[-0.03051368  0.39132822  0.03838328 -0.55574286]
[ Experience replay ] starts
[ episode 453 ][ timestamp 3 ] state=[-0.03051368  0.39132822  0.03838328 -0.55574286], action=1, reward=1.0, next_state=[-0.02268711  0.58589084  0.02726843 -0.83608992]
[ Experience replay ] starts
[ episode 453 ][ timestamp 4 ] state=[-0.02268711  0.58589084  0.027268

[ episode 457 ][ timestamp 5 ] state=[-0.0262414   0.43414254 -0.03909853 -0.60012173], action=1, reward=1.0, next_state=[-0.01755855  0.62978905 -0.05110097 -0.90485924]
[ Experience replay ] starts
[ episode 457 ][ timestamp 6 ] state=[-0.01755855  0.62978905 -0.05110097 -0.90485924], action=1, reward=1.0, next_state=[-0.00496277  0.82556441 -0.06919815 -1.21315609]
[ Experience replay ] starts
[ episode 457 ][ timestamp 7 ] state=[-0.00496277  0.82556441 -0.06919815 -1.21315609], action=1, reward=1.0, next_state=[ 0.01154852  1.02150778 -0.09346127 -1.52669576]
[ Experience replay ] starts
[ episode 457 ][ timestamp 8 ] state=[ 0.01154852  1.02150778 -0.09346127 -1.52669576], action=1, reward=1.0, next_state=[ 0.03197867  1.21762511 -0.12399519 -1.84702551]
[ Experience replay ] starts
[ episode 457 ][ timestamp 9 ] state=[ 0.03197867  1.21762511 -0.12399519 -1.84702551], action=0, reward=1.0, next_state=[ 0.05633118  1.02406826 -0.1609357  -1.59528054]
[ Experience replay ] starts


[ episode 461 ][ timestamp 10 ] state=[ 0.12114716  1.75560535 -0.17633889 -2.70775421], action=1, reward=-1.0, next_state=[ 0.15625927  1.95150818 -0.23049397 -3.0486269 ]
[ Experience replay ] starts
[ Ended! ] Episode 461: Exploration_rate=0.01. Score=10.
[ episode 462 ] state=[-0.04173002  0.00806664  0.02601074  0.02639256]
[ episode 462 ][ timestamp 1 ] state=[-0.04173002  0.00806664  0.02601074  0.02639256], action=1, reward=1.0, next_state=[-0.04156869  0.2028061   0.02653859 -0.25797154]
[ Experience replay ] starts
[ episode 462 ][ timestamp 2 ] state=[-0.04156869  0.2028061   0.02653859 -0.25797154], action=1, reward=1.0, next_state=[-0.03751256  0.39753932  0.02137916 -0.54216708]
[ Experience replay ] starts
[ episode 462 ][ timestamp 3 ] state=[-0.03751256  0.39753932  0.02137916 -0.54216708], action=1, reward=1.0, next_state=[-0.02956178  0.59235437  0.01053582 -0.82803788]
[ Experience replay ] starts
[ episode 462 ][ timestamp 4 ] state=[-0.02956178  0.59235437  0.0105

[ episode 466 ][ timestamp 3 ] state=[-0.02903428  0.37636893  0.01564195 -0.54642947], action=1, reward=1.0, next_state=[-0.0215069   0.57126766  0.00471337 -0.83414325]
[ Experience replay ] starts
[ episode 466 ][ timestamp 4 ] state=[-0.0215069   0.57126766  0.00471337 -0.83414325], action=1, reward=1.0, next_state=[-0.01008155  0.7663249  -0.0119695  -1.12534013]
[ Experience replay ] starts
[ episode 466 ][ timestamp 5 ] state=[-0.01008155  0.7663249  -0.0119695  -1.12534013], action=1, reward=1.0, next_state=[ 0.00524495  0.96160166 -0.0344763  -1.42175324]
[ Experience replay ] starts
[ episode 466 ][ timestamp 6 ] state=[ 0.00524495  0.96160166 -0.0344763  -1.42175324], action=1, reward=1.0, next_state=[ 0.02447698  1.15713268 -0.06291137 -1.72500951]
[ Experience replay ] starts
[ episode 466 ][ timestamp 7 ] state=[ 0.02447698  1.15713268 -0.06291137 -1.72500951], action=1, reward=1.0, next_state=[ 0.04761964  1.35291536 -0.09741156 -2.0365863 ]
[ Experience replay ] starts


[ episode 470 ][ timestamp 6 ] state=[-0.01077762  0.94034698 -0.07859193 -1.53202707], action=1, reward=1.0, next_state=[ 0.00802932  1.13632325 -0.10923247 -1.84816632]
[ Experience replay ] starts
[ episode 470 ][ timestamp 7 ] state=[ 0.00802932  1.13632325 -0.10923247 -1.84816632], action=1, reward=1.0, next_state=[ 0.03075578  1.33246547 -0.14619579 -2.17267669]
[ Experience replay ] starts
[ episode 470 ][ timestamp 8 ] state=[ 0.03075578  1.33246547 -0.14619579 -2.17267669], action=1, reward=1.0, next_state=[ 0.05740509  1.52867877 -0.18964933 -2.50668557]
[ Experience replay ] starts
[ episode 470 ][ timestamp 9 ] state=[ 0.05740509  1.52867877 -0.18964933 -2.50668557], action=1, reward=-1.0, next_state=[ 0.08797867  1.72479023 -0.23978304 -2.85100174]
[ Experience replay ] starts
[ Ended! ] Episode 470: Exploration_rate=0.01. Score=9.
[ episode 471 ] state=[-0.03427269 -0.02845209 -0.0185573  -0.01794599]
[ episode 471 ][ timestamp 1 ] state=[-0.03427269 -0.02845209 -0.018557

[ episode 474 ][ timestamp 9 ] state=[ 0.05183759  0.42918011 -0.07918822 -0.57282626], action=1, reward=1.0, next_state=[ 0.0604212   0.62531787 -0.09064474 -0.88936793]
[ Experience replay ] starts
[ episode 474 ][ timestamp 10 ] state=[ 0.0604212   0.62531787 -0.09064474 -0.88936793], action=0, reward=1.0, next_state=[ 0.07292755  0.43153511 -0.1084321  -0.62650022]
[ Experience replay ] starts
[ episode 474 ][ timestamp 11 ] state=[ 0.07292755  0.43153511 -0.1084321  -0.62650022], action=1, reward=1.0, next_state=[ 0.08155826  0.62799019 -0.1209621  -0.95126876]
[ Experience replay ] starts
[ episode 474 ][ timestamp 12 ] state=[ 0.08155826  0.62799019 -0.1209621  -0.95126876], action=0, reward=1.0, next_state=[ 0.09411806  0.43468556 -0.13998748 -0.69890673]
[ Experience replay ] starts
[ episode 474 ][ timestamp 13 ] state=[ 0.09411806  0.43468556 -0.13998748 -0.69890673], action=0, reward=1.0, next_state=[ 0.10281177  0.24175345 -0.15396561 -0.45336157]
[ Experience replay ] sta

[ episode 475 ][ timestamp 39 ] state=[-0.13062777 -0.35689921 -0.00559018  0.10038521], action=0, reward=1.0, next_state=[-0.13776575 -0.55194061 -0.00358247  0.39129922]
[ Experience replay ] starts
[ episode 475 ][ timestamp 40 ] state=[-0.13776575 -0.55194061 -0.00358247  0.39129922], action=1, reward=1.0, next_state=[-0.14880456 -0.356768    0.00424351  0.09748894]
[ Experience replay ] starts
[ episode 475 ][ timestamp 41 ] state=[-0.14880456 -0.356768    0.00424351  0.09748894], action=0, reward=1.0, next_state=[-0.15593992 -0.55195051  0.00619329  0.39150766]
[ Experience replay ] starts
[ episode 475 ][ timestamp 42 ] state=[-0.15593992 -0.55195051  0.00619329  0.39150766], action=1, reward=1.0, next_state=[-0.16697893 -0.356917    0.01402344  0.10078382]
[ Experience replay ] starts
[ episode 475 ][ timestamp 43 ] state=[-0.16697893 -0.356917    0.01402344  0.10078382], action=0, reward=1.0, next_state=[-0.17411727 -0.55223709  0.01603912  0.39785791]
[ Experience replay ] st

[ episode 478 ][ timestamp 9 ] state=[-0.06958422 -1.1441176   0.10388448  1.76978113], action=0, reward=1.0, next_state=[-0.09246657 -1.34024752  0.1392801   2.0928771 ]
[ Experience replay ] starts
[ episode 478 ][ timestamp 10 ] state=[-0.09246657 -1.34024752  0.1392801   2.0928771 ], action=0, reward=1.0, next_state=[-0.11927152 -1.53647122  0.18113764  2.42517845]
[ Experience replay ] starts
[ episode 478 ][ timestamp 11 ] state=[-0.11927152 -1.53647122  0.18113764  2.42517845], action=0, reward=-1.0, next_state=[-0.15000094 -1.73263498  0.22964121  2.76757377]
[ Experience replay ] starts
[ Ended! ] Episode 478: Exploration_rate=0.01. Score=11.
[ episode 479 ] state=[-0.02874712  0.02182374  0.01536018 -0.04528169]
[ episode 479 ][ timestamp 1 ] state=[-0.02874712  0.02182374  0.01536018 -0.04528169], action=0, reward=1.0, next_state=[-0.02831064 -0.17351506  0.01445454  0.25220766]
[ Experience replay ] starts
[ episode 479 ][ timestamp 2 ] state=[-0.02831064 -0.17351506  0.014

[ episode 481 ][ timestamp 7 ] state=[-0.00487114 -0.012838    0.06547894  0.04291257], action=0, reward=1.0, next_state=[-0.0051279  -0.20883482  0.06633719  0.35551482]
[ Experience replay ] starts
[ episode 481 ][ timestamp 8 ] state=[-0.0051279  -0.20883482  0.06633719  0.35551482], action=1, reward=1.0, next_state=[-0.00930459 -0.01471569  0.07344749  0.08446541]
[ Experience replay ] starts
[ episode 481 ][ timestamp 9 ] state=[-0.00930459 -0.01471569  0.07344749  0.08446541], action=0, reward=1.0, next_state=[-0.00959891 -0.21080943  0.07513679  0.39938715]
[ Experience replay ] starts
[ episode 481 ][ timestamp 10 ] state=[-0.00959891 -0.21080943  0.07513679  0.39938715], action=1, reward=1.0, next_state=[-0.0138151  -0.0168293   0.08312454  0.13130735]
[ Experience replay ] starts
[ episode 481 ][ timestamp 11 ] state=[-0.0138151  -0.0168293   0.08312454  0.13130735], action=0, reward=1.0, next_state=[-0.01415168 -0.21303752  0.08575068  0.44901395]
[ Experience replay ] start

[ episode 481 ][ timestamp 49 ] state=[0.33338105 1.07286606 0.18522719 0.16487624], action=1, reward=1.0, next_state=[ 0.35483837  1.26492016  0.18852471 -0.06413119]
[ Experience replay ] starts
[ episode 481 ][ timestamp 50 ] state=[ 0.35483837  1.26492016  0.18852471 -0.06413119], action=0, reward=1.0, next_state=[0.38013677 1.06766578 0.18724209 0.28160641]
[ Experience replay ] starts
[ episode 481 ][ timestamp 51 ] state=[0.38013677 1.06766578 0.18724209 0.28160641], action=1, reward=1.0, next_state=[0.40149009 1.25969225 0.19287422 0.05332932]
[ Experience replay ] starts
[ episode 481 ][ timestamp 52 ] state=[0.40149009 1.25969225 0.19287422 0.05332932], action=1, reward=1.0, next_state=[ 0.42668394  1.4516008   0.1939408  -0.17284167]
[ Experience replay ] starts
[ episode 481 ][ timestamp 53 ] state=[ 0.42668394  1.4516008   0.1939408  -0.17284167], action=0, reward=1.0, next_state=[0.45571595 1.2543085  0.19048397 0.17421046]
[ Experience replay ] starts
[ episode 481 ][ ti

[ episode 482 ][ timestamp 8 ] state=[ 0.06135985 -0.16386236 -0.04551775  0.19875277], action=1, reward=1.0, next_state=[ 0.0580826   0.03188007 -0.0415427  -0.10793437]
[ Experience replay ] starts
[ episode 482 ][ timestamp 9 ] state=[ 0.0580826   0.03188007 -0.0415427  -0.10793437], action=0, reward=1.0, next_state=[ 0.0587202  -0.16262271 -0.04370139  0.17135804]
[ Experience replay ] starts
[ episode 482 ][ timestamp 10 ] state=[ 0.0587202  -0.16262271 -0.04370139  0.17135804], action=1, reward=1.0, next_state=[ 0.05546775  0.0330966  -0.04027423 -0.13478475]
[ Experience replay ] starts
[ episode 482 ][ timestamp 11 ] state=[ 0.05546775  0.0330966  -0.04027423 -0.13478475], action=0, reward=1.0, next_state=[ 0.05612968 -0.16142602 -0.04296992  0.14492516]
[ Experience replay ] starts
[ episode 482 ][ timestamp 12 ] state=[ 0.05612968 -0.16142602 -0.04296992  0.14492516], action=1, reward=1.0, next_state=[ 0.05290116  0.03428412 -0.04007142 -0.16099834]
[ Experience replay ] star

[ episode 482 ][ timestamp 50 ] state=[-0.11434303 -0.51692042 -0.08226084 -0.0365152 ], action=0, reward=1.0, next_state=[-0.12468144 -0.71077228 -0.08299114  0.22912191]
[ Experience replay ] starts
[ episode 482 ][ timestamp 51 ] state=[-0.12468144 -0.71077228 -0.08299114  0.22912191], action=1, reward=1.0, next_state=[-0.13889688 -0.51456848 -0.07840871 -0.08854226]
[ Experience replay ] starts
[ episode 482 ][ timestamp 52 ] state=[-0.13889688 -0.51456848 -0.07840871 -0.08854226], action=0, reward=1.0, next_state=[-0.14918825 -0.70848408 -0.08017955  0.17840892]
[ Experience replay ] starts
[ episode 482 ][ timestamp 53 ] state=[-0.14918825 -0.70848408 -0.08017955  0.17840892], action=1, reward=1.0, next_state=[-0.16335793 -0.51231173 -0.07661137 -0.1384518 ]
[ Experience replay ] starts
[ episode 482 ][ timestamp 54 ] state=[-0.16335793 -0.51231173 -0.07661137 -0.1384518 ], action=0, reward=1.0, next_state=[-0.17360417 -0.70625755 -0.07938041  0.1291119 ]
[ Experience replay ] st

[ episode 483 ][ timestamp 17 ] state=[ 0.06611857 -0.35863564 -0.1171137   0.21276852], action=1, reward=1.0, next_state=[ 0.05894585 -0.16205097 -0.11285833 -0.11444136]
[ Experience replay ] starts
[ episode 483 ][ timestamp 18 ] state=[ 0.05894585 -0.16205097 -0.11285833 -0.11444136], action=0, reward=1.0, next_state=[ 0.05570484 -0.3553901  -0.11514715  0.14061241]
[ Experience replay ] starts
[ episode 483 ][ timestamp 19 ] state=[ 0.05570484 -0.3553901  -0.11514715  0.14061241], action=1, reward=1.0, next_state=[ 0.04859703 -0.1588234  -0.11233491 -0.18606361]
[ Experience replay ] starts
[ episode 483 ][ timestamp 20 ] state=[ 0.04859703 -0.1588234  -0.11233491 -0.18606361], action=0, reward=1.0, next_state=[ 0.04542057 -0.35217383 -0.11605618  0.06917698]
[ Experience replay ] starts
[ episode 483 ][ timestamp 21 ] state=[ 0.04542057 -0.35217383 -0.11605618  0.06917698], action=1, reward=1.0, next_state=[ 0.03837709 -0.15559578 -0.11467264 -0.25775052]
[ Experience replay ] st

[ episode 483 ][ timestamp 65 ] state=[-0.60116012 -1.05849448 -0.17113486 -0.40218322], action=0, reward=1.0, next_state=[-0.62233001 -1.25082847 -0.17917852 -0.16796505]
[ Experience replay ] starts
[ episode 483 ][ timestamp 66 ] state=[-0.62233001 -1.25082847 -0.17917852 -0.16796505], action=1, reward=1.0, next_state=[-0.64734658 -1.05365464 -0.18253782 -0.51138785]
[ Experience replay ] starts
[ episode 483 ][ timestamp 67 ] state=[-0.64734658 -1.05365464 -0.18253782 -0.51138785], action=0, reward=1.0, next_state=[-0.66841968 -1.24579953 -0.19276558 -0.28132749]
[ Experience replay ] starts
[ episode 483 ][ timestamp 68 ] state=[-0.66841968 -1.24579953 -0.19276558 -0.28132749], action=1, reward=1.0, next_state=[-0.69333567 -1.04852557 -0.19839213 -0.62808037]
[ Experience replay ] starts
[ episode 483 ][ timestamp 69 ] state=[-0.69333567 -1.04852557 -0.19839213 -0.62808037], action=0, reward=-1.0, next_state=[-0.71430618 -1.24040682 -0.21095374 -0.40384962]
[ Experience replay ] s

[ episode 484 ][ timestamp 41 ] state=[-0.11032851 -0.76778798 -0.10819271  0.21585937], action=1, reward=1.0, next_state=[-0.12568427 -0.57129902 -0.10387552 -0.10889737]
[ Experience replay ] starts
[ episode 484 ][ timestamp 42 ] state=[-0.12568427 -0.57129902 -0.10387552 -0.10889737], action=0, reward=1.0, next_state=[-0.13711025 -0.76479092 -0.10605347  0.14929153]
[ Experience replay ] starts
[ episode 484 ][ timestamp 43 ] state=[-0.13711025 -0.76479092 -0.10605347  0.14929153], action=1, reward=1.0, next_state=[-0.15240607 -0.56832277 -0.10306764 -0.17487624]
[ Experience replay ] starts
[ episode 484 ][ timestamp 44 ] state=[-0.15240607 -0.56832277 -0.10306764 -0.17487624], action=0, reward=1.0, next_state=[-0.16377253 -0.76183022 -0.10656516  0.08359631]
[ Experience replay ] starts
[ episode 484 ][ timestamp 45 ] state=[-0.16377253 -0.76183022 -0.10656516  0.08359631], action=0, reward=1.0, next_state=[-0.17900913 -0.955276   -0.10489324  0.34084806]
[ Experience replay ] st

[ episode 485 ][ timestamp 3 ] state=[ 0.04787077  0.40510852 -0.03698464 -0.60103393], action=1, reward=1.0, next_state=[ 0.05597294  0.6007278  -0.04900532 -0.9051332 ]
[ Experience replay ] starts
[ episode 485 ][ timestamp 4 ] state=[ 0.05597294  0.6007278  -0.04900532 -0.9051332 ], action=0, reward=1.0, next_state=[ 0.06798749  0.40630252 -0.06710799 -0.62824719]
[ Experience replay ] starts
[ episode 485 ][ timestamp 5 ] state=[ 0.06798749  0.40630252 -0.06710799 -0.62824719], action=0, reward=1.0, next_state=[ 0.07611354  0.21217815 -0.07967293 -0.35743102]
[ Experience replay ] starts
[ episode 485 ][ timestamp 6 ] state=[ 0.07611354  0.21217815 -0.07967293 -0.35743102], action=0, reward=1.0, next_state=[ 0.08035711  0.01827395 -0.08682155 -0.09089644]
[ Experience replay ] starts
[ episode 485 ][ timestamp 7 ] state=[ 0.08035711  0.01827395 -0.08682155 -0.09089644], action=1, reward=1.0, next_state=[ 0.08072259  0.21452613 -0.08863948 -0.40965938]
[ Experience replay ] starts


[ episode 485 ][ timestamp 46 ] state=[-0.21122576 -0.52381031 -0.06989814 -0.16756547], action=0, reward=1.0, next_state=[-0.22170196 -0.71786573 -0.07324945  0.10227355]
[ Experience replay ] starts
[ episode 485 ][ timestamp 47 ] state=[-0.22170196 -0.71786573 -0.07324945  0.10227355], action=1, reward=1.0, next_state=[-0.23605928 -0.52177463 -0.07120398 -0.21259046]
[ Experience replay ] starts
[ episode 485 ][ timestamp 48 ] state=[-0.23605928 -0.52177463 -0.07120398 -0.21259046], action=0, reward=1.0, next_state=[-0.24649477 -0.71581013 -0.07545579  0.05680901]
[ Experience replay ] starts
[ episode 485 ][ timestamp 49 ] state=[-0.24649477 -0.71581013 -0.07545579  0.05680901], action=1, reward=1.0, next_state=[-0.26081097 -0.51969192 -0.0743196  -0.2586942 ]
[ Experience replay ] starts
[ episode 485 ][ timestamp 50 ] state=[-0.26081097 -0.51969192 -0.0743196  -0.2586942 ], action=1, reward=1.0, next_state=[-0.27120481 -0.323592   -0.07949349 -0.57386196]
[ Experience replay ] st

[ episode 486 ][ timestamp 31 ] state=[ 0.00906534 -0.35466639 -0.02834222  0.22024599], action=1, reward=1.0, next_state=[ 0.00197202 -0.15915102 -0.0239373  -0.08124078]
[ Experience replay ] starts
[ episode 486 ][ timestamp 32 ] state=[ 0.00197202 -0.15915102 -0.0239373  -0.08124078], action=1, reward=1.0, next_state=[-0.001211    0.03630575 -0.02556211 -0.38137884]
[ Experience replay ] starts
[ episode 486 ][ timestamp 33 ] state=[-0.001211    0.03630575 -0.02556211 -0.38137884], action=1, reward=1.0, next_state=[-4.84889065e-04  2.31781171e-01 -3.31896902e-02 -6.82010617e-01]
[ Experience replay ] starts
[ episode 486 ][ timestamp 34 ] state=[-4.84889065e-04  2.31781171e-01 -3.31896902e-02 -6.82010617e-01], action=1, reward=1.0, next_state=[ 0.00415073  0.42734795 -0.0468299  -0.9849552 ]
[ Experience replay ] starts
[ episode 486 ][ timestamp 35 ] state=[ 0.00415073  0.42734795 -0.0468299  -0.9849552 ], action=1, reward=1.0, next_state=[ 0.01269769  0.62306483 -0.06652901 -1.29

[ episode 488 ][ timestamp 5 ] state=[ 0.03280394  0.78287331 -0.07036851 -1.25804021], action=1, reward=1.0, next_state=[ 0.04846141  0.97882168 -0.09552931 -1.57190663]
[ Experience replay ] starts
[ episode 488 ][ timestamp 6 ] state=[ 0.04846141  0.97882168 -0.09552931 -1.57190663], action=1, reward=1.0, next_state=[ 0.06803784  1.17494484 -0.12696745 -1.89279297]
[ Experience replay ] starts
[ episode 488 ][ timestamp 7 ] state=[ 0.06803784  1.17494484 -0.12696745 -1.89279297], action=1, reward=1.0, next_state=[ 0.09153674  1.37119534 -0.16482331 -2.22202734]
[ Experience replay ] starts
[ episode 488 ][ timestamp 8 ] state=[ 0.09153674  1.37119534 -0.16482331 -2.22202734], action=1, reward=1.0, next_state=[ 0.11896064  1.56746154 -0.20926385 -2.5606757 ]
[ Experience replay ] starts
[ episode 488 ][ timestamp 9 ] state=[ 0.11896064  1.56746154 -0.20926385 -2.5606757 ], action=0, reward=-1.0, next_state=[ 0.15030987  1.37453416 -0.26047737 -2.33867348]
[ Experience replay ] starts

[ episode 489 ][ timestamp 38 ] state=[-0.11525305 -0.53886054 -0.08773823 -0.04538135], action=0, reward=1.0, next_state=[-0.12603026 -0.73262196 -0.08864586  0.21838086]
[ Experience replay ] starts
[ episode 489 ][ timestamp 39 ] state=[-0.12603026 -0.73262196 -0.08864586  0.21838086], action=1, reward=1.0, next_state=[-0.1406827  -0.53635203 -0.08427824 -0.10089583]
[ Experience replay ] starts
[ episode 489 ][ timestamp 40 ] state=[-0.1406827  -0.53635203 -0.08427824 -0.10089583], action=0, reward=1.0, next_state=[-0.15140974 -0.73017136 -0.08629616  0.1640528 ]
[ Experience replay ] starts
[ episode 489 ][ timestamp 41 ] state=[-0.15140974 -0.73017136 -0.08629616  0.1640528 ], action=1, reward=1.0, next_state=[-0.16601317 -0.53392687 -0.0830151  -0.15455814]
[ Experience replay ] starts
[ episode 489 ][ timestamp 42 ] state=[-0.16601317 -0.53392687 -0.0830151  -0.15455814], action=0, reward=1.0, next_state=[-0.17669171 -0.72776813 -0.08610627  0.11082403]
[ Experience replay ] st

[ episode 490 ][ timestamp 11 ] state=[ 0.02874868 -0.01378266 -0.02668308  0.0418179 ], action=1, reward=1.0, next_state=[ 0.02847303  0.18171156 -0.02584673 -0.25916293]
[ Experience replay ] starts
[ episode 490 ][ timestamp 12 ] state=[ 0.02847303  0.18171156 -0.02584673 -0.25916293], action=0, reward=1.0, next_state=[ 0.03210726 -0.01303205 -0.03102999  0.02525683]
[ Experience replay ] starts
[ episode 490 ][ timestamp 13 ] state=[ 0.03210726 -0.01303205 -0.03102999  0.02525683], action=1, reward=1.0, next_state=[ 0.03184662  0.18252084 -0.03052485 -0.27705265]
[ Experience replay ] starts
[ episode 490 ][ timestamp 14 ] state=[ 0.03184662  0.18252084 -0.03052485 -0.27705265], action=1, reward=1.0, next_state=[ 0.03549704  0.37806467 -0.0360659  -0.57920468]
[ Experience replay ] starts
[ episode 490 ][ timestamp 15 ] state=[ 0.03549704  0.37806467 -0.0360659  -0.57920468], action=0, reward=1.0, next_state=[ 0.04305833  0.18346622 -0.04765    -0.2980979 ]
[ Experience replay ] st

[ episode 491 ][ timestamp 5 ] state=[ 0.05552966  0.37133138 -0.0045139  -0.5498831 ], action=0, reward=1.0, next_state=[ 0.06295629  0.17627313 -0.01551157 -0.25862577]
[ Experience replay ] starts
[ episode 491 ][ timestamp 6 ] state=[ 0.06295629  0.17627313 -0.01551157 -0.25862577], action=1, reward=1.0, next_state=[ 0.06648175  0.37161305 -0.02068408 -0.55616063]
[ Experience replay ] starts
[ episode 491 ][ timestamp 7 ] state=[ 0.06648175  0.37161305 -0.02068408 -0.55616063], action=0, reward=1.0, next_state=[ 0.07391401  0.17678751 -0.03180729 -0.27006552]
[ Experience replay ] starts
[ episode 491 ][ timestamp 8 ] state=[ 0.07391401  0.17678751 -0.03180729 -0.27006552], action=1, reward=1.0, next_state=[ 0.07744976  0.37234858 -0.0372086  -0.57260852]
[ Experience replay ] starts
[ episode 491 ][ timestamp 9 ] state=[ 0.07744976  0.37234858 -0.0372086  -0.57260852], action=0, reward=1.0, next_state=[ 0.08489673  0.17776757 -0.04866077 -0.29187582]
[ Experience replay ] starts


[ episode 493 ][ timestamp 13 ] state=[ 0.07414339  0.05929686 -0.17115368 -0.349774  ], action=1, reward=1.0, next_state=[ 0.07532932  0.25638708 -0.17814916 -0.69116365]
[ Experience replay ] starts
[ episode 493 ][ timestamp 14 ] state=[ 0.07532932  0.25638708 -0.17814916 -0.69116365], action=0, reward=1.0, next_state=[ 0.08045707  0.06412552 -0.19197243 -0.45943483]
[ Experience replay ] starts
[ episode 493 ][ timestamp 15 ] state=[ 0.08045707  0.06412552 -0.19197243 -0.45943483], action=0, reward=1.0, next_state=[ 0.08173958 -0.12783838 -0.20116113 -0.23287247]
[ Experience replay ] starts
[ episode 493 ][ timestamp 16 ] state=[ 0.08173958 -0.12783838 -0.20116113 -0.23287247], action=0, reward=1.0, next_state=[ 0.07918281 -0.31960298 -0.20581858 -0.00976921]
[ Experience replay ] starts
[ episode 493 ][ timestamp 17 ] state=[ 0.07918281 -0.31960298 -0.20581858 -0.00976921], action=0, reward=1.0, next_state=[ 0.07279075 -0.51127012 -0.20601396  0.21157916]
[ Experience replay ] st

[ episode 494 ][ timestamp 22 ] state=[-0.02907276  0.14877946  0.0914227  -0.04016599], action=1, reward=1.0, next_state=[-0.02609717  0.34247958  0.09061938 -0.30266195]
[ Experience replay ] starts
[ episode 494 ][ timestamp 23 ] state=[-0.02609717  0.34247958  0.09061938 -0.30266195], action=1, reward=1.0, next_state=[-0.01924758  0.53620103  0.08456614 -0.56544617]
[ Experience replay ] starts
[ episode 494 ][ timestamp 24 ] state=[-0.01924758  0.53620103  0.08456614 -0.56544617], action=0, reward=1.0, next_state=[-0.00852356  0.34000082  0.07325722 -0.24736474]
[ Experience replay ] starts
[ episode 494 ][ timestamp 25 ] state=[-0.00852356  0.34000082  0.07325722 -0.24736474], action=1, reward=1.0, next_state=[-0.00172354  0.53400423  0.06830992 -0.51607099]
[ Experience replay ] starts
[ episode 494 ][ timestamp 26 ] state=[-0.00172354  0.53400423  0.06830992 -0.51607099], action=1, reward=1.0, next_state=[ 0.00895654  0.72810113  0.0579885  -0.78646983]
[ Experience replay ] st

[ episode 495 ][ timestamp 25 ] state=[ 0.16466526  1.53953358 -0.09156247 -2.11235739], action=1, reward=1.0, next_state=[ 0.19545594  1.73544347 -0.13380962 -2.43187301]
[ Experience replay ] starts
[ episode 495 ][ timestamp 26 ] state=[ 0.19545594  1.73544347 -0.13380962 -2.43187301], action=1, reward=1.0, next_state=[ 0.23016481  1.93143662 -0.18244708 -2.76245747]
[ Experience replay ] starts
[ episode 495 ][ timestamp 27 ] state=[ 0.23016481  1.93143662 -0.18244708 -2.76245747], action=1, reward=-1.0, next_state=[ 0.26879354  2.12729488 -0.23769623 -3.10471111]
[ Experience replay ] starts
[ Ended! ] Episode 495: Exploration_rate=0.01. Score=27.
[ episode 496 ] state=[-0.03093138 -0.0454347   0.01369762  0.03757432]
[ episode 496 ][ timestamp 1 ] state=[-0.03093138 -0.0454347   0.01369762  0.03757432], action=1, reward=1.0, next_state=[-0.03184008  0.14948817  0.01444911 -0.25075559]
[ Experience replay ] starts
[ episode 496 ][ timestamp 2 ] state=[-0.03184008  0.14948817  0.01

[ episode 496 ][ timestamp 42 ] state=[-0.25595007 -0.55937813 -0.14612261 -0.66605425], action=1, reward=1.0, next_state=[-0.26713764 -0.3625586  -0.15944369 -1.00094464]
[ Experience replay ] starts
[ episode 496 ][ timestamp 43 ] state=[-0.26713764 -0.3625586  -0.15944369 -1.00094464], action=1, reward=1.0, next_state=[-0.27438881 -0.16570683 -0.17946259 -1.33915501]
[ Experience replay ] starts
[ episode 496 ][ timestamp 44 ] state=[-0.27438881 -0.16570683 -0.17946259 -1.33915501], action=0, reward=1.0, next_state=[-0.27770294 -0.35817352 -0.20624569 -1.10757081]
[ Experience replay ] starts
[ episode 496 ][ timestamp 45 ] state=[-0.27770294 -0.35817352 -0.20624569 -1.10757081], action=0, reward=-1.0, next_state=[-0.28486641 -0.55007698 -0.2283971  -0.88602349]
[ Experience replay ] starts
[ Ended! ] Episode 496: Exploration_rate=0.01. Score=45.
[ episode 497 ] state=[ 0.03105517 -0.00774151 -0.00283979  0.02913354]
[ episode 497 ][ timestamp 1 ] state=[ 0.03105517 -0.00774151 -0.0

[ episode 497 ][ timestamp 39 ] state=[ 0.06187533  0.3903504  -0.06829248 -0.73072196], action=0, reward=1.0, next_state=[ 0.06968233  0.19623541 -0.08290692 -0.4602906 ]
[ Experience replay ] starts
[ episode 497 ][ timestamp 40 ] state=[ 0.06968233  0.19623541 -0.08290692 -0.4602906 ], action=0, reward=1.0, next_state=[ 0.07360704  0.00237718 -0.09211273 -0.19484878]
[ Experience replay ] starts
[ episode 497 ][ timestamp 41 ] state=[ 0.07360704  0.00237718 -0.09211273 -0.19484878], action=0, reward=1.0, next_state=[ 0.07365459 -0.19131469 -0.0960097   0.06741447]
[ Experience replay ] starts
[ episode 497 ][ timestamp 42 ] state=[ 0.07365459 -0.19131469 -0.0960097   0.06741447], action=1, reward=1.0, next_state=[ 0.06982829  0.00504328 -0.09466141 -0.25394953]
[ Experience replay ] starts
[ episode 497 ][ timestamp 43 ] state=[ 0.06982829  0.00504328 -0.09466141 -0.25394953], action=1, reward=1.0, next_state=[ 0.06992916  0.20138042 -0.0997404  -0.57492564]
[ Experience replay ] st

[ episode 499 ][ timestamp 9 ] state=[-0.01922629 -0.77830937  0.07771577  1.22550441], action=0, reward=1.0, next_state=[-0.03479248 -0.97434118  0.10222586  1.54149003]
[ Experience replay ] starts
[ episode 499 ][ timestamp 10 ] state=[-0.03479248 -0.97434118  0.10222586  1.54149003], action=0, reward=1.0, next_state=[-0.0542793  -1.17053294  0.13305566  1.86424343]
[ Experience replay ] starts
[ episode 499 ][ timestamp 11 ] state=[-0.0542793  -1.17053294  0.13305566  1.86424343], action=0, reward=1.0, next_state=[-0.07768996 -1.36683805  0.17034053  2.19510147]
[ Experience replay ] starts
[ episode 499 ][ timestamp 12 ] state=[-0.07768996 -1.36683805  0.17034053  2.19510147], action=0, reward=-1.0, next_state=[-0.10502672 -1.56314629  0.21424256  2.53514038]
[ Experience replay ] starts
[ Ended! ] Episode 499: Exploration_rate=0.01. Score=12.
[ episode 500 ] state=[-0.03948708 -0.01641352 -0.0173079  -0.03202856]
[ episode 500 ][ timestamp 1 ] state=[-0.03948708 -0.01641352 -0.01

[ episode 500 ][ timestamp 39 ] state=[-0.06813301 -0.38543008 -0.08607206  0.08547859], action=1, reward=1.0, next_state=[-0.07584161 -0.18918655 -0.08436249 -0.23307094]
[ Experience replay ] starts
[ episode 500 ][ timestamp 40 ] state=[-0.07584161 -0.18918655 -0.08436249 -0.23307094], action=0, reward=1.0, next_state=[-0.07962534 -0.38300812 -0.08902391  0.03185429]
[ Experience replay ] starts
[ episode 500 ][ timestamp 41 ] state=[-0.07962534 -0.38300812 -0.08902391  0.03185429], action=1, reward=1.0, next_state=[-0.08728551 -0.18672978 -0.08838682 -0.2875358 ]
[ Experience replay ] starts
[ episode 500 ][ timestamp 42 ] state=[-0.08728551 -0.18672978 -0.08838682 -0.2875358 ], action=1, reward=1.0, next_state=[-0.0910201   0.00953416 -0.09413754 -0.60673441]
[ Experience replay ] starts
[ episode 500 ][ timestamp 43 ] state=[-0.0910201   0.00953416 -0.09413754 -0.60673441], action=0, reward=1.0, next_state=[-0.09082942 -0.18415422 -0.10627223 -0.3451238 ]
[ Experience replay ] st